// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1beta2;

import "google/ai/generativelanguage/v1beta2/citation.proto";
import "google/ai/generativelanguage/v1beta2/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta2/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "TextServiceProto";
option java_package = "com.google.ai.generativelanguage.v1beta2";

// API for using Generative Language Models (GLMs) trained to generate text.
//
// Also known as Large Language Models (LLM)s, these generate text given an
// input prompt from the user.
service TextService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a response from the model given an input message.
  rpc GenerateText(GenerateTextRequest) returns (GenerateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/{model=models/*}:generateText"
      body: "*"
    };
    option (google.api.method_signature) =
        "model,prompt,temperature,candidate_count,max_output_tokens,top_p,top_k";
  }

  // Generates an embedding from the model given an input message.
  rpc EmbedText(EmbedTextRequest) returns (EmbedTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/{model=models/*}:embedText"
      body: "*"
    };
    option (google.api.method_signature) = "model,text";
  }
}

// Request to generate a text completion response from the model.
message GenerateTextRequest {
  // Required. The model name to use with the format name=models/{model}.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The free-form input text given to the model as a prompt.
  //
  // Given a prompt, the model will generate a TextCompletion response it
  // predicts as the completion of the input text.
  TextPrompt prompt = 2 [(google.api.field_behavior) = REQUIRED];

  // Controls the randomness of the output.
  // Note: The default value varies by model, see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0,1.0],
  // inclusive. A value closer to 1.0 will produce responses that are more
  // varied and creative, while a value closer to 0.0 will typically result in
  // more straightforward responses from the model.
  optional float temperature = 3;

  // Number of generated responses to return.
  //
  // This value must be between [1, 8], inclusive. If unset, this will default
  // to 1.
  optional int32 candidate_count = 4;

  // The maximum number of tokens to include in a candidate.
  //
  // If unset, this will default to 64.
  optional int32 max_output_tokens = 5;

  // The maximum cumulative probability of tokens to consider when sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while Nucleus sampling limits number
  // of tokens based on the cumulative probability.
  //
  // Note: The default value varies by model, see the `Model.top_p`
  // attribute of the `Model` returned from the `getModel` function.
  optional float top_p = 6;

  // The maximum number of tokens to consider when sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Top-k sampling considers the set of `top_k` most probable tokens.
  // Defaults to 40.
  //
  // Note: The default value varies by model, see the `Model.top_k`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 top_k = 7;

  // A list of unique `SafetySetting` instances for blocking unsafe content
  // that will be enforced on the `GenerateTextRequest.prompt` and
  // `GenerateTextResponse.candidates`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any prompts and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category.
  repeated SafetySetting safety_settings = 8;

  // The set of character sequences (up to 5) that will stop output generation.
  // If specified, the API will stop at the first appearance of a stop
  // sequence. The stop sequence will not be included as part of the response.
  repeated string stop_sequences = 9;
}

// The response from the model, including candidate completions.
message GenerateTextResponse {
  // Candidate responses from the model.
  repeated TextCompletion candidates = 1;

  // A set of content filtering metadata for the prompt and response
  // text.
  //
  // This indicates which `SafetyCategory`(s) blocked a
  // candidate from this response, the lowest `HarmProbability`
  // that triggered a block, and the HarmThreshold setting for that category.
  // This indicates the smallest change to the `SafetySettings` that would be
  // necessary to unblock at least 1 response.
  //
  // The blocking is configured by the `SafetySettings` in the request (or the
  // default `SafetySettings` of the API).
  repeated ContentFilter filters = 3;

  // Returns any safety feedback related to content filtering.
  repeated SafetyFeedback safety_feedback = 4;
}

// Text given to the model as a prompt.
//
// The Model will use this TextPrompt to generate a text completion.
message TextPrompt {
  // Required. The prompt text.
  string text = 1 [(google.api.field_behavior) = REQUIRED];
}

// Output text returned from a model.
message TextCompletion {
  // Output only. The generated text returned from the model.
  string output = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Ratings for the safety of a response.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 2;

  // Output only. Citation information for model-generated `output` in this
  // `TextCompletion`.
  //
  // This field may be populated with attribution information for any text
  // included in the `output`.
  optional CitationMetadata citation_metadata = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request to get a text embedding from the model.
message EmbedTextRequest {
  // Required. The model name to use with the format model=models/{model}.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The free-form input text that the model will turn into an
  // embedding.
  string text = 2 [(google.api.field_behavior) = REQUIRED];
}

// The response to an EmbedTextRequest.
message EmbedTextResponse {
  // Output only. The embedding generated from the input text.
  optional Embedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A list of floats representing the embedding.
message Embedding {
  // The embedding values.
  repeated float value = 1;
}