// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1beta2;

import "google/ai/generativelanguage/v1beta2/citation.proto";
import "google/ai/generativelanguage/v1beta2/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta2/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "DiscussServiceProto";
option java_package = "com.google.ai.generativelanguage.v1beta2";

// An API for using Generative Language Models (GLMs) in dialog applications.
//
// Also known as large language models (LLMs), this API provides models that
// are trained for multi-turn dialog.
service DiscussService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a response from the model given an input `MessagePrompt`.
  rpc GenerateMessage(GenerateMessageRequest)
      returns (GenerateMessageResponse) {
    option (google.api.http) = {
      post: "/v1beta2/{model=models/*}:generateMessage"
      body: "*"
    };
    option (google.api.method_signature) =
        "model,prompt,temperature,candidate_count,top_p,top_k";
  }

  // Runs a model's tokenizer on a string and returns the token count.
  rpc CountMessageTokens(CountMessageTokensRequest)
      returns (CountMessageTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta2/{model=models/*}:countMessageTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,prompt";
  }
}

// Request to generate a message response from the model.
message GenerateMessageRequest {
  // Required. The name of the model to use.
  //
  // Format: `name=models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The structured textual input given to the model as a prompt.
  //
  // Given a
  // prompt, the model will return what it predicts is the next message in the
  // discussion.
  MessagePrompt prompt = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Controls the randomness of the output.
  //
  // Values can range over `[0.0,1.0]`,
  // inclusive. A value closer to `1.0` will produce responses that are more
  // varied, while a value closer to `0.0` will typically result in
  // less surprising responses from the model.
  optional float temperature = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The number of generated response messages to return.
  //
  // This value must be between
  // `[1, 8]`, inclusive. If unset, this will default to `1`.
  optional int32 candidate_count = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Nucleus sampling considers the smallest set of tokens whose probability
  // sum is at least `top_p`.
  optional float top_p = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Top-k sampling considers the set of `top_k` most probable tokens.
  optional int32 top_k = 6 [(google.api.field_behavior) = OPTIONAL];
}

// The response from the model.
//
// This includes candidate messages and
// conversation history in the form of chronologically-ordered messages.
message GenerateMessageResponse {
  // Candidate response messages from the model.
  repeated Message candidates = 1;

  // The conversation history used by the model.
  repeated Message messages = 2;

  // A set of content filtering metadata for the prompt and response
  // text.
  //
  // This indicates which `SafetyCategory`(s) blocked a
  // candidate from this response, the lowest `HarmProbability`
  // that triggered a block, and the HarmThreshold setting for that category.
  repeated ContentFilter filters = 3;
}

// The base unit of structured text.
//
// A `Message` includes an `author` and the `content` of
// the `Message`.
//
// The `author` is used to tag messages when they are fed to the
// model as text.
message Message {
  // Optional. The author of this Message.
  //
  // This serves as a key for tagging
  // the content of this Message when it is fed to the model as text.
  //
  // The author can be any alphanumeric string.
  string author = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. The text content of the structured `Message`.
  string content = 2 [(google.api.field_behavior) = REQUIRED];

  // Output only. Citation information for model-generated `content` in this
  // `Message`.
  //
  // If this `Message` was generated as output from the model, this field may be
  // populated with attribution information for any text included in the
  // `content`. This field is used only on output.
  optional CitationMetadata citation_metadata = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// All of the structured input text passed to the model as a prompt.
//
// A `MessagePrompt` contains a structured set of fields that provide context
// for the conversation, examples of user input/model output message pairs that
// prime the model to respond in different ways, and the conversation history
// or list of messages representing the alternating turns of the conversation
// between the user and the model.
message MessagePrompt {
  // Optional. Text that should be provided to the model first to ground the
  // response.
  //
  // If not empty, this `context` will be given to the model first before the
  // `examples` and `messages`. When using a `context` be sure to provide it
  // with every request to maintain continuity.
  //
  // This field can be a description of your prompt to the model to help provide
  // context and guide the responses. Examples: "Translate the phrase from
  // English to French." or "Given a statement, classify the sentiment as happy,
  // sad or neutral."
  //
  // Anything included in this field will take precedence over message history
  // if the total input size exceeds the model's `input_token_limit` and the
  // input request is truncated.
  string context = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Examples of what the model should generate.
  //
  // This includes both user input and the response that the model should
  // emulate.
  //
  // These `examples` are treated identically to conversation messages except
  // that they take precedence over the history in `messages`:
  // If the total input size exceeds the model's `input_token_limit` the input
  // will be truncated. Items will be dropped from `messages` before `examples`.
  repeated Example examples = 2 [(google.api.field_behavior) = OPTIONAL];

  // Required. A snapshot of the recent conversation history sorted
  // chronologically.
  //
  // Turns alternate between two authors.
  //
  // If the total input size exceeds the model's `input_token_limit` the input
  // will be truncated: The oldest items will be dropped from `messages`.
  repeated Message messages = 3 [(google.api.field_behavior) = REQUIRED];
}

// An input/output example used to instruct the Model.
//
// It demonstrates how the model should respond or format its response.
message Example {
  // Required. An example of an input `Message` from the user.
  Message input = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. An example of what the model should output given the input.
  Message output = 2 [(google.api.field_behavior) = REQUIRED];
}

// Counts the number of tokens in the `prompt` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `token_count`.
message CountMessageTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The prompt, whose token count is to be returned.
  MessagePrompt prompt = 2 [(google.api.field_behavior) = REQUIRED];
}

// A response from `CountMessageTokens`.
//
// It returns the model's `token_count` for the `prompt`.
message CountMessageTokensResponse {
  // The number of tokens that the `model` tokenizes the `prompt` into.
  //
  // Always non-negative.
  int32 token_count = 1;
}