// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1beta;

import "google/ai/generativelanguage/v1beta/citation.proto";
import "google/ai/generativelanguage/v1beta/content.proto";
import "google/ai/generativelanguage/v1beta/retriever.proto";
import "google/ai/generativelanguage/v1beta/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "GenerativeServiceProto";
option java_package = "com.google.ai.generativelanguage.v1beta";

// API for using Large Models that generate multimodal content and have
// additional capabilities beyond text generation.
service GenerativeService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a response from the model given an input
  // `GenerateContentRequest`.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta/{model=tunedModels/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates a grounded answer from the model given an input
  // `GenerateAnswerRequest`.
  rpc GenerateAnswer(GenerateAnswerRequest) returns (GenerateAnswerResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateAnswer"
      body: "*"
    };
    option (google.api.method_signature) =
        "model,contents,safety_settings,answer_style";
  }

  // Generates a streamed response from the model given an input
  // `GenerateContentRequest`.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:streamGenerateContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates an embedding from the model given an input `Content`.
  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:embedContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,content";
  }

  // Generates multiple embeddings from the model given input text in a
  // synchronous call.
  rpc BatchEmbedContents(BatchEmbedContentsRequest)
      returns (BatchEmbedContentsResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:batchEmbedContents"
      body: "*"
    };
    option (google.api.method_signature) = "model,requests";
  }

  // Runs a model's tokenizer on input content and returns the token count.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:countTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }
}

// Type of task for which the embedding will be used.
enum TaskType {
  // Unset value, which will default to one of the other enum values.
  TASK_TYPE_UNSPECIFIED = 0;

  // Specifies the given text is a query in a search/retrieval setting.
  RETRIEVAL_QUERY = 1;

  // Specifies the given text is a document from the corpus being searched.
  RETRIEVAL_DOCUMENT = 2;

  // Specifies the given text will be used for STS.
  SEMANTIC_SIMILARITY = 3;

  // Specifies that the given text will be classified.
  CLASSIFICATION = 4;

  // Specifies that the embeddings will be used for clustering.
  CLUSTERING = 5;

  // Specifies that the given text will be used for question answering.
  QUESTION_ANSWERING = 6;

  // Specifies that the given text will be used for fact verification.
  FACT_VERIFICATION = 7;
}

// Request to generate a completion from the model.
message GenerateContentRequest {
  // Required. The name of the `Model` to use for generating the completion.
  //
  // Format: `name=models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Optional. Developer set system instruction. Currently, text only.
  optional Content system_instruction = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the model. The only supported tool is currently
  // `Function`.
  repeated Tool tools = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Tool configuration for any `Tool` specified in the request.
  ToolConfig tool_config = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateContentRequest.contents` and
  // `GenerateContentResponse.candidates`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT are supported.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration options for model generation and outputs.
  optional GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Configuration options for model generation and outputs. Not all parameters
// may be configurable for every model.
message GenerationConfig {
  // Optional. Number of generated responses to return.
  //
  // Currently, this value can only be set to 1. If unset, this will default
  // to 1.
  optional int32 candidate_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of character sequences (up to 5) that will stop output
  // generation. If specified, the API will stop at the first appearance of a
  // stop sequence. The stop sequence will not be included as part of the
  // response.
  repeated string stop_sequences = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to include in a candidate.
  //
  // Note: The default value varies by model, see the `Model.output_token_limit`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 max_output_tokens = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Note: The default value varies by model, see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0, 2.0].
  optional float temperature = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while Nucleus sampling limits number
  // of tokens based on the cumulative probability.
  //
  // Note: The default value varies by model, see the `Model.top_p`
  // attribute of the `Model` returned from the `getModel` function.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // Models use nucleus sampling or combined Top-k and nucleus sampling.
  // Top-k sampling considers the set of `top_k` most probable tokens.
  // Models running with nucleus sampling don't allow top_k setting.
  //
  // Note: The default value varies by model, see the `Model.top_k`
  // attribute of the `Model` returned from the `getModel` function. Empty
  // `top_k` field in `Model` indicates the model doesn't apply top-k sampling
  // and doesn't allow setting `top_k` on requests.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Output response mimetype of the generated candidate text.
  // Supported mimetype:
  // `text/plain`: (default) Text output.
  // `application/json`: JSON response in the candidates.
  string response_mime_type = 13 [(google.api.field_behavior) = OPTIONAL];
}

// Configuration for retrieving grounding content from a `Corpus` or
// `Document` created using the Semantic Retriever API.
message SemanticRetrieverConfig {
  // Required. Name of the resource for retrieval, e.g. corpora/123 or
  // corpora/123/documents/abc.
  string source = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Query to use for similarity matching `Chunk`s in the given
  // resource.
  Content query = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Filters for selecting `Document`s and/or `Chunk`s from the
  // resource.
  repeated MetadataFilter metadata_filters = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Maximum number of relevant `Chunk`s to retrieve.
  optional int32 max_chunks_count = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Minimum relevance score for retrieved relevant `Chunk`s.
  optional float minimum_relevance_score = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// Response from the model supporting multiple candidates.
//
// Note on safety ratings and content filtering. They are reported for both
// prompt in `GenerateContentResponse.prompt_feedback` and for each candidate
// in `finish_reason` and in `safety_ratings`. The API contract is that:
// - either all requested candidates are returned or no candidates at all
// - no candidates are returned only if there was something wrong with the
//   prompt (see `prompt_feedback`)
// - feedback on each candidate is reported on `finish_reason` and
//   `safety_ratings`.
message GenerateContentResponse {
  // A set of the feedback metadata the prompt specified in
  // `GenerateContentRequest.content`.
  message PromptFeedback {
    // Specifies what was the reason why prompt was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Prompt was blocked due to safety reasons. You can inspect
      // `safety_ratings` to understand which safety category blocked it.
      SAFETY = 1;

      // Prompt was blocked due to unknown reasons.
      OTHER = 2;
    }

    // Optional. If set, the prompt was blocked and no candidates are returned.
    // Rephrase your prompt.
    BlockReason block_reason = 1 [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the prompt.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Candidate responses from the model.
  repeated Candidate candidates = 1;

  // Returns the prompt's feedback related to the content filters.
  PromptFeedback prompt_feedback = 2;
}

// A response candidate generated from the model.
message Candidate {
  // Defines the reason why the model stopped generating tokens.
  enum FinishReason {
    // Default value. This value is unused.
    FINISH_REASON_UNSPECIFIED = 0;

    // Natural stop point of the model or provided stop sequence.
    STOP = 1;

    // The maximum number of tokens as specified in the request was reached.
    MAX_TOKENS = 2;

    // The candidate content was flagged for safety reasons.
    SAFETY = 3;

    // The candidate content was flagged for recitation reasons.
    RECITATION = 4;

    // Unknown reason.
    OTHER = 5;
  }

  // Output only. Index of the candidate in the list of candidates.
  optional int32 index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Generated content returned from the model.
  Content content = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. The reason why the model stopped generating tokens.
  //
  // If empty, the model has not stopped generating the tokens.
  FinishReason finish_reason = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // List of ratings for the safety of a response candidate.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 5;

  // Output only. Citation information for model-generated candidate.
  //
  // This field may be populated with recitation information for any text
  // included in the `content`. These are passages that are "recited" from
  // copyrighted material in the foundational LLM's training data.
  CitationMetadata citation_metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Token count for this candidate.
  int32 token_count = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Attribution information for sources that contributed to a
  // grounded answer.
  //
  // This field is populated for `GenerateAnswer` calls.
  repeated GroundingAttribution grounding_attributions = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Identifier for the source contributing to this attribution.
message AttributionSourceId {
  // Identifier for a part within a `GroundingPassage`.
  message GroundingPassageId {
    // Output only. ID of the passage matching the `GenerateAnswerRequest`'s
    // `GroundingPassage.id`.
    string passage_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Index of the part within the `GenerateAnswerRequest`'s
    // `GroundingPassage.content`.
    int32 part_index = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Identifier for a `Chunk` retrieved via Semantic Retriever specified in the
  // `GenerateAnswerRequest` using `SemanticRetrieverConfig`.
  message SemanticRetrieverChunk {
    // Output only. Name of the source matching the request's
    // `SemanticRetrieverConfig.source`. Example: `corpora/123` or
    // `corpora/123/documents/abc`
    string source = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Name of the `Chunk` containing the attributed text.
    // Example: `corpora/123/documents/abc/chunks/xyz`
    string chunk = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  oneof source {
    // Identifier for an inline passage.
    GroundingPassageId grounding_passage = 1;

    // Identifier for a `Chunk` fetched via Semantic Retriever.
    SemanticRetrieverChunk semantic_retriever_chunk = 2;
  }
}

// Attribution for a source that contributed to an answer.
message GroundingAttribution {
  // Output only. Identifier for the source contributing to this attribution.
  AttributionSourceId source_id = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Grounding source content that makes up this attribution.
  Content content = 2;
}

// Request to generate a grounded answer from the model.
message GenerateAnswerRequest {
  // Style for grounded answers.
  enum AnswerStyle {
    // Unspecified answer style.
    ANSWER_STYLE_UNSPECIFIED = 0;

    // Succinct but abstract style.
    ABSTRACTIVE = 1;

    // Very brief and extractive style.
    EXTRACTIVE = 2;

    // Verbose style including extra details. The response may be formatted as a
    // sentence, paragraph, multiple paragraphs, or bullet points, etc.
    VERBOSE = 3;
  }

  // The sources in which to ground the answer.
  oneof grounding_source {
    // Passages provided inline with the request.
    GroundingPassages inline_passages = 6;

    // Content retrieved from resources created via the Semantic Retriever
    // API.
    SemanticRetrieverConfig semantic_retriever = 7;
  }

  // Required. The name of the `Model` to use for generating the grounded
  // response.
  //
  // Format: `model=models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content of the current conversation with the model. For
  // single-turn queries, this is a single question to answer. For multi-turn
  // queries, this is a repeated field that contains conversation history and
  // the last `Content` in the list containing the question.
  //
  // Note: GenerateAnswer currently only supports queries in English.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Style in which answers should be returned.
  AnswerStyle answer_style = 5 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateAnswerRequest.contents` and
  // `GenerateAnswerResponse.candidate`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT are supported.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Values can range from [0.0,1.0], inclusive. A value closer to 1.0 will
  // produce responses that are more varied and creative, while a value closer
  // to 0.0 will typically result in more straightforward responses from the
  // model. A low temperature (~0.2) is usually recommended for
  // Attributed-Question-Answering use cases.
  optional float temperature = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Response from the model for a grounded answer.
message GenerateAnswerResponse {
  // Feedback related to the input data used to answer the question, as opposed
  // to model-generated response to the question.
  message InputFeedback {
    // Specifies what was the reason why input was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Input was blocked due to safety reasons. You can inspect
      // `safety_ratings` to understand which safety category blocked it.
      SAFETY = 1;

      // Input was blocked due to other reasons.
      OTHER = 2;
    }

    // Optional. If set, the input was blocked and no candidates are returned.
    // Rephrase your input.
    optional BlockReason block_reason = 1
        [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the input.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Candidate answer from the model.
  //
  // Note: The model *always* attempts to provide a grounded answer, even when
  // the answer is unlikely to be answerable from the given passages.
  // In that case, a low-quality or ungrounded answer may be provided, along
  // with a low `answerable_probability`.
  Candidate answer = 1;

  // Output only. The model's estimate of the probability that its answer is
  // correct and grounded in the input passages.
  //
  // A low answerable_probability indicates that the answer might not be
  // grounded in the sources.
  //
  // When `answerable_probability` is low, some clients may wish to:
  //
  // * Display a message to the effect of "We couldn’t answer that question" to
  //   the user.
  // * Fall back to a general-purpose LLM that answers the question from world
  //   knowledge. The threshold and nature of such fallbacks will depend on
  //   individual clients’ use cases. 0.5 is a good starting threshold.
  optional float answerable_probability = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Feedback related to the input data used to answer the
  // question, as opposed to model-generated response to the question.
  //
  // "Input data" can be one or more of the following:
  //
  // - Question specified by the last entry in `GenerateAnswerRequest.content`
  // - Conversation history specified by the other entries in
  //   `GenerateAnswerRequest.content`
  // - Grounding sources (`GenerateAnswerRequest.semantic_retriever` or
  //   `GenerateAnswerRequest.inline_passages`)
  optional InputFeedback input_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request containing the `Content` for the model to embed.
message EmbedContentRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content to embed. Only the `parts.text` fields will be
  // counted.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Optional task type for which the embeddings will be used. Can
  // only be set for `models/embedding-001`.
  optional TaskType task_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. An optional title for the text. Only applicable when TaskType is
  // `RETRIEVAL_DOCUMENT`.
  //
  // Note: Specifying a `title` for `RETRIEVAL_DOCUMENT` provides better quality
  // embeddings for retrieval.
  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Optional reduced dimension for the output embedding. If set,
  // excessive values in the output embedding are truncated from the end.
  // Supported by `models/text-embedding-latest`.
  optional int32 output_dimensionality = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// A list of floats representing an embedding.
message ContentEmbedding {
  // The embedding values.
  repeated float values = 1;
}

// The response to an `EmbedContentRequest`.
message EmbedContentResponse {
  // Output only. The embedding generated from the input content.
  ContentEmbedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Batch request to get embeddings from the model for a list of prompts.
message BatchEmbedContentsRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Embed requests for the batch. The model in each of these requests
  // must match the model specified `BatchEmbedContentsRequest.model`.
  repeated EmbedContentRequest requests = 2
      [(google.api.field_behavior) = REQUIRED];
}

// The response to a `BatchEmbedContentsRequest`.
message BatchEmbedContentsResponse {
  // Output only. The embeddings for each request, in the same order as provided
  // in the batch request.
  repeated ContentEmbedding embeddings = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Counts the number of tokens in the `prompt` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `token_count`.
message CountTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The input given to the model as a prompt.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];
}

// A response from `CountTokens`.
//
// It returns the model's `token_count` for the `prompt`.
message CountTokensResponse {
  // The number of tokens that the `model` tokenizes the `prompt` into.
  //
  // Always non-negative.
  int32 total_tokens = 1;
}