// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "cloud.google.com/go/language/apiv1/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to
  // [AnalyzeEntities][google.cloud.language.v1.LanguageService.AnalyzeEntities]
  // in the text and analyzes sentiment associated with each entity and its
  // mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest)
      returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // Moderates a document for harmful and sensitive categories.
  rpc ModerateText(ModerateTextRequest) returns (ModerateTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:moderateText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all the features that analyzeSentiment,
  // analyzeEntities, and analyzeSyntax provide in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}
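
// Illustrative sketch (not part of the API definition; the content string is
// made up): a minimal plain-text `Document` in proto text format. The
// `gcs_content_uri` variant would replace `content` with a `gs://` URI.
//
//   type: PLAIN_TEXT
//   content: "Enjoy your vacation!"
//   language: "en"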

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    //   convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    //   detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    //   detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    //   city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the Type table below.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the aggregate sentiment expressed
  // for this entity in the provided document.
  Sentiment sentiment = 6;
}

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}

// Represents part of speech information for a token. Parts of speech
// are as defined in
// http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can categorize different forms of verbs,
  // adjectives, adverbs, etc. For example, it can categorize inflected endings
  // of verbs and adjectives, or distinguish between short and long forms of
  // adjectives and participles.
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnomial
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}

// Represents dependency parse tree information for a token. (For more
// information on dependency labels, see
// http://www.aclweb.org/anthology/P13-2017.)
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document.
  Sentiment sentiment = 3;
}
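
// Worked example (illustrative, not from the API reference): for the input
// "José went home", the token "went" is preceded by "José " (five characters,
// in which "é" occupies 2 bytes in UTF-8 but a single code unit in UTF-16 and
// UTF-32). Its `begin_offset` is therefore 6 with `EncodingType.UTF8`, 5 with
// `EncodingType.UTF16` or `EncodingType.UTF32`, and -1 with
// `EncodingType.NONE`. In proto text format (UTF8 case):
//
//   text { content: "went" begin_offset: 6 }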

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1.EncodingType] specified in the API
  // request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document.
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// Model options available for classification requests.
message ClassificationModelOptions {
  // Options for the V1 model.
  message V1Model {}

  // Options for the V2 model.
  message V2Model {
    // The content categories used for classification.
    enum ContentCategoriesVersion {
      // If `ContentCategoriesVersion` is not specified, this option will
      // default to `V1`.
      CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;

      // Legacy content categories of our initial launch in 2017.
      V1 = 1;

      // Updated content categories in 2022.
      V2 = 2;
    }

    // The content categories used for classification.
    ContentCategoriesVersion content_categories_version = 1;
  }

  // If this field is not set, then the `v1_model` will be used by default.
  oneof model_type {
    // Setting this field will use the V1 model and V1 content categories
    // version. The V1 model is a legacy model; support for this will be
    // discontinued in the future.
    V1Model v1_model = 1;

    // Setting this field will use the V2 model with the appropriate content
    // categories version. The V2 model is a better performing model.
    V2Model v2_model = 2;
  }
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}
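
// Illustrative sketch (all values are made up, not an official sample): an
// analyzeSentiment exchange in proto text format, assuming a short positive
// input.
//
//   AnalyzeSentimentRequest:
//     document { type: PLAIN_TEXT content: "Enjoy your vacation!" }
//     encoding_type: UTF8
//
//   AnalyzeSentimentResponse:
//     document_sentiment { magnitude: 0.8 score: 0.8 }
//     language: "en"
//     sentences {
//       text { content: "Enjoy your vacation!" begin_offset: 0 }
//       sentiment { magnitude: 0.8 score: 0.8 }
//     }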

// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Model options to use for classification. Defaults to v1 options if not
  // specified.
  ClassificationModelOptions classification_model_options = 3;
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}

// The document moderation request message.
message ModerateTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document moderation response message.
message ModerateTextResponse {
  // Harmful and sensitive categories representing the input document.
  repeated ClassificationCategory moderation_categories = 1;
}
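
// Illustrative sketch (category names and confidence values are examples
// only, not an authoritative list): a ModerateTextResponse in proto text
// format might look like:
//
//   moderation_categories { name: "Toxic" confidence: 0.1 }
//   moderation_categories { name: "Insult" confidence: 0.05 }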

// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories.
    bool classify_text = 6;

    // Moderate the document for harmful and sensitive categories.
    bool moderate_text = 11;

    // The model options to use for classification. Defaults to v1 options
    // if not specified. Only used if `classify_text` is set to true.
    ClassificationModelOptions classification_model_options = 10;
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;

  // Harmful and sensitive categories identified in the input document.
  repeated ClassificationCategory moderation_categories = 7;
}
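
// Illustrative sketch (values are made up): an AnnotateTextRequest that asks
// for entities and document-level sentiment in a single call, in proto text
// format.
//
//   document { type: PLAIN_TEXT content: "Enjoy your vacation!" }
//   features {
//     extract_entities: true
//     extract_document_sentiment: true
//   }
//   encoding_type: UTF8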