// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "cloud.google.com/go/language/apiv1beta2/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1beta2";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest) returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest) returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to [AnalyzeEntities][google.cloud.language.v1beta2.LanguageService.AnalyzeEntities] in the text and analyzes
  // sentiment associated with each entity and its mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest) returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all syntax, sentiment, entity, and
  // classification features in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}
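
// Illustrative example (not part of the API definition; values invented): a
// minimal AnalyzeSentiment call over the REST binding declared above, using
// the proto3 JSON mapping of the request message.
//
//   POST https://language.googleapis.com/v1beta2/documents:analyzeSentiment
//   {
//     "document": { "type": "PLAIN_TEXT", "content": "The food was excellent." },
//     "encodingType": "UTF8"
//   }
//
// The response carries `documentSentiment` (score and magnitude), the detected
// `language`, and per-sentence sentiment in `sentences`.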

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Ways of handling boilerplate detected in the document
  enum BoilerplateHandling {
    // The boilerplate handling is not specified.
    BOILERPLATE_HANDLING_UNSPECIFIED = 0;

    // Do not analyze detected boilerplate. Reference web URI is required for
    // detecting boilerplate.
    SKIP_BOILERPLATE = 1;

    // Treat boilerplate the same as content.
    KEEP_BOILERPLATE = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;

  // The web URI where the document comes from. This URI is not used for
  // fetching the content, but as a hint for analyzing the document.
  string reference_web_uri = 5;

  // Indicates how detected boilerplate (e.g. advertisements, copyright
  // declarations, banners) should be handled for this document. If not
  // specified, boilerplate will be treated the same as content.
  BoilerplateHandling boilerplate_handling = 6;
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment] is set to
  // true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}
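
// Illustrative note (assumption, not part of the official documentation): how
// the chosen `EncodingType` affects `begin_offset`. For the input "🙂 ok", the
// token "ok" would start at offset 5 with UTF8 (the emoji takes 4 bytes plus 1
// for the space), at offset 3 with UTF16 (the emoji is a surrogate pair of 2
// code units), and at offset 2 with UTF32 (1 code point plus the space). With
// NONE, `begin_offset` is -1.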

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    //   convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    //   detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    //   detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    //   city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the Type table below.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment] is set to
  // true, this field will contain the aggregate sentiment expressed for this
  // entity in the provided document.
  Sentiment sentiment = 6;
}

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Part-of-speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
// Next ID: 6
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}
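
// Illustrative example (assumption, values invented): an `Entity` as it might
// appear in an AnalyzeEntitySentiment response, shown in the proto3 JSON
// mapping. A score near 1.0 together with a sizeable magnitude indicates
// clearly positive sentiment toward the entity.
//
//   {
//     "name": "Grand Hotel",
//     "type": "LOCATION",
//     "metadata": { "mid": "/m/0123abc", "wikipedia_url": "https://en.wikipedia.org/wiki/Grand_Hotel" },
//     "salience": 0.82,
//     "mentions": [ { "text": { "content": "Grand Hotel", "beginOffset": 4 },
//                     "type": "PROPER",
//                     "sentiment": { "magnitude": 0.9, "score": 0.9 } } ],
//     "sentiment": { "magnitude": 0.9, "score": 0.9 }
//   }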

// Represents part of speech information for a token.
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can categorize different forms of
  // verbs, adjectives, adverbs, etc. For example, it can categorize inflected
  // endings of verbs and adjectives or distinguish between short and long
  // forms of adjectives and participles.
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnomial
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}
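
// Illustrative example (assumption, values chosen for illustration): a `Token`
// as AnalyzeSyntax might return it for the word "ate" in "She ate lunch.",
// shown in the proto3 JSON mapping. Because "ate" is the root of the sentence,
// its `dependencyEdge.headTokenIndex` is its own index (1); features left at
// their `*_UNKNOWN` defaults are omitted from JSON output.
//
//   {
//     "text": { "content": "ate", "beginOffset": 4 },
//     "partOfSpeech": { "tag": "VERB", "tense": "PAST" },
//     "dependencyEdge": { "headTokenIndex": 1, "label": "ROOT" },
//     "lemma": "eat"
//   }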

// Represents dependency parse tree information for a token.
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}
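
// Illustrative note (assumption): how `head_token_index` encodes the parse
// tree. For "She ate lunch.", tokenized as ["She", "ate", "lunch", "."], a
// typical parse would be:
//
//   index  token   head_token_index  label
//   0      She     1                 NSUBJ
//   1      ate     1                 ROOT   (the root points to itself)
//   2      lunch   1                 DOBJ
//   3      .       1                 P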

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment] is set to
  // true, this field will contain the sentiment expressed for this mention of
  // the entity in the provided document.
  Sentiment sentiment = 3;
}

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the [EncodingType][google.cloud.language.v1beta2.EncodingType] specified in the API request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document, from the [predefined
  // taxonomy](https://cloud.google.com/natural-language/docs/categories).
  string name = 1;

  // The classifier's confidence of the category. The number represents how
  // certain the classifier is that this category represents the given text.
  float confidence = 2;
}

// Model options available for classification requests.
message ClassificationModelOptions {
  // Options for the V1 model.
  message V1Model {

  }

  // Options for the V2 model.
  message V2Model {
    // The content categories used for classification.
    enum ContentCategoriesVersion {
      // If `ContentCategoriesVersion` is not specified, this option will
      // default to `V1`.
      CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;

      // Legacy content categories of our initial launch in 2017.
      V1 = 1;

      // Updated content categories in 2022.
      V2 = 2;
    }

    // The content categories used for classification.
    ContentCategoriesVersion content_categories_version = 1;
  }

  // If this field is not set, then the `v1_model` will be used by default.
  oneof model_type {
    // Setting this field will use the V1 model and V1 content categories
    // version. The V1 model is a legacy model; support for this will be
    // discontinued in the future.
    V1Model v1_model = 1;

    // Setting this field will use the V2 model with the appropriate content
    // categories version. The V2 model is a better performing model.
    V2Model v2_model = 2;
  }
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets for the
  // sentence sentiment.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}

// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
  string language = 2;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
  string language = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Model options to use for classification. Defaults to v1 options if not
  // specified.
  ClassificationModelOptions classification_model_options = 3;
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}
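
// Illustrative example (assumption, values invented): a ClassifyTextRequest
// that opts in to the V2 model and its 2022 content categories, shown in the
// proto3 JSON mapping. A matching response carries `categories`, each with a
// taxonomy `name` and a `confidence`.
//
//   {
//     "document": { "type": "PLAIN_TEXT", "content": "..." },
//     "classificationModelOptions": {
//       "v2Model": { "contentCategoriesVersion": "V2" }
//     }
//   }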

// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  // Next ID: 11
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories. If this is true,
    // the API will use the default model which classifies into a
    // [predefined
    // taxonomy](https://cloud.google.com/natural-language/docs/categories).
    bool classify_text = 6;

    // The model options to use for classification. Defaults to v1 options
    // if not specified. Only used if `classify_text` is set to true.
    ClassificationModelOptions classification_model_options = 10;
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;
}
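
// Illustrative example (assumption, values invented): an AnnotateTextRequest
// that runs entity and document-sentiment analysis in a single call, shown in
// the proto3 JSON mapping. Only the features set to true are populated in the
// AnnotateTextResponse.
//
//   POST https://language.googleapis.com/v1beta2/documents:annotateText
//   {
//     "document": { "type": "PLAIN_TEXT", "content": "..." },
//     "features": { "extractEntities": true, "extractDocumentSentiment": true },
//     "encodingType": "UTF8"
//   }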