// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "cloud.google.com/go/language/apiv1/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to
  // [AnalyzeEntities][google.cloud.language.v1.LanguageService.AnalyzeEntities]
  // in the text and analyzes sentiment associated with each entity and its
  // mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest)
      returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all the features that analyzeSentiment,
  // analyzeEntities, and analyzeSyntax provide in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    // convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    // detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    // detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    // city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the Type table below.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the aggregate sentiment expressed
  // for this entity in the provided document.
  Sentiment sentiment = 6;
}

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}

// Represents part of speech information for a token. Parts of speech
// are as defined in
// http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can be categorizing different forms of
  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
  // endings of verbs and adjectives or distinguishing between short and long
  // forms of adjectives and participles
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnomial
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}

// Represents dependency parse tree information for a token. (For more
// information on dependency labels, see
// http://www.aclweb.org/anthology/P13-2017
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunt
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document.
  Sentiment sentiment = 3;
}

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1.EncodingType] specified in the API
  // request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document, from the [predefined
  // taxonomy](https://cloud.google.com/natural-language/docs/categories).
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// Model options available for classification requests.
message ClassificationModelOptions {
  // Options for the V1 model.
  message V1Model {}

  // Options for the V2 model.
  message V2Model {
    // The content categories used for classification.
    enum ContentCategoriesVersion {
      // If `ContentCategoriesVersion` is not specified, this option will
      // default to `V1`.
      CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;

      // Legacy content categories of our initial launch in 2017.
      V1 = 1;

      // Updated content categories in 2022.
      V2 = 2;
    }

    // The content categories used for classification.
    ContentCategoriesVersion content_categories_version = 1;
  }

  // If this field is not set, then the `v1_model` will be used by default.
  oneof model_type {
    // Setting this field will use the V1 model and V1 content categories
    // version. The V1 model is a legacy model; support for this will be
    // discontinued in the future.
    V1Model v1_model = 1;

    // Setting this field will use the V2 model with the appropriate content
    // categories version. The V2 model is a better performing model.
    V2Model v2_model = 2;
  }
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}

// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Model options to use for classification. Defaults to v1 options if not
  // specified.
  ClassificationModelOptions classification_model_options = 3;
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}

// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories.
    bool classify_text = 6;

    // The model options to use for classification. Defaults to v1 options
    // if not specified. Only used if `classify_text` is set to true.
    ClassificationModelOptions classification_model_options = 10;
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;
}