// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "cloud.google.com/go/language/apiv1beta2/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1beta2";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest) returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest) returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to [AnalyzeEntities][google.cloud.language.v1beta2.LanguageService.AnalyzeEntities] in the text,
  // and analyzes the sentiment associated with each entity and its mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest) returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all syntax, sentiment, entity, and
  // classification features in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}
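
// Usage example (illustrative only, not part of the API definition): calling
// AnalyzeSentiment through the generated Go client. The import paths follow
// the `go_package` option above; the client surface (`NewClient`,
// `AnalyzeSentiment`) is the standard generated GAPIC shape and may differ
// slightly between client library versions.
//
//   import (
//       "context"
//       "fmt"
//
//       language "cloud.google.com/go/language/apiv1beta2"
//       "cloud.google.com/go/language/apiv1beta2/languagepb"
//   )
//
//   func analyzeSentiment(ctx context.Context, text string) error {
//       client, err := language.NewClient(ctx)
//       if err != nil {
//           return err
//       }
//       defer client.Close()
//
//       // Build the AnalyzeSentimentRequest defined later in this file.
//       resp, err := client.AnalyzeSentiment(ctx, &languagepb.AnalyzeSentimentRequest{
//           Document: &languagepb.Document{
//               Type:   languagepb.Document_PLAIN_TEXT,
//               Source: &languagepb.Document_Content{Content: text},
//           },
//           EncodingType: languagepb.EncodingType_UTF8,
//       })
//       if err != nil {
//           return err
//       }
//       fmt.Printf("score=%.2f magnitude=%.2f\n",
//           resp.GetDocumentSentiment().GetScore(),
//           resp.GetDocumentSentiment().GetMagnitude())
//       return nil
//   }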

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Ways of handling boilerplate detected in the document
  enum BoilerplateHandling {
    // The boilerplate handling is not specified.
    BOILERPLATE_HANDLING_UNSPECIFIED = 0;

    // Do not analyze detected boilerplate. Reference web URI is required for
    // detecting boilerplate.
    SKIP_BOILERPLATE = 1;

    // Treat boilerplate the same as content.
    KEEP_BOILERPLATE = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are accepted.
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;

  // The web URI where the document comes from. This URI is not used for
  // fetching the content, but as a hint for analyzing the document.
  string reference_web_uri = 5;

  // Indicates how detected boilerplate (e.g. advertisements, copyright
  // declarations, banners) should be handled for this document. If not
  // specified, boilerplate will be treated the same as content.
  BoilerplateHandling boilerplate_handling = 6;
}
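
// For illustration (assuming the generated Go types from the `go_package`
// option above), the two `source` variants are populated like this; exactly
// one of them may be set:
//
//   inline := &languagepb.Document{
//       Type:   languagepb.Document_PLAIN_TEXT,
//       Source: &languagepb.Document_Content{Content: "The food was great."},
//   }
//   fromStorage := &languagepb.Document{
//       Type:   languagepb.Document_HTML,
//       Source: &languagepb.Document_GcsContentUri{GcsContentUri: "gs://bucket_name/object_name"},
//   }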

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment] is set to
  // true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}
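
// Worked example (illustrative): for the input "año x", the substring "x" is
// the fifth character, but its `begin_offset` is 5 under `UTF8` (the "ñ" takes
// two bytes) and 4 under `UTF16` or `UTF32`. In Go, the unit counts for each
// encoding can be checked like this (uses "fmt" and "unicode/utf16"):
//
//   s := "año x"
//   fmt.Println(len(s))                       // 6 UTF-8 bytes
//   fmt.Println(len(utf16.Encode([]rune(s)))) // 5 UTF-16 code units
//   fmt.Println(len([]rune(s)))               // 5 code points (UTF-32 units)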

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    // convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    // detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    // detected
    // * `sublocality` - used in Asian addresses to demarcate a district within
    // a city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the Type table below.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment] is set to
  // true, this field will contain the aggregate sentiment expressed for this
  // entity in the provided document.
  Sentiment sentiment = 6;
}
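
// Illustrative sketch (assuming the generated Go types): reading entities,
// their Knowledge Graph metadata, and their mentions from an
// AnalyzeEntitiesResponse `resp`:
//
//   for _, entity := range resp.GetEntities() {
//       fmt.Printf("%s (%s) salience=%.2f mid=%q wiki=%q\n",
//           entity.GetName(), entity.GetType(), entity.GetSalience(),
//           entity.GetMetadata()["mid"], entity.GetMetadata()["wikipedia_url"])
//       for _, mention := range entity.GetMentions() {
//           fmt.Printf("  %q (%s) at offset %d\n",
//               mention.GetText().GetContent(), mention.GetType(),
//               mention.GetText().GetBeginOffset())
//       }
//   }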

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Part of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
// Next ID: 6
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}
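
// Interpretation sketch: `score` captures the overall polarity, while
// `magnitude` measures how much sentiment-bearing content there is and tends
// to grow with document length. The thresholds below are arbitrary,
// illustrative values, not part of the API:
//
//   func describe(s *languagepb.Sentiment) string {
//       switch {
//       case s.GetScore() > 0.25:
//           return "positive"
//       case s.GetScore() < -0.25:
//           return "negative"
//       case s.GetMagnitude() > 2.0:
//           return "mixed" // near-zero score but strong signals in both directions
//       default:
//           return "neutral"
//       }
//   }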

// Represents part of speech information for a token.
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can categorize different forms of verbs,
  // adjectives, adverbs, and so on; for example, it can categorize inflected
  // endings of verbs and adjectives or distinguish between short and long
  // forms of adjectives and participles.
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnomial
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}

// Represents dependency parse tree information for a token.
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // Complement of a preposition that is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by a non-finite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}
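
// Illustrative sketch (assuming the generated Go types): printing every
// dependency arc in an AnalyzeSyntaxResponse `resp`. A token whose
// `head_token_index` equals its own index is a root token.
//
//   for i, token := range resp.GetTokens() {
//       head := resp.GetTokens()[token.GetDependencyEdge().GetHeadTokenIndex()]
//       fmt.Printf("%d: %q -[%s]-> %q\n", i,
//           token.GetText().GetContent(),
//           token.GetDependencyEdge().GetLabel(),
//           head.GetText().GetContent())
//   }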

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment] is set to
  // true, this field will contain the sentiment expressed for this mention of
  // the entity in the provided document.
  Sentiment sentiment = 3;
}

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the [EncodingType][google.cloud.language.v1beta2.EncodingType] specified in the API request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document, from the [predefined
  // taxonomy](https://cloud.google.com/natural-language/docs/categories).
  string name = 1;

  // The classifier's confidence of the category. The number represents how
  // certain the classifier is that this category represents the given text.
  float confidence = 2;
}

// Model options available for classification requests.
message ClassificationModelOptions {
  // Options for the V1 model.
  message V1Model {

  }

  // Options for the V2 model.
  message V2Model {
    // The content categories used for classification.
    enum ContentCategoriesVersion {
      // If `ContentCategoriesVersion` is not specified, this option will
      // default to `V1`.
      CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;

      // Legacy content categories of our initial launch in 2017.
      V1 = 1;

      // Updated content categories in 2022.
      V2 = 2;
    }

    // The content categories used for classification.
    ContentCategoriesVersion content_categories_version = 1;
  }

  // If this field is not set, then the `v1_model` will be used by default.
  oneof model_type {
    // Setting this field will use the V1 model and V1 content categories
    // version. The V1 model is a legacy model; support for this will be
    // discontinued in the future.
    V1Model v1_model = 1;

    // Setting this field will use the V2 model with the appropriate content
    // categories version. The V2 model is a better performing model.
    V2Model v2_model = 2;
  }
}
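
// Illustrative sketch (assuming the generated Go types; the oneof wrapper
// name follows protoc-gen-go's usual naming rules and may differ by generator
// version): selecting the V2 model with the 2022 content categories.
//
//   opts := &languagepb.ClassificationModelOptions{
//       ModelType: &languagepb.ClassificationModelOptions_V2Model_{
//           V2Model: &languagepb.ClassificationModelOptions_V2Model{
//               ContentCategoriesVersion: languagepb.ClassificationModelOptions_V2Model_V2,
//           },
//       },
//   }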

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets for the
  // sentence sentiment.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}

// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
  string language = 2;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
  string language = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Model options to use for classification. Defaults to v1 options if not
  // specified.
  ClassificationModelOptions classification_model_options = 3;
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}

// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  // Next ID: 11
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories. If this is true,
    // the API will use the default model which classifies into a
    // [predefined
    // taxonomy](https://cloud.google.com/natural-language/docs/categories).
    bool classify_text = 6;

    // The model options to use for classification. Defaults to v1 options
    // if not specified. Only used if `classify_text` is set to true.
    ClassificationModelOptions classification_model_options = 10;
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}
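
// Illustrative sketch (assuming the generated Go types): requesting several
// analyses in a single AnnotateText call.
//
//   req := &languagepb.AnnotateTextRequest{
//       Document: doc, // a *languagepb.Document, as shown earlier
//       Features: &languagepb.AnnotateTextRequest_Features{
//           ExtractSyntax:            true,
//           ExtractEntities:          true,
//           ExtractDocumentSentiment: true,
//           ClassifyText:             true,
//       },
//       EncodingType: languagepb.EncodingType_UTF8,
//   }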

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;
}