// xref: /aosp_15_r20/external/googleapis/google/cloud/language/v1/language_service.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

// Generated-code options for the Go and Java client libraries.
option go_package = "cloud.google.com/go/language/apiv1/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to
  // [AnalyzeEntities][google.cloud.language.v1.LanguageService.AnalyzeEntities]
  // in the text and analyzes sentiment associated with each entity and its
  // mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest)
      returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // Moderates a document for harmful and sensitive categories.
  rpc ModerateText(ModerateTextRequest) returns (ModerateTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:moderateText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all the features that analyzeSentiment,
  // analyzeEntities, and analyzeSyntax provide in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    // convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    // detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    // detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    // city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the Type table below.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the aggregate sentiment expressed
  // for this entity in the provided document.
  Sentiment sentiment = 6;
}

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}

// Represents part of speech information for a token. Parts of speech
// are as defined in
// http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can be categorizing different forms of
  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
  // endings of verbs and adjectives or distinguishing between short and long
  // forms of adjectives and participles
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnomial
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}

// Represents dependency parse tree information for a token. (For more
// information on dependency labels, see
// http://www.aclweb.org/anthology/P13-2017)
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document.
  Sentiment sentiment = 3;
}

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1.EncodingType] specified in the API
  // request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document.
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// Model options available for classification requests.
message ClassificationModelOptions {
  // Options for the V1 model.
  message V1Model {}

  // Options for the V2 model.
  message V2Model {
    // The content categories used for classification.
    enum ContentCategoriesVersion {
      // If `ContentCategoriesVersion` is not specified, this option will
      // default to `V1`.
      CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;

      // Legacy content categories of our initial launch in 2017.
      V1 = 1;

      // Updated content categories in 2022.
      V2 = 2;
    }

    // The content categories used for classification.
    ContentCategoriesVersion content_categories_version = 1;
  }

  // If this field is not set, then the `v1_model` will be used by default.
  oneof model_type {
    // Setting this field will use the V1 model and V1 content categories
    // version. The V1 model is a legacy model; support for this will be
    // discontinued in the future.
    V1Model v1_model = 1;

    // Setting this field will use the V2 model with the appropriate content
    // categories version. The V2 model is a better performing model.
    V2Model v2_model = 2;
  }
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}

// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Model options to use for classification. Defaults to v1 options if not
  // specified.
  ClassificationModelOptions classification_model_options = 3;
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}

// The document moderation request message.
message ModerateTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document moderation response message.
message ModerateTextResponse {
  // Harmful and sensitive categories representing the input document.
  repeated ClassificationCategory moderation_categories = 1;
}

// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories.
    bool classify_text = 6;

    // Moderate the document for harmful and sensitive categories.
    bool moderate_text = 11;

    // The model options to use for classification. Defaults to v1 options
    // if not specified. Only used if `classify_text` is set to true.
    ClassificationModelOptions classification_model_options = 10;
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;

  // Harmful and sensitive categories identified in the input document.
  repeated ClassificationCategory moderation_categories = 7;
}
