1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.language.v1;
18
19import "google/api/annotations.proto";
20import "google/api/client.proto";
21import "google/api/field_behavior.proto";
22
23option go_package = "cloud.google.com/go/language/apiv1/languagepb;languagepb";
24option java_multiple_files = true;
25option java_outer_classname = "LanguageServiceProto";
26option java_package = "com.google.cloud.language.v1";
27
// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSentiment"
      body: "*"
    };
    // Each method_signature declares one flattened-argument overload that
    // generated client libraries expose for this RPC.
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to
  // [AnalyzeEntities][google.cloud.language.v1.LanguageService.AnalyzeEntities]
  // in the text and analyzes sentiment associated with each entity and its
  // mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest)
      returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all the features that analyzeSentiment,
  // analyzeEntities, and analyzeSyntax provide in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}
106
// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI. At most one of the two may be set.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;
}
149
// Represents a sentence in the input document.
message Sentence {
  // The sentence text, including its byte/character offset in the document.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  // Unset otherwise.
  Sentiment sentiment = 2;
}
160
// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such
  // as `begin_offset`) is set to `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}
186
// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // NOTE(review): value 8 is unused — presumably removed historically.
    // Confirm before reusing; consider `reserved 8;` to make that explicit.

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    // convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    // detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    // detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    // city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the Type table below.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the aggregate sentiment expressed
  // for this entity in the provided document. Unset otherwise.
  Sentiment sentiment = 6;
}
301
// Represents the smallest syntactic building block of the text.
message Token {
  // The token text, including its offset in the original document.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token (head index and parse label).
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}
316
// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // NOTE(review): field number 1 is unused — presumably removed historically;
  // confirm before reuse (consider `reserved 1;`).

  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}
329
// Represents part of speech information for a token. Parts of speech
// are as defined in
// http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can categorize different forms of
  // verbs, adjectives, adverbs, etc. For example, it can categorize inflected
  // endings of verbs and adjectives or distinguish between short and long
  // forms of adjectives and participles.
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnomial
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}
659
// Represents dependency parse tree information for a token. (For more
// information on dependency labels, see
// http://www.aclweb.org/anthology/P13-2017.)
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}
926
// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text, including its offset in the original document.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document. Unset otherwise.
  Sentiment sentiment = 3;
}
954
// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1.EncodingType] specified in the API
  // request. If the request specified `EncodingType.NONE`, this is `-1`.
  int32 begin_offset = 2;
}
966
// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document, from the [predefined
  // taxonomy](https://cloud.google.com/natural-language/docs/categories).
  string name = 1;

  // The classifier's confidence of the category. The number represents how
  // certain the classifier is that this category represents the given text.
  float confidence = 2;
}
977
// Model options available for classification requests.
message ClassificationModelOptions {
  // Options for the V1 model. Intentionally empty: selecting this message in
  // the `model_type` oneof is itself the choice; there are no knobs to set.
  message V1Model {}

  // Options for the V2 model.
  message V2Model {
    // The content categories used for classification.
    enum ContentCategoriesVersion {
      // If `ContentCategoriesVersion` is not specified, this option will
      // default to `V1`.
      CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;

      // Legacy content categories of our initial launch in 2017.
      V1 = 1;

      // Updated content categories in 2022.
      V2 = 2;
    }

    // The content categories used for classification.
    ContentCategoriesVersion content_categories_version = 1;
  }

  // If this field is not set, then the `v1_model` will be used by default.
  oneof model_type {
    // Setting this field will use the V1 model and V1 content categories
    // version. The V1 model is a legacy model; support for this will be
    // discontinued in the future.
    V1Model v1_model = 1;

    // Setting this field will use the V2 model with the appropriate content
    // categories version. The V2 model is a better performing model.
    V2Model v2_model = 2;
  }
}
1014
// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets.
  // Optional; if omitted, offsets are reported as `-1` (see `EncodingType`).
  EncodingType encoding_type = 2;
}
1023
// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}
1038
// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  // Optional; if omitted, offsets are reported as `-1` (see `EncodingType`).
  EncodingType encoding_type = 2;
}
1047
// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}
1059
// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  // Optional; if omitted, offsets are reported as `-1` (see `EncodingType`).
  EncodingType encoding_type = 2;
}
1068
// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}
1080
// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  // Optional; if omitted, offsets are reported as `-1` (see `EncodingType`).
  EncodingType encoding_type = 2;
}
1089
// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 3;
}
1104
// The document classification request message.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // NOTE(review): field number 2 is unused — presumably removed historically;
  // confirm before reuse (consider `reserved 2;`).

  // Model options to use for classification. Defaults to v1 options if not
  // specified.
  ClassificationModelOptions classification_model_options = 3;
}
1114
// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}
1120
// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // NOTE(review): field numbers 5 and 7-9 are unused — presumably removed
    // historically; confirm before reuse (consider `reserved` entries).

    // Classify the full document into categories.
    bool classify_text = 6;

    // The model options to use for classification. Defaults to v1 options
    // if not specified. Only used if `classify_text` is set to true.
    ClassificationModelOptions classification_model_options = 10;
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  // Optional; if omitted, offsets are reported as `-1` (see `EncodingType`).
  EncodingType encoding_type = 3;
}
1156
// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 5;

  // Categories identified in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.classify_text][google.cloud.language.v1.AnnotateTextRequest.Features.classify_text].
  repeated ClassificationCategory categories = 6;
}
1186