// xref: /aosp_15_r20/external/googleapis/google/cloud/language/v1beta2/language_service.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "cloud.google.com/go/language/apiv1beta2/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1beta2";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to
  // [AnalyzeEntities][google.cloud.language.v1beta2.LanguageService.AnalyzeEntities]
  // in the text and analyzes sentiment associated with each entity and its
  // mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest)
      returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // Moderates a document for harmful and sensitive categories.
  rpc ModerateText(ModerateTextRequest) returns (ModerateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:moderateText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all syntax, sentiment, entity, and
  // classification features in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Ways of handling boilerplate detected in the document
  enum BoilerplateHandling {
    // The boilerplate handling is not specified.
    BOILERPLATE_HANDLING_UNSPECIFIED = 0;

    // Do not analyze detected boilerplate. Reference web URI is required for
    // detecting boilerplate.
    SKIP_BOILERPLATE = 1;

    // Treat boilerplate the same as content.
    KEEP_BOILERPLATE = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;

  // The web URI where the document comes from. This URI is not used for
  // fetching the content, but as a hint for analyzing the document.
  string reference_web_uri = 5;

  // Indicates how detected boilerplate (e.g. advertisements, copyright
  // declarations, banners) should be handled for this document. If not
  // specified, boilerplate will be treated the same as content.
  BoilerplateHandling boilerplate_handling = 6;
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    // convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    // detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    // detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    // city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the Type table below.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the aggregate sentiment expressed
  // for this entity in the provided document.
  Sentiment sentiment = 6;
}

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
// Next ID: 6
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}

// Represents part of speech information for a token.
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can be categorizing different forms of
  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
  // endings of verbs and adjectives or distinguishing between short and long
  // forms of adjectives and participles
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnomial
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}

// Represents dependency parse tree information for a token.
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document.
  Sentiment sentiment = 3;
}

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1beta2.EncodingType] specified in the
  // API request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document.
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// Model options available for classification requests.
message ClassificationModelOptions {
  // Options for the V1 model.
  message V1Model {}

  // Options for the V2 model.
  message V2Model {
    // The content categories used for classification.
    enum ContentCategoriesVersion {
      // If `ContentCategoriesVersion` is not specified, this option will
      // default to `V1`.
      CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;

      // Legacy content categories of our initial launch in 2017.
      V1 = 1;

      // Updated content categories in 2022.
      V2 = 2;
    }

    // The content categories used for classification.
    ContentCategoriesVersion content_categories_version = 1;
  }

  // If this field is not set, then the `v1_model` will be used by default.
  oneof model_type {
    // Setting this field will use the V1 model and V1 content categories
    // version. The V1 model is a legacy model; support for this will be
    // discontinued in the future.
    V1Model v1_model = 1;

    // Setting this field will use the V2 model with the appropriate content
    // categories version. The V2 model is a better performing model.
    V2Model v2_model = 2;
  }
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets for the
  // sentence sentiment.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}

// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 2;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 3;
}

// The document classification request message.
// Request for
// [LanguageService.ClassifyText][google.cloud.language.v1beta2.LanguageService.ClassifyText].
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Model options to use for classification. Defaults to v1 options if not
  // specified.
  // NOTE(review): field number 2 is skipped with no `reserved` statement —
  // presumably a removed field; confirm history before ever reusing number 2.
  ClassificationModelOptions classification_model_options = 3;
}
1142
// The document classification response message.
// Returned by
// [LanguageService.ClassifyText][google.cloud.language.v1beta2.LanguageService.ClassifyText].
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}
1148
// The document moderation request message.
// Request for
// [LanguageService.ModerateText][google.cloud.language.v1beta2.LanguageService.ModerateText].
message ModerateTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}
1154
// The document moderation response message.
// Returned by
// [LanguageService.ModerateText][google.cloud.language.v1beta2.LanguageService.ModerateText].
message ModerateTextResponse {
  // Harmful and sensitive categories representing the input document.
  repeated ClassificationCategory moderation_categories = 1;
}
1160
// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  // Next ID: 12
  // NOTE(review): field numbers 5, 7, 8, and 9 are skipped with no `reserved`
  // statement — presumably removed or never-released fields; confirm history
  // before reusing any of them.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories. If this is true,
    // the API will use the default model which classifies into a
    // [predefined
    // taxonomy](https://cloud.google.com/natural-language/docs/categories).
    bool classify_text = 6;

    // Moderate the document for harmful and sensitive categories.
    bool moderate_text = 11;

    // The model options to use for classification. Defaults to v1 options
    // if not specified. Only used if `classify_text` is set to true.
    ClassificationModelOptions classification_model_options = 10;
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  // At least one analysis must be enabled for the call to be meaningful;
  // each enabled feature populates its corresponding response field.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  // Determines the units of character offsets reported in the response —
  // see `EncodingType` for details.
  EncodingType encoding_type = 3;
}
1203
// The text annotations response message.
// Returned by
// [LanguageService.AnnotateText][google.cloud.language.v1beta2.LanguageService.AnnotateText].
// Each field is populated only when the corresponding feature was enabled in
// [AnnotateTextRequest.Features][google.cloud.language.v1beta2.AnnotateTextRequest.Features].
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;

  // Harmful and sensitive categories identified in the input document.
  // NOTE(review): field number 7 is skipped with no `reserved` statement —
  // presumably a removed or never-released field; confirm before reusing.
  repeated ClassificationCategory moderation_categories = 8;
}
1236