// xref: /aosp_15_r20/external/googleapis/google/cloud/language/v1beta1/language_service.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v1beta1;

import "google/api/annotations.proto";

option go_package = "cloud.google.com/go/language/apiv1beta1/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1beta1";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeSentiment"
      body: "*"
    };
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeEntities"
      body: "*"
    };
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:analyzeSyntax"
      body: "*"
    };
  }

  // A convenience method that provides all the features that analyzeSentiment,
  // analyzeEntities, and analyzeSyntax provide in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta1/documents:annotateText"
      body: "*"
    };
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to
  // [AnalyzeSentiment][google.cloud.language.v1beta1.LanguageService.AnalyzeSentiment]
  // or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Work of art
    WORK_OF_ART = 5;

    // Consumer goods
    CONSUMER_GOOD = 6;

    // Other types
    OTHER = 7;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // Currently, Wikipedia URLs and Knowledge Graph MIDs are provided, if
  // available. The associated keys are "wikipedia_url" and "mid", respectively.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;
}

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // DEPRECATED FIELD - This field is being deprecated in
  // favor of score. Please refer to our documentation at
  // https://cloud.google.com/natural-language/docs for more information.
  //
  // Marked with the `deprecated` field option so generated code surfaces the
  // deprecation; the field number and wire encoding are unchanged.
  float polarity = 1 [deprecated = true];

  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}

// Represents part of speech information for a token.
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can be categorizing different forms of
  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
  // endings of verbs and adjectives or distinguishing between short and long
  // forms of adjectives and participles.
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnominal
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}

// Represents dependency parse tree information for a token.
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;
}

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1beta1.EncodingType] specified in the
  // API request.
  int32 begin_offset = 2;
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Input document.
  Document document = 1;

  // The encoding type used by the API to calculate sentence offsets for the
  // sentence sentiment.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta1.Document.language]
  // field for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Input document.
  Document document = 1;

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta1.Document.language]
  // field for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Input document.
  Document document = 1;

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta1.Document.language]
  // field for more details.
  string language = 3;
}

// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;
  }

  // Input document.
  Document document = 1;

  // The enabled features.
  Features features = 2;

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta1.Document.language]
  // field for more details.
  string language = 5;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}
