// xref: /aosp_15_r20/external/googleapis/google/cloud/language/v1beta2/language_service.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "cloud.google.com/go/language/apiv1beta2/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1beta2";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to
  // [AnalyzeEntities][google.cloud.language.v1beta2.LanguageService.AnalyzeEntities]
  // in the text and analyzes sentiment associated with each entity and its
  // mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest)
      returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // Moderates a document for harmful and sensitive categories.
  rpc ModerateText(ModerateTextRequest) returns (ModerateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:moderateText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all syntax, sentiment, entity, and
  // classification features in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Ways of handling boilerplate detected in the document
  enum BoilerplateHandling {
    // The boilerplate handling is not specified.
    BOILERPLATE_HANDLING_UNSPECIFIED = 0;

    // Do not analyze detected boilerplate. Reference web URI is required for
    // detecting boilerplate.
    SKIP_BOILERPLATE = 1;

    // Treat boilerplate the same as content.
    KEEP_BOILERPLATE = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;

  // The web URI where the document comes from. This URI is not used for
  // fetching the content, but as a hint for analyzing the document.
  string reference_web_uri = 5;

  // Indicates how detected boilerplate (e.g. advertisements, copyright
  // declarations, banners) should be handled for this document. If not
  // specified, boilerplate will be treated the same as content.
  BoilerplateHandling boilerplate_handling = 6;
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    // convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    // detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    // detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    // city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the Type table below.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the aggregate sentiment expressed
  // for this entity in the provided document.
  Sentiment sentiment = 6;
}

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
// Next ID: 6
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}

// Represents part of speech information for a token.
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can be categorizing different forms of
  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
  // endings of verbs and adjectives or distinguishing between short and long
  // forms of adjectives and participles
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnomial
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}

// Represents dependency parse tree information for a token.
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document.
  Sentiment sentiment = 3;
}

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1beta2.EncodingType] specified in the
  // API request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document.
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// Model options available for classification requests.
message ClassificationModelOptions {
  // Options for the V1 model.
  message V1Model {}

  // Options for the V2 model.
  message V2Model {
    // The content categories used for classification.
    enum ContentCategoriesVersion {
      // If `ContentCategoriesVersion` is not specified, this option will
      // default to `V1`.
      CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;

      // Legacy content categories of our initial launch in 2017.
      V1 = 1;

      // Updated content categories in 2022.
      V2 = 2;
    }

    // The content categories used for classification.
    ContentCategoriesVersion content_categories_version = 1;
  }

  // If this field is not set, then the `v1_model` will be used by default.
  oneof model_type {
    // Setting this field will use the V1 model and V1 content categories
    // version. The V1 model is a legacy model; support for this will be
    // discontinued in the future.
    V1Model v1_model = 1;

    // Setting this field will use the V2 model with the appropriate content
    // categories version. The V2 model is a better performing model.
    V2Model v2_model = 2;
  }
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets for the
  // sentence sentiment.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}

// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 2;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 3;
}

// The document classification request message.
// Request for
// [LanguageService.ClassifyText][google.cloud.language.v1beta2.LanguageService.ClassifyText].
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Model options to use for classification. Defaults to v1 options if not
  // specified.
  // NOTE(review): field number 2 is skipped with no `reserved` statement —
  // presumably a removed field; confirm history before ever reusing number 2.
  ClassificationModelOptions classification_model_options = 3;
}
1142
// The document classification response message.
// Returned by
// [LanguageService.ClassifyText][google.cloud.language.v1beta2.LanguageService.ClassifyText].
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}
1148
// The document moderation request message.
// Request for
// [LanguageService.ModerateText][google.cloud.language.v1beta2.LanguageService.ModerateText].
message ModerateTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}
1154
// The document moderation response message.
// Returned by
// [LanguageService.ModerateText][google.cloud.language.v1beta2.LanguageService.ModerateText].
message ModerateTextResponse {
  // Harmful and sensitive categories representing the input document.
  repeated ClassificationCategory moderation_categories = 1;
}
1160
// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  // Next ID: 12
  // NOTE(review): field numbers 5, 7, 8, and 9 are skipped with no `reserved`
  // statement — presumably removed or never-released fields; confirm history
  // before reusing any of them.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories. If this is true,
    // the API will use the default model which classifies into a
    // [predefined
    // taxonomy](https://cloud.google.com/natural-language/docs/categories).
    bool classify_text = 6;

    // Moderate the document for harmful and sensitive categories.
    bool moderate_text = 11;

    // The model options to use for classification. Defaults to v1 options
    // if not specified. Only used if `classify_text` is set to true.
    ClassificationModelOptions classification_model_options = 10;
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  // At least one analysis must be enabled for the call to be meaningful;
  // each enabled feature populates its corresponding response field.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  // Determines the units of character offsets reported in the response —
  // see `EncodingType` for details.
  EncodingType encoding_type = 3;
}
1203
// The text annotations response message.
// Returned by
// [LanguageService.AnnotateText][google.cloud.language.v1beta2.LanguageService.AnnotateText].
// Each field is populated only when the corresponding feature was enabled in
// [AnnotateTextRequest.Features][google.cloud.language.v1beta2.AnnotateTextRequest.Features].
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;

  // Harmful and sensitive categories identified in the input document.
  // NOTE(review): field number 7 is skipped with no `reserved` statement —
  // presumably a removed or never-released field; confirm before reusing.
  repeated ClassificationCategory moderation_categories = 8;
}
1236