// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "cloud.google.com/go/language/apiv1beta2/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1beta2";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to
  // [AnalyzeEntities][google.cloud.language.v1beta2.LanguageService.AnalyzeEntities]
  // in the text and analyzes sentiment associated with each entity and its
  // mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest)
      returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // Moderates a document for harmful and sensitive categories.
  rpc ModerateText(ModerateTextRequest) returns (ModerateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:moderateText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all syntax, sentiment, entity, and
  // classification features in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta2/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Ways of handling boilerplate detected in the document
  enum BoilerplateHandling {
    // The boilerplate handling is not specified.
    BOILERPLATE_HANDLING_UNSPECIFIED = 0;

    // Do not analyze detected boilerplate. Reference web URI is required for
    // detecting boilerplate.
    SKIP_BOILERPLATE = 1;

    // Treat boilerplate the same as content.
    KEEP_BOILERPLATE = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;

  // The web URI where the document comes from. This URI is not used for
  // fetching the content, but as a hint for analyzing the document.
  string reference_web_uri = 5;

  // Indicates how detected boilerplate (e.g. advertisements, copyright
  // declarations, banners) should be handled for this document. If not
  // specified, boilerplate will be treated the same as content.
  BoilerplateHandling boilerplate_handling = 6;
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    // convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    // detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    // detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    // city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the Type table below.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the aggregate sentiment expressed
  // for this entity in the provided document.
  Sentiment sentiment = 6;
}

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
// Next ID: 6
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}

// Represents part of speech information for a token.
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can be categorizing different forms of
  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
  // endings of verbs and adjectives or distinguishing between short and long
  // forms of adjectives and participles
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnomial
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}

// Represents dependency parse tree information for a token.
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document.
  Sentiment sentiment = 3;
}

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1beta2.EncodingType] specified in the
  // API request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document.
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// Model options available for classification requests.
message ClassificationModelOptions {
  // Options for the V1 model.
  message V1Model {}

  // Options for the V2 model.
  message V2Model {
    // The content categories used for classification.
    enum ContentCategoriesVersion {
      // If `ContentCategoriesVersion` is not specified, this option will
      // default to `V1`.
      CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;

      // Legacy content categories of our initial launch in 2017.
      V1 = 1;

      // Updated content categories in 2022.
      V2 = 2;
    }

    // The content categories used for classification.
    ContentCategoriesVersion content_categories_version = 1;
  }

  // If this field is not set, then the `v1_model` will be used by default.
  oneof model_type {
    // Setting this field will use the V1 model and V1 content categories
    // version. The V1 model is a legacy model; support for this will be
    // discontinued in the future.
    V1Model v1_model = 1;

    // Setting this field will use the V2 model with the appropriate content
    // categories version. The V2 model is a better performing model.
    V2Model v2_model = 2;
  }
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets for the
  // sentence sentiment.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}

// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 2;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Model options to use for classification. Defaults to v1 options if not
  // specified.
  ClassificationModelOptions classification_model_options = 3;
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}

// The document moderation request message.
message ModerateTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document moderation response message.
message ModerateTextResponse {
  // Harmful and sensitive categories representing the input document.
  repeated ClassificationCategory moderation_categories = 1;
}

// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  // Next ID: 12
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories. If this is true,
    // the API will use the default model which classifies into a
    // [predefined
    // taxonomy](https://cloud.google.com/natural-language/docs/categories).
    bool classify_text = 6;

    // Moderate the document for harmful and sensitive categories.
    bool moderate_text = 11;

    // The model options to use for classification. Defaults to v1 options
    // if not specified. Only used if `classify_text` is set to true.
    ClassificationModelOptions classification_model_options = 10;
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1beta2.Document.language]
  // field for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;

  // Harmful and sensitive categories identified in the input document.
  repeated ClassificationCategory moderation_categories = 8;
}