// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.speech.v1;

import "google/api/resource.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/speech/apiv1/speechpb;speechpb";
option java_multiple_files = true;
option java_outer_classname = "SpeechResourceProto";
option java_package = "com.google.cloud.speech.v1";
option objc_class_prefix = "GCS";

// A set of words or phrases that represents a common concept likely to appear
// in your audio, for example a list of passenger ship names. CustomClass items
// can be substituted into placeholders that you set in PhraseSet phrases.
message CustomClass {
  option (google.api.resource) = {
    type: "speech.googleapis.com/CustomClass"
    pattern: "projects/{project}/locations/{location}/customClasses/{custom_class}"
  };

  // An item of the class.
  message ClassItem {
    // The class item's value.
    string value = 1;
  }

  // The resource name of the custom class.
  string name = 1;

  // If this custom class is a resource, the custom_class_id is the resource id
  // of the CustomClass. Case sensitive.
  string custom_class_id = 2;

  // A collection of class items.
  repeated ClassItem items = 3;
}

// Provides "hints" to the speech recognizer to favor specific words and phrases
// in the results.
message PhraseSet {
  option (google.api.resource) = {
    type: "speech.googleapis.com/PhraseSet"
    pattern: "projects/{project}/locations/{location}/phraseSets/{phrase_set}"
  };

  // A phrase containing words and phrase "hints" so that
  // the speech recognition is more likely to recognize them. This can be used
  // to improve the accuracy for specific words and phrases, for example, if
  // specific commands are typically spoken by the user. This can also be used
  // to add additional words to the vocabulary of the recognizer. See
  // [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
  //
  // List items can also include pre-built or custom classes containing groups
  // of words that represent common concepts that occur in natural language. For
  // example, rather than providing a phrase hint for every month of the
  // year (e.g. "i was born in january", "i was born in february", ...), using
  // the pre-built `$MONTH` class improves the likelihood of correctly
  // transcribing audio that includes months (e.g. "i was born in $month").
  // To refer to pre-built classes, use the class' symbol prepended with `$`
  // e.g. `$MONTH`. To refer to custom classes that were defined inline in the
  // request, set the class's `custom_class_id` to a string unique to all class
  // resources and inline classes. Then use the class' id wrapped in `${...}`
  // e.g. "${my-months}". To refer to custom class resources, use the class'
  // id wrapped in `${}` (e.g. `${my-months}`).
  //
  // Speech-to-Text supports three locations: `global`, `us` (US North America),
  // and `eu` (Europe). If you are calling the `speech.googleapis.com`
  // endpoint, use the `global` location. To specify a region, use a
  // [regional endpoint](https://cloud.google.com/speech-to-text/docs/endpoints)
  // with matching `us` or `eu` location value.
  message Phrase {
    // The phrase itself.
    string value = 1;

    // Hint Boost. Overrides the boost set at the phrase set level.
    // Positive value will increase the probability that a specific phrase will
    // be recognized over other similar sounding phrases. The higher the boost,
    // the higher the chance of false positive recognition as well. Negative
    // boost will simply be ignored. Though `boost` can accept a wide range of
    // positive values, most use cases are best served
    // with values between 0 and 20. We recommend using a binary search approach
    // to finding the optimal value for your use case as well as adding
    // phrases both with and without boost to your requests.
    float boost = 2;
  }

  // The resource name of the phrase set.
  string name = 1;

  // A list of words and phrases.
  repeated Phrase phrases = 2;

  // NOTE(review): field number 3 is not assigned in this message. If it was
  // ever used, it should be declared `reserved` -- confirm against history.

  // Hint Boost. Positive value will increase the probability that a specific
  // phrase will be recognized over other similar sounding phrases. The higher
  // the boost, the higher the chance of false positive recognition as well.
  // Negative boost values would correspond to anti-biasing. Anti-biasing is not
  // enabled, so negative boost will simply be ignored. Though `boost` can
  // accept a wide range of positive values, most use cases are best served with
  // values between 0 (exclusive) and 20. We recommend using a binary search
  // approach to finding the optimal value for your use case as well as adding
  // phrases both with and without boost to your requests.
  float boost = 4;
}

// Speech adaptation configuration.
message SpeechAdaptation {
  // An ABNF grammar, supplied as a list of strings.
  message ABNFGrammar {
    // All declarations and rules of an ABNF grammar broken up into multiple
    // strings that will end up concatenated.
    repeated string abnf_strings = 1;
  }

  // A collection of phrase sets. To specify the hints inline, leave the
  // phrase set's `name` blank and fill in the rest of its fields. Any
  // phrase set can use any custom class.
  repeated PhraseSet phrase_sets = 1;

  // A collection of phrase set resource names to use.
  repeated string phrase_set_references = 2 [(google.api.resource_reference) = {
    type: "speech.googleapis.com/PhraseSet"
  }];

  // A collection of custom classes. To specify the classes inline, leave the
  // class' `name` blank and fill in the rest of its fields, giving it a unique
  // `custom_class_id`. Refer to the inline defined class in phrase hints by its
  // `custom_class_id`.
  repeated CustomClass custom_classes = 3;

  // Augmented Backus-Naur form (ABNF) is a standardized grammar notation
  // composed of a set of derivation rules.
  // See specifications: https://www.w3.org/TR/speech-grammar
  ABNFGrammar abnf_grammar = 4;
}

// Transcription normalization configuration. Use transcription normalization
// to automatically replace parts of the transcript with phrases of your
// choosing. For StreamingRecognize, this normalization only applies to stable
// partial transcripts (stability > 0.8) and final transcripts.
message TranscriptNormalization {
  // A single replacement configuration.
  message Entry {
    // What to replace. Max length is 100 characters.
    string search = 1;

    // What to replace with. Max length is 100 characters.
    string replace = 2;

    // Whether the search is case sensitive.
    bool case_sensitive = 3;
  }

  // A list of replacement entries. We will perform replacement with one entry
  // at a time. For example, the second entry in ["cat" => "dog", "mountain cat"
  // => "mountain dog"] will never be applied because we will always process the
  // first entry before it. At most 100 entries.
  repeated Entry entries = 1;
}