xref: /aosp_15_r20/external/googleapis/google/cloud/speech/v1/resource.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
syntax = "proto3";

package google.cloud.speech.v1;

// Supplies the `google.api.resource` and `google.api.resource_reference`
// annotations used by the messages in this file.
import "google/api/resource.proto";

// Per-language code generation options.
option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/speech/apiv1/speechpb;speechpb";
option java_multiple_files = true;
option java_outer_classname = "SpeechResourceProto";
option java_package = "com.google.cloud.speech.v1";
option objc_class_prefix = "GCS";

// A set of words or phrases that represents a common concept likely to appear
// in your audio, for example a list of passenger ship names. CustomClass items
// can be substituted into placeholders that you set in PhraseSet phrases.
message CustomClass {
  option (google.api.resource) = {
    type: "speech.googleapis.com/CustomClass"
    pattern: "projects/{project}/locations/{location}/customClasses/{custom_class}"
  };

  // An item of the class.
  message ClassItem {
    // The class item's value.
    string value = 1;
  }

  // The resource name of the custom class. Resource names follow the pattern
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  string name = 1;

  // If this custom class is a resource, the custom_class_id is the resource id
  // of the CustomClass. Case sensitive.
  string custom_class_id = 2;

  // A collection of class items.
  repeated ClassItem items = 3;
}

// Provides "hints" to the speech recognizer to favor specific words and phrases
// in the results.
message PhraseSet {
  option (google.api.resource) = {
    type: "speech.googleapis.com/PhraseSet"
    pattern: "projects/{project}/locations/{location}/phraseSets/{phrase_set}"
  };

  // A phrase containing words and phrase "hints" so that
  // the speech recognition is more likely to recognize them. This can be used
  // to improve the accuracy for specific words and phrases, for example, if
  // specific commands are typically spoken by the user. This can also be used
  // to add additional words to the vocabulary of the recognizer. See
  // [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
  //
  // List items can also include pre-built or custom classes containing groups
  // of words that represent common concepts that occur in natural language. For
  // example, rather than providing a phrase hint for every month of the
  // year (e.g. "i was born in january", "i was born in february", ...), using
  // the pre-built `$MONTH` class improves the likelihood of correctly
  // transcribing audio that includes months (e.g. "i was born in $month").
  // To refer to pre-built classes, use the class' symbol prepended with `$`
  // e.g. `$MONTH`. To refer to custom classes that were defined inline in the
  // request, set the class's `custom_class_id` to a string unique to all class
  // resources and inline classes. Then use the class' id wrapped in `${...}`
  // e.g. "${my-months}". To refer to custom class resources, use the class'
  // id wrapped in `${}` (e.g. `${my-months}`).
  //
  // Speech-to-Text supports three locations: `global`, `us` (US North America),
  // and `eu` (Europe). If you are calling the `speech.googleapis.com`
  // endpoint, use the `global` location. To specify a region, use a
  // [regional endpoint](https://cloud.google.com/speech-to-text/docs/endpoints)
  // with matching `us` or `eu` location value.
  message Phrase {
    // The phrase itself.
    string value = 1;

    // Hint Boost. Overrides the boost set at the phrase set level.
    // Positive value will increase the probability that a specific phrase will
    // be recognized over other similar sounding phrases. The higher the boost,
    // the higher the chance of false positive recognition as well. Negative
    // boost will simply be ignored. Though `boost` can accept a wide range of
    // positive values, most use cases are best served
    // with values between 0 and 20. We recommend using a binary search approach
    // to finding the optimal value for your use case as well as adding
    // phrases both with and without boost to your requests.
    float boost = 2;
  }

  // The resource name of the phrase set. Resource names follow the pattern
  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  string name = 1;

  // A list of words and phrases.
  repeated Phrase phrases = 2;

  // NOTE(review): field number 3 is not used by this message. If it belonged
  // to a field that was removed, declare `reserved 3;` so it is never reused.

  // Hint Boost. Positive value will increase the probability that a specific
  // phrase will be recognized over other similar sounding phrases. The higher
  // the boost, the higher the chance of false positive recognition as well.
  // Negative boost values would correspond to anti-biasing. Anti-biasing is not
  // enabled, so negative boost will simply be ignored. Though `boost` can
  // accept a wide range of positive values, most use cases are best served with
  // values between 0 (exclusive) and 20. We recommend using a binary search
  // approach to finding the optimal value for your use case as well as adding
  // phrases both with and without boost to your requests.
  float boost = 4;
}

// Speech adaptation configuration.
message SpeechAdaptation {
  // An ABNF grammar, supplied as a list of strings.
  message ABNFGrammar {
    // All declarations and rules of an ABNF grammar broken up into multiple
    // strings that will end up concatenated.
    repeated string abnf_strings = 1;
  }

  // A collection of phrase sets. To specify the hints inline, leave the
  // phrase set's `name` blank and fill in the rest of its fields. Any
  // phrase set can use any custom class.
  repeated PhraseSet phrase_sets = 1;

  // A collection of phrase set resource names to use.
  repeated string phrase_set_references = 2 [(google.api.resource_reference) = {
    type: "speech.googleapis.com/PhraseSet"
  }];

  // A collection of custom classes. To specify the classes inline, leave the
  // class' `name` blank and fill in the rest of its fields, giving it a unique
  // `custom_class_id`. Refer to the inline defined class in phrase hints by its
  // `custom_class_id`.
  repeated CustomClass custom_classes = 3;

  // Augmented Backus-Naur form (ABNF) is a standardized grammar notation
  // composed of a set of derivation rules.
  // See specifications: https://www.w3.org/TR/speech-grammar
  ABNFGrammar abnf_grammar = 4;
}

// Transcription normalization configuration. Use transcription normalization
// to automatically replace parts of the transcript with phrases of your
// choosing. For StreamingRecognize, this normalization only applies to stable
// partial transcripts (stability > 0.8) and final transcripts.
message TranscriptNormalization {
  // A single replacement configuration.
  message Entry {
    // What to replace. Max length is 100 characters.
    string search = 1;

    // What to replace with. Max length is 100 characters.
    string replace = 2;

    // Whether the search is case sensitive.
    bool case_sensitive = 3;
  }

  // A list of replacement entries. Replacement is performed with one entry at
  // a time. For example, the second entry in ["cat" => "dog", "mountain cat"
  // => "mountain dog"] will never be applied because the first entry is always
  // processed before it. At most 100 entries.
  repeated Entry entries = 1;
}
