// xref: /aosp_15_r20/external/googleapis/google/cloud/translate/v3beta1/translation_service.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
syntax = "proto3";

package google.cloud.translation.v3beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

// Per-language code generation options.
option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Translate.V3Beta1";
option go_package = "cloud.google.com/go/translation/apiv3beta1/translationpb;translationpb";
option java_multiple_files = true;
option java_outer_classname = "TranslationServiceProto";
option java_package = "com.google.cloud.translate.v3beta1";
option php_namespace = "Google\\Cloud\\Translate\\V3beta1";
option ruby_package = "Google::Cloud::Translate::V3beta1";
34
// Proto file for the Cloud Translation API (v3beta1).
36
// Provides natural language translation operations.
service TranslationService {
  option (google.api.default_host) = "translate.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-translation";

  // Translates input text and returns translated text.
  rpc TranslateText(TranslateTextRequest) returns (TranslateTextResponse) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:translateText"
      body: "*"
      additional_bindings {
        post: "/v3beta1/{parent=projects/*}:translateText"
        body: "*"
      }
    };
  }

  // Detects the language of text within a request.
  rpc DetectLanguage(DetectLanguageRequest) returns (DetectLanguageResponse) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:detectLanguage"
      body: "*"
      additional_bindings {
        post: "/v3beta1/{parent=projects/*}:detectLanguage"
        body: "*"
      }
    };
    option (google.api.method_signature) = "parent,model,mime_type";
  }

  // Returns a list of supported languages for translation.
  rpc GetSupportedLanguages(GetSupportedLanguagesRequest)
      returns (SupportedLanguages) {
    option (google.api.http) = {
      get: "/v3beta1/{parent=projects/*/locations/*}/supportedLanguages"
      additional_bindings {
        get: "/v3beta1/{parent=projects/*}/supportedLanguages"
      }
    };
    option (google.api.method_signature) = "parent,display_language_code,model";
  }

  // Translates documents in synchronous mode.
  rpc TranslateDocument(TranslateDocumentRequest)
      returns (TranslateDocumentResponse) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:translateDocument"
      body: "*"
    };
  }

  // Translates a large volume of text in asynchronous batch mode.
  // This function provides real-time output as the inputs are being processed.
  // If caller cancels a request, the partial results (for an input file, it's
  // all or nothing) may still be available on the specified output location.
  //
  // This call returns immediately and you can
  // use google.longrunning.Operation.name to poll the status of the call.
  // The operation's response type is BatchTranslateResponse and its metadata
  // type is BatchTranslateMetadata.
  rpc BatchTranslateText(BatchTranslateTextRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:batchTranslateText"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "BatchTranslateResponse"
      metadata_type: "BatchTranslateMetadata"
    };
  }

  // Translates a large volume of documents in asynchronous batch mode.
  // This function provides real-time output as the inputs are being processed.
  // If caller cancels a request, the partial results (for an input file, it's
  // all or nothing) may still be available on the specified output location.
  //
  // This call returns immediately and you can use
  // google.longrunning.Operation.name to poll the status of the call.
  // The operation's response type is BatchTranslateDocumentResponse and its
  // metadata type is BatchTranslateDocumentMetadata.
  rpc BatchTranslateDocument(BatchTranslateDocumentRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:batchTranslateDocument"
      body: "*"
    };
    option (google.api.method_signature) =
        "parent,source_language_code,target_language_codes,input_configs,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "BatchTranslateDocumentResponse"
      metadata_type: "BatchTranslateDocumentMetadata"
    };
  }

  // Creates a glossary and returns the long-running operation. Returns
  // NOT_FOUND, if the project doesn't exist.
  rpc CreateGlossary(CreateGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}/glossaries"
      body: "glossary"
    };
    option (google.api.method_signature) = "parent,glossary";
    option (google.longrunning.operation_info) = {
      response_type: "Glossary"
      metadata_type: "CreateGlossaryMetadata"
    };
  }

  // Lists glossaries in a project. Returns NOT_FOUND, if the project doesn't
  // exist.
  rpc ListGlossaries(ListGlossariesRequest) returns (ListGlossariesResponse) {
    option (google.api.http) = {
      get: "/v3beta1/{parent=projects/*/locations/*}/glossaries"
    };
    option (google.api.method_signature) = "parent,filter";
  }

  // Gets a glossary. Returns NOT_FOUND, if the glossary doesn't
  // exist.
  rpc GetGlossary(GetGlossaryRequest) returns (Glossary) {
    option (google.api.http) = {
      get: "/v3beta1/{name=projects/*/locations/*/glossaries/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Deletes a glossary, or cancels glossary construction
  // if the glossary isn't created yet.
  // Returns NOT_FOUND, if the glossary doesn't exist.
  rpc DeleteGlossary(DeleteGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v3beta1/{name=projects/*/locations/*/glossaries/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "DeleteGlossaryResponse"
      metadata_type: "DeleteGlossaryMetadata"
    };
  }
}
178
// Configures which glossary should be used for a specific target language,
// and defines options for applying that glossary.
message TranslateTextGlossaryConfig {
  // Required. Specifies the glossary used for this translation. Use
  // this format: `projects/*/locations/*/glossaries/*`.
  string glossary = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Indicates whether glossary matching is case-insensitive.
  // Defaults to false if missing.
  bool ignore_case = 2 [(google.api.field_behavior) = OPTIONAL];
}
190
// The request message for synchronous translation.
message TranslateTextRequest {
  // Required. The content of the input in string format.
  // We recommend the total content be less than 30k codepoints. The max length
  // of this field is 1024.
  // Use BatchTranslateText for larger text.
  repeated string contents = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The format of the source text, for example, "text/html",
  // "text/plain". If left blank, the MIME type defaults to "text/html".
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The BCP-47 language code of the input text if
  // known, for example, "en-US" or "sr-Latn". Supported language codes are
  // listed in Language Support. If the source language isn't specified, the API
  // attempts to identify the source language automatically and returns the
  // source language within the response.
  string source_language_code = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. The BCP-47 language code to use for translation of the input
  // text, set to one of the language codes listed in Language Support.
  string target_language_code = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}` or
  // `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Non-global location is required for requests using AutoML models or
  // custom glossaries.
  //
  // Models and glossaries must be within the same region (have same
  // location-id), otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 8 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The `model` type requested for this translation.
  //
  // The format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // For global (non-regionalized) requests, use `location-id` `global`.
  // For example,
  // `projects/{project-number-or-id}/locations/global/models/general/nmt`.
  //
  // If not provided, the default Google model (NMT) will be used.
  string model = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossary to be applied. The glossary must be
  // within the same region (have the same location-id) as the model, otherwise
  // an INVALID_ARGUMENT (400) error is returned.
  TranslateTextGlossaryConfig glossary_config = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/labels for more information.
  map<string, string> labels = 10 [(google.api.field_behavior) = OPTIONAL];
}
269
// The response message for synchronous translation.
message TranslateTextResponse {
  // Text translation responses with no glossary applied.
  // This field has the same length as
  // [`contents`][google.cloud.translation.v3beta1.TranslateTextRequest.contents].
  repeated Translation translations = 1;

  // Text translation responses if a glossary is provided in the request.
  // This can be the same as
  // [`translations`][google.cloud.translation.v3beta1.TranslateTextResponse.translations]
  // if no terms apply. This field has the same length as
  // [`contents`][google.cloud.translation.v3beta1.TranslateTextRequest.contents].
  repeated Translation glossary_translations = 3;
}
283
// A single translation response.
message Translation {
  // Text translated into the target language.
  // If an error occurs during translation, this field might be excluded from
  // the response.
  string translated_text = 1;

  // Only present when `model` is present in the request.
  // `model` here is normalized to have project number.
  //
  // For example:
  // If the `model` requested in TranslateTextRequest is
  // `projects/{project-id}/locations/{location-id}/models/general/nmt` then
  // `model` here would be normalized to
  // `projects/{project-number}/locations/{location-id}/models/general/nmt`.
  string model = 2;

  // The BCP-47 language code of source text in the initial request, detected
  // automatically, if no source language was passed within the initial
  // request. If the source language was passed, auto-detection of the language
  // does not occur and this field is empty.
  string detected_language_code = 4;

  // The `glossary_config` used for this translation.
  TranslateTextGlossaryConfig glossary_config = 3;
}
310
// The request message for language detection.
message DetectLanguageRequest {
  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}` or
  // `projects/{project-number-or-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Only models within the same region (have the same location-id) can be
  // used. Otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 5 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The language detection model to be used.
  //
  // Format:
  // `projects/{project-number-or-id}/locations/{location-id}/models/language-detection/{model-id}`
  //
  // Only one language detection model is currently supported:
  // `projects/{project-number-or-id}/locations/{location-id}/models/language-detection/default`.
  //
  // If not specified, the default model is used.
  string model = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. The source of the document from which to detect the language.
  oneof source {
    // The content of the input stored as a string.
    string content = 1;
  }

  // Optional. The format of the source text, for example, "text/html",
  // "text/plain". If left blank, the MIME type defaults to "text/html".
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/labels for more information.
  map<string, string> labels = 6 [(google.api.field_behavior) = OPTIONAL];
}
362
// A single detected language together with the confidence of that detection.
message DetectedLanguage {
  // The BCP-47 language code of source content in the request, detected
  // automatically.
  string language_code = 1;

  // The confidence of the detection result for this language.
  float confidence = 2;
}
372
// The response message for language detection.
message DetectLanguageResponse {
  // A list of detected languages sorted by detection confidence in descending
  // order, with the most probable language first.
  repeated DetectedLanguage languages = 1;
}
379
// The request message for discovering supported languages.
message GetSupportedLanguagesRequest {
  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}` or
  // `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Non-global location is required for AutoML models.
  //
  // Only models within the same region (have same location-id) can be used,
  // otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The language to use to return localized, human readable names
  // of supported languages. If missing, then display names are not returned
  // in a response.
  string display_language_code = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Get supported languages of this model.
  //
  // The format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // Returns languages supported by the specified model.
  // If missing, we get supported languages of Google general NMT model.
  string model = 2 [(google.api.field_behavior) = OPTIONAL];
}
422
// The response message for discovering supported languages.
message SupportedLanguages {
  // A list of supported language responses. This list contains an entry
  // for each language the Translation API supports.
  repeated SupportedLanguage languages = 1;
}
429
// A single supported language response corresponds to information related
// to one supported language.
message SupportedLanguage {
  // Supported language code, generally consisting of its ISO 639-1
  // identifier, for example, 'en', 'ja'. In certain cases, BCP-47 codes
  // including language and region identifiers are returned (for example,
  // 'zh-TW' and 'zh-CN').
  string language_code = 1;

  // Human readable name of the language localized in the display language
  // specified in the request.
  string display_name = 2;

  // Can be used as source language.
  bool support_source = 3;

  // Can be used as target language.
  bool support_target = 4;
}
449
// The Google Cloud Storage location for the input content.
message GcsSource {
  // Required. Source data URI. For example, `gs://my_bucket/my_object`.
  string input_uri = 1 [(google.api.field_behavior) = REQUIRED];
}
455
// Input configuration for BatchTranslateText request.
message InputConfig {
  // Optional. Can be "text/plain" or "text/html".
  // For `.tsv`, "text/html" is used if mime_type is missing.
  // For `.html`, this field must be "text/html" or empty.
  // For `.txt`, this field must be "text/plain" or empty.
  string mime_type = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location for the source input.
    // This can be a single file (for example,
    // `gs://translation-test/input.tsv`) or a wildcard (for example,
    // `gs://translation-test/*`). If a file extension is `.tsv`, it can
    // contain either one or two columns. The first column (optional) is the id
    // of the text request. If the first column is missing, we use the row
    // number (0-based) from the input file as the ID in the output file. The
    // second column is the actual text to be translated. We recommend each
    // row be <= 10K Unicode codepoints, otherwise an error might be returned.
    // Note that the input tsv must be RFC 4180 compliant.
    //
    // You could use https://github.com/Clever/csvlint to check potential
    // formatting errors in your tsv file:
    // `csvlint --delimiter='\t' your_input_file.tsv`
    //
    // The other supported file extensions are `.txt` or `.html`, which is
    // treated as a single large chunk of text.
    GcsSource gcs_source = 2;
  }
}
487
// The Google Cloud Storage location for the output content.
message GcsDestination {
  // Required. There must be no files under 'output_uri_prefix'.
  // 'output_uri_prefix' must end with "/" and start with "gs://", otherwise an
  // INVALID_ARGUMENT (400) error is returned.
  string output_uri_prefix = 1 [(google.api.field_behavior) = REQUIRED];
}
495
// Output configuration for BatchTranslateText request.
message OutputConfig {
  // Required. The destination of output.
  oneof destination {
    // Google Cloud Storage destination for output content.
    // For every single input file (for example, gs://a/b/c.[extension]), we
    // generate at most 2 * n output files. (n is the # of target_language_codes
    // in the BatchTranslateTextRequest).
    //
    // Output files (tsv) generated are compliant with RFC 4180 except that
    // record delimiters are '\n' instead of '\r\n'. We don't provide any way to
    // change record delimiters.
    //
    // While the input files are being processed, we write/update an index file
    // 'index.csv' under 'output_uri_prefix' (for example,
    // gs://translation-test/index.csv). The index file is generated/updated as
    // new files are being translated. The format is:
    //
    // input_file,target_language_code,translations_file,errors_file,
    // glossary_translations_file,glossary_errors_file
    //
    // input_file is one file we matched using gcs_source.input_uri.
    // target_language_code is provided in the request.
    // translations_file contains the translations. (details provided below)
    // errors_file contains the errors during processing of the file. (details
    // below). Both translations_file and errors_file could be empty
    // strings if we have no content to output.
    // glossary_translations_file and glossary_errors_file are always empty
    // strings if the input_file is tsv. They could also be empty if we have no
    // content to output.
    //
    // Once a row is present in index.csv, the input/output matching never
    // changes. Callers should also expect all the content in input_file to be
    // processed and ready to be consumed (that is, no partial output file is
    // written).
    //
    // Since index.csv is continually updated during processing, please make
    // sure there is no custom retention policy applied on the output bucket
    // that may prevent the file from being updated.
    // (https://cloud.google.com/storage/docs/bucket-lock#retention-policy)
    //
    // The format of translations_file (for target language code 'trg') is:
    // `gs://translation_test/a_b_c_'trg'_translations.[extension]`
    //
    // If the input file extension is tsv, the output has the following
    // columns:
    // Column 1: ID of the request provided in the input, if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: translation without applying a glossary. Empty string if there
    // is an error.
    // Column 4 (only present if a glossary is provided in the request):
    // translation after applying the glossary. Empty string if there is an
    // error applying the glossary. Could be same string as column 3 if there is
    // no glossary applied.
    //
    // If input file extension is a txt or html, the translation is directly
    // written to the output file. If glossary is requested, a separate
    // glossary_translations_file has format of
    // `gs://translation_test/a_b_c_'trg'_glossary_translations.[extension]`
    //
    // The format of errors file (for target language code 'trg') is:
    // `gs://translation_test/a_b_c_'trg'_errors.[extension]`
    //
    // If the input file extension is tsv, errors_file contains the following:
    // Column 1: ID of the request provided in the input, if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: Error detail for the translation. Could be empty.
    // Column 4 (only present if a glossary is provided in the request):
    // Error when applying the glossary.
    //
    // If the input file extension is txt or html, glossary_errors_file will be
    // generated that contains error details. glossary_errors_file has format of
    // `gs://translation_test/a_b_c_'trg'_glossary_errors.[extension]`
    GcsDestination gcs_destination = 1;
  }
}
574
// A document translation request input config.
message DocumentInputConfig {
  // Specifies the source for the document's content.
  //
  // The input file size should be <= 20MB for
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  //
  // The input file size should be <= 20MB and the maximum page limit is 20 for
  // - application/pdf
  oneof source {
    // Document's content represented as a stream of bytes.
    bytes content = 1;

    // Google Cloud Storage location. This must be a single file.
    // For example: gs://example_bucket/example_file.pdf
    GcsSource gcs_source = 2;
  }

  // Specifies the input document's mime_type.
  //
  // If not specified it will be determined using the file extension for
  // gcs_source provided files. For a file provided through bytes content the
  // mime_type must be provided.
  //
  // Currently supported mime types are:
  // - application/pdf
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  string mime_type = 4;
}
605
// A document translation request output config.
message DocumentOutputConfig {
  // A URI destination for the translated document.
  // It is optional to provide a destination. If provided the results from
  // TranslateDocument will be stored in the destination.
  // Whether a destination is provided or not, the translated documents will be
  // returned within TranslateDocumentResponse.document_translation and
  // TranslateDocumentResponse.glossary_document_translation.
  oneof destination {
    // NOTE(review): the field comment below gives two different output names
    // for a document passed inline in the request
    // ("output_[trg]_translations.[ext]" and
    // "translated_document_[trg]_translations.[ext]") -- confirm which one the
    // service actually uses.

    // Optional. Google Cloud Storage destination for the translation output,
    // e.g., `gs://my_bucket/my_directory/`.
    //
    // The destination directory provided does not have to be empty, but the
    // bucket must exist. If a file with the same name as the output file
    // already exists in the destination an error will be returned.
    //
    // For a DocumentInputConfig.content provided document, the output file
    // will have the name "output_[trg]_translations.[ext]", where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    // mime type.
    //
    //
    // For a DocumentInputConfig.gcs_source provided document, the output file
    // will have a name according to its URI. For example: an input file with
    // URI: `gs://a/b/c.[extension]` stored in a gcs_destination bucket with
    // name "my_bucket" will have an output URI:
    // `gs://my_bucket/a_b_c_[trg]_translations.[ext]`, where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    // mime type.
    //
    //
    // If the document was directly provided through the request, then the
    // output document will have the format:
    // `gs://my_bucket/translated_document_[trg]_translations.[ext]`, where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    // mime type.
    //
    // If a glossary was provided, then the output URI for the glossary
    // translation will be equal to the default output URI but have
    // `glossary_translations` instead of `translations`. For the previous
    // example, its glossary URI would be:
    // `gs://my_bucket/a_b_c_[trg]_glossary_translations.[ext]`.
    //
    // Thus the max number of output files will be 2 (Translated document,
    // Glossary translated document).
    //
    // Callers should expect no partial outputs. If there is any error during
    // document translation, no output will be stored in the Cloud Storage
    // bucket.
    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. Specifies the translated document's mime_type.
  // If not specified, the translated file's mime type will be the same as the
  // input file's mime type.
  // Currently only support the output mime type to be the same as input mime
  // type.
  // - application/pdf
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];
}
672
// A document translation request.
message TranslateDocumentRequest {
  // Required. Location to make a regional call.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global`.
  //
  // Non-global location is required for requests using AutoML models or custom
  // glossaries.
  //
  // Models and glossaries must be within the same region (have the same
  // location-id), otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The BCP-47 language code of the input document if known, for
  // example, "en-US" or "sr-Latn". Supported language codes are listed in
  // [Language Support](https://cloud.google.com/translate/docs/languages).
  // If the source language isn't specified, the API attempts to identify the
  // source language automatically and returns the source language within the
  // response. Source language must be specified if the request contains a
  // glossary or a custom model.
  string source_language_code = 2 [(google.api.field_behavior) = OPTIONAL];

  // Required. The BCP-47 language code to use for translation of the input
  // document, set to one of the language codes listed in
  // [Language Support](https://cloud.google.com/translate/docs/languages).
  string target_language_code = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. Input configurations.
  DocumentInputConfig document_input_config = 4
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Output configurations.
  // Defines if the output file should be stored within Cloud Storage as well
  // as the desired output format. If not provided, the translated file will
  // only be returned through a byte-stream and its output mime type will be
  // the same as the input file's mime type.
  DocumentOutputConfig document_output_config = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The `model` type requested for this translation.
  //
  // The format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // If not provided, the default Google model (NMT) will be used for
  // translation.
  string model = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossary to be applied. The glossary must be within the same
  // region (have the same location-id) as the model, otherwise an
  // INVALID_ARGUMENT (400) error is returned.
  TranslateTextGlossaryConfig glossary_config = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters (Unicode
  // codepoints), can only contain lowercase letters, numeric characters,
  // underscores and dashes. International characters are allowed. Label values
  // are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];

  // Note: field number 9 is not assigned in this message. Check the field's
  // history before reusing that number.

  // Optional. This flag is to support user customized attribution.
  // If not provided, the default is `Machine Translated by Google`.
  // Customized attribution should follow rules in
  // https://cloud.google.com/translate/attribution#attribution_and_logos
  string customized_attribution = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. is_translate_native_pdf_only field for external customers.
  // If true, the page limit of online native pdf translation is 300 and only
  // native pdf pages will be translated.
  bool is_translate_native_pdf_only = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, use the text removal server to remove the shadow text on
  // background image for native pdf translation.
  // Shadow removal feature can only be enabled when
  // `is_translate_native_pdf_only: false && pdf_native_only: false`.
  bool enable_shadow_removal_native_pdf = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, enable auto rotation correction in DVS.
  bool enable_rotation_correction = 13 [(google.api.field_behavior) = OPTIONAL];
}
766
// A translated document message.
message DocumentTranslation {
  // The array of translated documents. It is expected to be size 1 for now. We
  // may produce multiple translated documents in the future for other types of
  // file formats.
  repeated bytes byte_stream_outputs = 1;

  // The translated document's mime type.
  string mime_type = 2;

  // The detected language for the input document.
  // If the user did not provide the source language for the input document,
  // this field will have the language code automatically detected. If the
  // source language was passed, auto-detection of the language does not occur
  // and this field is empty.
  string detected_language_code = 3;
}
784
// A translated document response message.
message TranslateDocumentResponse {
  // Translated document.
  DocumentTranslation document_translation = 1;

  // The document's translation output if a glossary is provided in the request.
  // This can be the same as [TranslateDocumentResponse.document_translation]
  // if no glossary terms apply.
  DocumentTranslation glossary_document_translation = 2;

  // Only present when `model` is present in the request.
  // `model` is normalized to have a project number.
  //
  // For example:
  // If the `model` field in TranslateDocumentRequest is:
  // `projects/{project-id}/locations/{location-id}/models/general/nmt` then
  // `model` here would be normalized to
  // `projects/{project-number}/locations/{location-id}/models/general/nmt`.
  string model = 3;

  // The `glossary_config` used for this translation.
  TranslateTextGlossaryConfig glossary_config = 4;
}
808
// The batch translation request.
message BatchTranslateTextRequest {
  // Required. Location to make a call. Must refer to a caller's project.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // The `global` location is not supported for batch translation.
  //
  // Only AutoML Translation models or glossaries within the same region (have
  // the same location-id) can be used, otherwise an INVALID_ARGUMENT (400)
  // error is returned.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. Source language code.
  string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Specify up to 10 language codes here.
  repeated string target_language_codes = 3
      [(google.api.field_behavior) = REQUIRED];

  // Optional. The models to use for translation. Map's key is target language
  // code. Map's value is model name. Value can be a built-in general model,
  // or an AutoML Translation model.
  //
  // The value format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // If the map is empty or a specific model is not requested for a language
  // pair, then the default Google model (NMT) is used.
  map<string, string> models = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. Input configurations.
  // The total number of files matched should be <= 100.
  // The total content size should be <= 100M Unicode codepoints.
  // The files must use UTF-8 encoding.
  repeated InputConfig input_configs = 5
      [(google.api.field_behavior) = REQUIRED];

  // Required. Output configuration.
  // If 2 input configs match to the same file (that is, same input path),
  // we don't generate output for duplicate inputs.
  OutputConfig output_config = 6 [(google.api.field_behavior) = REQUIRED];

  // Optional. Glossaries to be applied for translation.
  // It's keyed by target language code.
  map<string, TranslateTextGlossaryConfig> glossaries = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Note: field number 8 is not assigned in this message. Check the field's
  // history before reusing that number.

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 9 [(google.api.field_behavior) = OPTIONAL];
}
878
// State metadata for the batch translation operation.
message BatchTranslateMetadata {
  // State of the job.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The batch is processed, and at least one item was successfully
    // processed.
    SUCCEEDED = 2;

    // The batch is done and no item was successfully processed.
    FAILED = 3;

    // Request is in the process of being canceled after the caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The batch is done after the user has called
    // longrunning.Operations.CancelOperation. Any records processed before the
    // cancel command are output as specified in the request.
    CANCELLED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Number of successfully translated characters so far (Unicode codepoints).
  int64 translated_characters = 2;

  // Number of characters that have failed to process so far (Unicode
  // codepoints).
  int64 failed_characters = 3;

  // Total number of characters (Unicode codepoints).
  // This is the total number of codepoints from input files times the number of
  // target languages and appears here shortly after the call is submitted.
  int64 total_characters = 4;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 5;
}
924
// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by BatchTranslateText if at least one sentence is translated
// successfully.
message BatchTranslateResponse {
  // Total number of characters (Unicode codepoints).
  int64 total_characters = 1;

  // Number of successfully translated characters (Unicode codepoints).
  int64 translated_characters = 2;

  // Number of characters that have failed to process (Unicode codepoints).
  int64 failed_characters = 3;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 4;

  // The time when the operation is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 5;
}
947
// Input configuration for glossaries.
message GlossaryInputConfig {
  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location of glossary data.
    // File format is determined based on the filename extension. API returns
    // [google.rpc.Code.INVALID_ARGUMENT] for unsupported URI-s and file
    // formats. Wildcards are not allowed. This must be a single file in one of
    // the following formats:
    //
    // For unidirectional glossaries:
    //
    // - TSV/CSV (`.tsv`/`.csv`): 2 column file, tab- or comma-separated.
    //   The first column is source text. The second column is target text.
    //   The file must not contain headers. That is, the first row is data, not
    //   column names.
    //
    // - TMX (`.tmx`): TMX file with parallel data defining source/target term
    //   pairs.
    //
    // For equivalent term sets glossaries:
    //
    // - CSV (`.csv`): Multi-column CSV file defining equivalent glossary terms
    //   in multiple languages. See documentation for more information -
    //   [glossaries](https://cloud.google.com/translate/docs/advanced/glossary).
    GcsSource gcs_source = 1;
  }
}
976
// Represents a glossary built from user provided data.
message Glossary {
  option (google.api.resource) = {
    type: "translate.googleapis.com/Glossary"
    pattern: "projects/{project}/locations/{location}/glossaries/{glossary}"
  };

  // Used with unidirectional glossaries.
  message LanguageCodePair {
    // Required. The BCP-47 language code of the input text, for example,
    // "en-US". Expected to be an exact match for GlossaryTerm.language_code.
    string source_language_code = 1;

    // Required. The BCP-47 language code for translation output, for example,
    // "zh-CN". Expected to be an exact match for GlossaryTerm.language_code.
    string target_language_code = 2;
  }

  // Used with equivalent term set glossaries.
  message LanguageCodesSet {
    // The BCP-47 language code(s) for terms defined in the glossary.
    // All entries are unique. The list contains at least two entries.
    // Expected to be an exact match for GlossaryTerm.language_code.
    repeated string language_codes = 1;
  }

  // Required. The resource name of the glossary. Glossary names have the form
  // `projects/{project-number-or-id}/locations/{location-id}/glossaries/{glossary-id}`.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // Note: field number 2 is not assigned in this message. Check the field's
  // history before reusing that number.

  // Languages supported by the glossary.
  oneof languages {
    // Used with unidirectional glossaries.
    LanguageCodePair language_pair = 3;

    // Used with equivalent term set glossaries.
    LanguageCodesSet language_codes_set = 4;
  }

  // Required. Provides examples to build the glossary from.
  // Total glossary must not exceed 10M Unicode codepoints.
  GlossaryInputConfig input_config = 5;

  // Output only. The number of entries defined in the glossary.
  int32 entry_count = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. When CreateGlossary was called.
  google.protobuf.Timestamp submit_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. When the glossary creation was finished.
  google.protobuf.Timestamp end_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
1031
// Request message for CreateGlossary.
message CreateGlossaryRequest {
  // Required. The parent location in which to create the glossary. The field
  // references a `locations.googleapis.com/Location` resource, for example
  // `projects/{project-number-or-id}/locations/{location-id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. The glossary to create.
  Glossary glossary = 2 [(google.api.field_behavior) = REQUIRED];
}
1045
// Request message for GetGlossary.
message GetGlossaryRequest {
  // Required. The resource name of the glossary to retrieve. The field
  // references a `translate.googleapis.com/Glossary` resource.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "translate.googleapis.com/Glossary"
    }
  ];
}
1056
// Request message for DeleteGlossary.
message DeleteGlossaryRequest {
  // Required. The resource name of the glossary to delete. The field
  // references a `translate.googleapis.com/Glossary` resource.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "translate.googleapis.com/Glossary"
    }
  ];
}
1067
// Request message for ListGlossaries.
message ListGlossariesRequest {
  // Required. The name of the project location from which to list all of the
  // glossaries. The field references a `locations.googleapis.com/Location`
  // resource.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. Requested page size. The server may return fewer glossaries than
  // requested. If unspecified, the server picks an appropriate default.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A token identifying a page of results the server should return.
  // Typically, this is the value of [ListGlossariesResponse.next_page_token]
  // returned from the previous call to `ListGlossaries` method.
  // The first page is returned if `page_token` is empty or missing.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter specifying constraints of a list operation.
  // Specify the constraint by the format of "key=value", where key must be
  // "src" or "tgt", and the value must be a valid language code.
  // For multiple restrictions, concatenate them by "AND" (uppercase only),
  // such as: "src=en-US AND tgt=zh-CN". Notice that the exact match is used
  // here, which means using 'en-US' and 'en' can lead to different results,
  // which depends on the language code you used when you create the glossary.
  // For the unidirectional glossaries, the "src" and "tgt" add restrictions
  // on the source and target language code separately.
  // For the equivalent term set glossaries, the "src" and/or "tgt" add
  // restrictions on the term set.
  // For example: "src=en-US AND tgt=zh-CN" will only pick the unidirectional
  // glossaries which exactly match the source language code as "en-US" and the
  // target language code "zh-CN", but all equivalent term set glossaries which
  // contain "en-US" and "zh-CN" in their language set will be picked.
  // If missing, no filtering is performed.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
}
1106
// Response message for ListGlossaries.
message ListGlossariesResponse {
  // The list of glossaries for a project.
  repeated Glossary glossaries = 1;

  // A token to retrieve a page of results. Pass this value in the
  // [ListGlossariesRequest.page_token] field in the subsequent call to the
  // `ListGlossaries` method to retrieve the next page of results.
  string next_page_token = 2;
}
1117
// Stored in the
// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
// field returned by CreateGlossary.
message CreateGlossaryMetadata {
  // Enumerates the possible states that the creation request can be in.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The glossary was successfully created.
    SUCCEEDED = 2;

    // Failed to create the glossary.
    FAILED = 3;

    // Request is in the process of being canceled after the caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The glossary creation request was successfully canceled.
    CANCELLED = 5;
  }

  // The name of the glossary that is being created.
  string name = 1;

  // The current state of the glossary creation operation.
  State state = 2;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 3;
}
1153
// Stored in the
// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
// field returned by DeleteGlossary.
message DeleteGlossaryMetadata {
  // Enumerates the possible states that the deletion request can be in.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The glossary was successfully deleted.
    SUCCEEDED = 2;

    // Failed to delete the glossary.
    FAILED = 3;

    // Request is in the process of being canceled after the caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The glossary deletion request was successfully canceled.
    CANCELLED = 5;
  }

  // The name of the glossary that is being deleted.
  string name = 1;

  // The current state of the glossary deletion operation.
  State state = 2;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 3;
}
1189
// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by DeleteGlossary.
message DeleteGlossaryResponse {
  // The name of the deleted glossary.
  string name = 1;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 2;

  // The time when the glossary deletion is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 3;
}
1205
// The BatchTranslateDocument request.
message BatchTranslateDocumentRequest {
  // Required. Location to make a regional call.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // The `global` location is not supported for batch translation.
  //
  // Only AutoML Translation models or glossaries within the same region (have
  // the same location-id) can be used, otherwise an INVALID_ARGUMENT (400)
  // error is returned.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. The BCP-47 language code of the input document if known, for
  // example, "en-US" or "sr-Latn". Supported language codes are listed in
  // [Language Support](https://cloud.google.com/translate/docs/languages).
  string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The BCP-47 language code to use for translation of the input
  // document. Specify up to 10 language codes here.
  repeated string target_language_codes = 3
      [(google.api.field_behavior) = REQUIRED];

  // Required. Input configurations.
  // The total number of files matched should be <= 100.
  // The total content size to translate should be <= 100M Unicode codepoints.
  // The files must use UTF-8 encoding.
  repeated BatchDocumentInputConfig input_configs = 4
      [(google.api.field_behavior) = REQUIRED];

  // Required. Output configuration.
  // If 2 input configs match to the same file (that is, same input path),
  // we don't generate output for duplicate inputs.
  BatchDocumentOutputConfig output_config = 5
      [(google.api.field_behavior) = REQUIRED];

  // Optional. The models to use for translation. Map's key is target language
  // code. Map's value is the model name. Value can be a built-in general model,
  // or an AutoML Translation model.
  //
  // The value format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // If the map is empty or a specific model is not requested for a language
  // pair, then the default Google model (NMT) is used.
  map<string, string> models = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossaries to be applied. It's keyed by target language code.
  map<string, TranslateTextGlossaryConfig> glossaries = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. File format conversion map to be applied to all input files.
  // Map's key is the original mime_type. Map's value is the target mime_type of
  // translated documents.
  //
  // Supported file format conversion includes:
  //
  // - `application/pdf` to
  //   `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
  //
  // If nothing is specified, output files will be in the same format as the
  // original file.
  map<string, string> format_conversions = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Note: field number 9 is not assigned in this message. Check the field's
  // history before reusing that number.

  // Optional. This flag is to support user customized attribution.
  // If not provided, the default is `Machine Translated by Google`.
  // Customized attribution should follow rules in
  // https://cloud.google.com/translate/attribution#attribution_and_logos
  string customized_attribution = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, use the text removal server to remove the shadow text on
  // background image for native pdf translation.
  // Shadow removal feature can only be enabled when
  // `is_translate_native_pdf_only: false && pdf_native_only: false`.
  bool enable_shadow_removal_native_pdf = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, enable auto rotation correction in DVS.
  bool enable_rotation_correction = 12 [(google.api.field_behavior) = OPTIONAL];
}
1297
// Input configuration for BatchTranslateDocument request.
message BatchDocumentInputConfig {
  // Specify the input.
  oneof source {
    // Google Cloud Storage location for the source input.
    // This can be a single file (for example,
    // `gs://translation-test/input.docx`) or a wildcard (for example,
    // `gs://translation-test/*`).
    //
    // File mime type is determined based on extension. Supported mime types
    // include:
    //
    // - `pdf`, application/pdf
    // - `docx`,
    //   application/vnd.openxmlformats-officedocument.wordprocessingml.document
    // - `pptx`,
    //   application/vnd.openxmlformats-officedocument.presentationml.presentation
    // - `xlsx`,
    //   application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
    //
    // The max file size to support for `.docx`, `.pptx` and `.xlsx` is 100MB.
    // The max file size to support for `.pdf` is 1GB and the max page limit is
    // 1000 pages.
    // The max file size to support for all input documents is 1GB.
    GcsSource gcs_source = 1;
  }
}
1324
// Output configuration for BatchTranslateDocument request.
message BatchDocumentOutputConfig {
  // The destination of output. The destination directory provided must exist
  // and be empty.
  oneof destination {
    // Google Cloud Storage destination for output content.
    // For every single input document (for example, gs://a/b/c.[extension]), we
    // generate at most 2 * n output files. (n is the # of target_language_codes
    // in the BatchTranslateDocumentRequest).
    //
    // While the input documents are being processed, we write/update an index
    // file `index.csv` under `gcs_destination.output_uri_prefix` (for example,
    // gs://translation_output/index.csv). The index file is generated/updated
    // as new files are being translated. The format is:
    //
    // input_document,target_language_code,translation_output,error_output,
    // glossary_translation_output,glossary_error_output
    //
    // `input_document` is one file we matched using gcs_source.input_uri.
    // `target_language_code` is provided in the request.
    // `translation_output` contains the translations. (details provided below)
    // `error_output` contains the error message during processing of the file.
    // Both `translation_output` and `error_output` could be empty strings if we
    // have no content to output.
    // `glossary_translation_output` and `glossary_error_output` are the
    // translated output/error when we apply glossaries. They could also be
    // empty if we have no content to output.
    //
    // Once a row is present in index.csv, the input/output matching never
    // changes. Callers should also expect all the content in input_file to be
    // processed and ready to be consumed (that is, no partial output file is
    // written).
    //
    // Since index.csv is updated continually during the process, please make
    // sure there is no custom retention policy applied on the output bucket
    // that may prevent file updates.
    // (https://cloud.google.com/storage/docs/bucket-lock#retention-policy)
    //
    // The naming format of translation output files follows (for target
    // language code [trg]): `translation_output`:
    // `gs://translation_output/a_b_c_[trg]_translation.[extension]`
    // `glossary_translation_output`:
    // `gs://translation_test/a_b_c_[trg]_glossary_translation.[extension]`. The
    // output document will maintain the same file format as the input document.
    //
    // The naming format of error output files follows (for target language code
    // [trg]): `error_output`: `gs://translation_test/a_b_c_[trg]_errors.txt`
    // `glossary_error_output`:
    // `gs://translation_test/a_b_c_[trg]_glossary_translation.txt`. The error
    // output is a txt file containing error details.
    GcsDestination gcs_destination = 1;
  }
}
1378
// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by BatchTranslateDocument if at least one document is
// translated successfully.
message BatchTranslateDocumentResponse {
  // Total number of pages to translate in all documents. Documents without
  // clear page definition (such as XLSX) are not counted.
  int64 total_pages = 1;

  // Number of successfully translated pages in all documents. Documents without
  // clear page definition (such as XLSX) are not counted.
  int64 translated_pages = 2;

  // Number of pages that failed to process in all documents. Documents without
  // clear page definition (such as XLSX) are not counted.
  int64 failed_pages = 3;

  // Number of billable pages in documents with clear page definition (such as
  // PDF, DOCX, PPTX).
  int64 total_billable_pages = 4;

  // Total number of characters (Unicode codepoints) in all documents.
  int64 total_characters = 5;

  // Number of successfully translated characters (Unicode codepoints) in all
  // documents.
  int64 translated_characters = 6;

  // Number of characters that have failed to process (Unicode codepoints) in
  // all documents.
  int64 failed_characters = 7;

  // Number of billable characters (Unicode codepoints) in documents without
  // clear page definition, such as XLSX.
  int64 total_billable_characters = 8;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 9;

  // The time when the operation is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 10;
}
1423
// State metadata for the batch document translation operation.
message BatchTranslateDocumentMetadata {
  // State of the job.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The batch is processed, and at least one item was successfully processed.
    SUCCEEDED = 2;

    // The batch is done and no item was successfully processed.
    FAILED = 3;

    // Request is in the process of being canceled after the caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The batch is done after the user has called
    // longrunning.Operations.CancelOperation. Any records processed before the
    // cancel command are output as specified in the request.
    CANCELLED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Total number of pages to translate in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 total_pages = 2;

  // Number of successfully translated pages in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 translated_pages = 3;

  // Number of pages that failed to process in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 failed_pages = 4;

  // Number of billable pages in documents with clear page definition (such as
  // PDF, DOCX, PPTX) so far.
  int64 total_billable_pages = 5;

  // Total number of characters (Unicode codepoints) in all documents so far.
  int64 total_characters = 6;

  // Number of successfully translated characters (Unicode codepoints) in all
  // documents so far.
  int64 translated_characters = 7;

  // Number of characters that have failed to process (Unicode codepoints) in
  // all documents so far.
  int64 failed_characters = 8;

  // Number of billable characters (Unicode codepoints) in documents without
  // clear page definition (such as XLSX) so far.
  int64 total_billable_characters = 9;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 10;
}
1487