// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1beta;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta";
option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProcessingConfigProto";
option java_package = "com.google.cloud.discoveryengine.v1beta";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta";

// A singleton resource of
// [DataStore][google.cloud.discoveryengine.v1beta.DataStore]. It's empty when
// [DataStore][google.cloud.discoveryengine.v1beta.DataStore] is created, which
// defaults to digital parser. The first call to
// [DataStoreService.UpdateDocumentProcessingConfig][] method will initialize
// the config.
message DocumentProcessingConfig {
  option (google.api.resource) = {
    type: "discoveryengine.googleapis.com/DocumentProcessingConfig"
    pattern: "projects/{project}/locations/{location}/dataStores/{data_store}/documentProcessingConfig"
    pattern: "projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/documentProcessingConfig"
  };

  // Related configurations applied to a specific type of document parser.
  // At most one member of `type_dedicated_config` can be set per instance.
  message ParsingConfig {
    // The digital parsing configurations for documents. This message
    // currently declares no fields; selecting it in the oneof is what chooses
    // the digital parser.
    message DigitalParsingConfig {}

    // The OCR parsing configurations for documents.
    message OcrParsingConfig {
      // [DEPRECATED] This field is deprecated. To use the additional enhanced
      // document elements processing, please switch to `layout_parsing_config`.
      repeated string enhanced_document_elements = 1 [deprecated = true];

      // If true, will use native text instead of OCR text on pages containing
      // native text.
      bool use_native_text = 2;
    }

    // NOTE(review): comments in this file refer to layout parsing and to
    // `layout_parsing_config`, but no such member is declared in the oneof
    // below as visible here. Confirm against the current API surface.

    // Configs for document processing types.
    oneof type_dedicated_config {
      // Configurations applied to digital parser.
      DigitalParsingConfig digital_parsing_config = 1;

      // Configurations applied to OCR parser. Currently it only applies to
      // PDFs.
      OcrParsingConfig ocr_parsing_config = 2;
    }
  }

  // The full resource name of the Document Processing Config.
  // Format:
  // `projects/*/locations/*/collections/*/dataStores/*/documentProcessingConfig`.
  // The resource definition on this message also declares the pattern without
  // a collection segment:
  // `projects/*/locations/*/dataStores/*/documentProcessingConfig`.
  string name = 1;

  // Configurations for default Document parser.
  // If not specified, we will configure it as default DigitalParsingConfig, and
  // the default parsing config will be applied to all file types for Document
  // parsing.
  ParsingConfig default_parsing_config = 4;

  // Map from file type to override the default parsing configuration based on
  // the file type. Supported keys:
  // * `pdf`: Override parsing config for PDF files. Digital parsing, OCR
  // parsing, or layout parsing is supported.
  // * `html`: Override parsing config for HTML files. Only digital parsing and
  // layout parsing are supported.
  // * `docx`: Override parsing config for DOCX files. Only digital parsing and
  // layout parsing are supported.
  map<string, ParsingConfig> parsing_config_overrides = 5;
}