// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1beta;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";

// Per-language code generation options for this file.
option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta";
option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProcessingConfigProto";
option java_package = "com.google.cloud.discoveryengine.v1beta";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta";

// A singleton resource of
// [DataStore][google.cloud.discoveryengine.v1beta.DataStore]. It's empty when
// [DataStore][google.cloud.discoveryengine.v1beta.DataStore] is created, which
// defaults to digital parser. The first call to
// [DataStoreService.UpdateDocumentProcessingConfig][] method will initialize
// the config.
message DocumentProcessingConfig {
  option (google.api.resource) = {
    type: "discoveryengine.googleapis.com/DocumentProcessingConfig"
    pattern: "projects/{project}/locations/{location}/dataStores/{data_store}/documentProcessingConfig"
    pattern: "projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/documentProcessingConfig"
  };

  // Related configurations applied to a specific type of document parser.
  message ParsingConfig {
    // The digital parsing configurations for documents. This message
    // currently declares no fields; selecting it in the oneof below is what
    // chooses the digital parser.
    message DigitalParsingConfig {}

    // The OCR parsing configurations for documents.
    message OcrParsingConfig {
      // [DEPRECATED] This field is deprecated. To use the additional enhanced
      // document elements processing, please switch to `layout_parsing_config`.
      repeated string enhanced_document_elements = 1 [deprecated = true];

      // If true, will use native text instead of OCR text on pages containing
      // native text.
      bool use_native_text = 2;
    }

    // Configs for document processing types. At most one of the members may
    // be set.
    oneof type_dedicated_config {
      // Configurations applied to digital parser.
      DigitalParsingConfig digital_parsing_config = 1;

      // Configurations applied to OCR parser. Currently it only applies to
      // PDFs.
      OcrParsingConfig ocr_parsing_config = 2;
    }
  }

  // The full resource name of the Document Processing Config.
  // Format:
  // `projects/*/locations/*/collections/*/dataStores/*/documentProcessingConfig`.
  // The resource definition above also declares a pattern without the
  // `collections/*` segment:
  // `projects/*/locations/*/dataStores/*/documentProcessingConfig`.
  string name = 1;

  // Configurations for default Document parser.
  // If not specified, we will configure it as default DigitalParsingConfig, and
  // the default parsing config will be applied to all file types for Document
  // parsing.
  ParsingConfig default_parsing_config = 4;

  // Map from file type to override the default parsing configuration based on
  // the file type. Supported keys:
  // * `pdf`: Override parsing config for PDF files, either digital parsing, ocr
  // parsing or layout parsing is supported.
  // * `html`: Override parsing config for HTML files, only digital parsing and
  // layout parsing are supported.
  // * `docx`: Override parsing config for DOCX files, only digital parsing and
  // layout parsing are supported.
  map<string, ParsingConfig> parsing_config_overrides = 5;
}
