xref: /aosp_15_r20/external/googleapis/google/cloud/vision/v1p4beta1/text_annotation.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2019 Google LLC.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15
16syntax = "proto3";
17
18package google.cloud.vision.v1p4beta1;
19
20import "google/cloud/vision/v1p4beta1/geometry.proto";
21
22option cc_enable_arenas = true;
23option go_package = "cloud.google.com/go/vision/apiv1p4beta1/visionpb;visionpb";
24option java_multiple_files = true;
25option java_outer_classname = "TextAnnotationProto";
26option java_package = "com.google.cloud.vision.v1p4beta1";
27option objc_class_prefix = "GCVN";
28
29// TextAnnotation contains a structured representation of OCR extracted text.
30// The hierarchy of an OCR extracted text structure is like this:
31//     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
32// Each structural component, starting from Page, may further have their own
33// properties. Properties describe detected languages, breaks, etc. Please refer
34// to the
35// [TextAnnotation.TextProperty][google.cloud.vision.v1p4beta1.TextAnnotation.TextProperty]
36// message definition below for more detail.
37message TextAnnotation {
38  // Detected language for a structural component.
39  message DetectedLanguage {
40    // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
41    // information, see
42    // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
43    string language_code = 1;
44
45    // Confidence of detected language. Range [0, 1].
46    float confidence = 2;
47  }
48
49  // Detected start or end of a structural component.
50  message DetectedBreak {
51    // Enum to denote the type of break found (new line, space, etc.).
52    enum BreakType {
53      // Unknown break label type. As the zero value, this is also the default.
54      UNKNOWN = 0;
55
56      // Regular space.
57      SPACE = 1;
58
59      // Sure space (very wide).
60      SURE_SPACE = 2;
61
62      // Line-wrapping break.
63      EOL_SURE_SPACE = 3;
64
65      // End-line hyphen not present in text; does not co-occur with `SPACE`,
66      // `LEADER_SPACE` (not in this enum; TODO confirm), or `LINE_BREAK`.
67      HYPHEN = 4;
68
69      // Line break that ends a paragraph.
70      LINE_BREAK = 5;
71    }
72
73    // Detected break type.
74    BreakType type = 1;
75
76    // True if break prepends the element.
77    bool is_prefix = 2;
78  }
79
80  // Additional information detected on the structural component.
81  message TextProperty {
82    // A list of detected languages together with confidence.
83    repeated DetectedLanguage detected_languages = 1;
84
85    // Detected start or end of a text segment.
86    DetectedBreak detected_break = 2;
87  }
88
89  // List of pages detected by OCR.
90  repeated Page pages = 1;
91
92  // UTF-8 text detected on the pages.
93  string text = 2;
94}
95
96// Detected page from OCR.
97message Page {
98  // Additional information detected on the page.
99  TextAnnotation.TextProperty property = 1;
100
101  // Page width. For PDFs the unit is points. For images (including
102  // TIFFs) the unit is pixels.
103  int32 width = 2;
104
105  // Page height. For PDFs the unit is points. For images (including
106  // TIFFs) the unit is pixels.
107  int32 height = 3;
108
109  // List of blocks of text, images, etc. on this page.
110  repeated Block blocks = 4;
111
112  // Confidence of the OCR results on the page. Range [0, 1].
113  float confidence = 5;
114}
115
116// Logical element on the page.
117message Block {
118  // Type of a block (text, image, etc.) as identified by OCR.
119  enum BlockType {
120    // Unknown block type. As the zero value, this is also the default.
121    UNKNOWN = 0;
122
123    // Regular text block.
124    TEXT = 1;
125
126    // Table block.
127    TABLE = 2;
128
129    // Image block.
130    PICTURE = 3;
131
132    // Horizontal/vertical line box.
133    RULER = 4;
134
135    // Barcode block.
136    BARCODE = 5;
137  }
138
139  // Additional information detected for the block.
140  TextAnnotation.TextProperty property = 1;
141
142  // The bounding box for the block.
143  // The vertices are in the order of top-left, top-right, bottom-right,
144  // bottom-left. When a rotation of the bounding box is detected, the rotation
145  // is represented as around the top-left corner, as defined when the text is
146  // read in the 'natural' orientation.
147  // For example:
148  //
149  // * when the text is horizontal it might look like:
150  //
151  //         0----1
152  //         |    |
153  //         3----2
154  //
155  // * when it's rotated 180 degrees around the top-left corner it becomes:
156  //
157  //         2----3
158  //         |    |
159  //         1----0
160  //
161  //   and the vertex order will still be (0, 1, 2, 3).
162  BoundingPoly bounding_box = 2;
163
164  // List of paragraphs in this block (if this block is of type text).
165  repeated Paragraph paragraphs = 3;
166
167  // Detected block type (text, image, etc.) for this block.
168  BlockType block_type = 4;
169
170  // Confidence of the OCR results on the block. Range [0, 1].
171  float confidence = 5;
172}
173
174// Structural unit of text representing a number of words in a certain order.
175message Paragraph {
176  // Additional information detected for the paragraph.
177  TextAnnotation.TextProperty property = 1;
178
179  // The bounding box for the paragraph.
180  // The vertices are in the order of top-left, top-right, bottom-right,
181  // bottom-left. When a rotation of the bounding box is detected, the rotation
182  // is represented as around the top-left corner, as defined when the text is
183  // read in the 'natural' orientation.
184  // For example:
185  //   * when the text is horizontal it might look like:
186  //      0----1
187  //      |    |
188  //      3----2
189  //   * when it's rotated 180 degrees around the top-left corner it becomes:
190  //      2----3
191  //      |    |
192  //      1----0
193  //   and the vertex order will still be (0, 1, 2, 3).
194  BoundingPoly bounding_box = 2;
195
196  // List of all words in this paragraph.
197  repeated Word words = 3;
198
199  // Confidence of the OCR results for the paragraph. Range [0, 1].
200  float confidence = 4;
201}
202
203// A word representation.
204message Word {
205  // Additional information detected for the word.
206  TextAnnotation.TextProperty property = 1;
207
208  // The bounding box for the word.
209  // The vertices are in the order of top-left, top-right, bottom-right,
210  // bottom-left. When a rotation of the bounding box is detected, the rotation
211  // is represented as around the top-left corner, as defined when the text is
212  // read in the 'natural' orientation.
213  // For example:
214  //   * when the text is horizontal it might look like:
215  //      0----1
216  //      |    |
217  //      3----2
218  //   * when it's rotated 180 degrees around the top-left corner it becomes:
219  //      2----3
220  //      |    |
221  //      1----0
222  //   and the vertex order will still be (0, 1, 2, 3).
223  BoundingPoly bounding_box = 2;
224
225  // List of symbols in the word.
226  // The order of the symbols follows the natural reading order.
227  repeated Symbol symbols = 3;
228
229  // Confidence of the OCR results for the word. Range [0, 1].
230  float confidence = 4;
231}
232
233// A single symbol representation.
234message Symbol {
235  // Additional information detected for the symbol.
236  TextAnnotation.TextProperty property = 1;
237
238  // The bounding box for the symbol.
239  // The vertices are in the order of top-left, top-right, bottom-right,
240  // bottom-left. When a rotation of the bounding box is detected, the rotation
241  // is represented as around the top-left corner, as defined when the text is
242  // read in the 'natural' orientation.
243  // For example:
244  //   * when the text is horizontal it might look like:
245  //      0----1
246  //      |    |
247  //      3----2
248  //   * when it's rotated 180 degrees around the top-left corner it becomes:
249  //      2----3
250  //      |    |
251  //      1----0
252  //   and the vertex order will still be (0, 1, 2, 3).
253  BoundingPoly bounding_box = 2;
254
255  // The actual UTF-8 representation of the symbol.
256  string text = 3;
257
258  // Confidence of the OCR results for the symbol. Range [0, 1].
259  float confidence = 4;
260}
261