// xref: /aosp_15_r20/external/googleapis/google/cloud/vision/v1/text_annotation.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
//
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1;

// `BoundingPoly`, used by the messages in this file, is not declared here; it
// is expected to be provided by this import.
import "google/cloud/vision/v1/geometry.proto";

// Per-language code generation options.
option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/v2/apiv1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "TextAnnotationProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// TextAnnotation contains a structured representation of OCR extracted text.
// The hierarchy of an OCR extracted text structure is like this:
//     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
// Each structural component, starting from Page, may further have its own
// properties. Properties describe detected languages, breaks, etc. Please
// refer to the
// [TextAnnotation.TextProperty][google.cloud.vision.v1.TextAnnotation.TextProperty]
// message definition below for more detail.
message TextAnnotation {
  // Detected language for a structural component.
  message DetectedLanguage {
    // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
    // information, see
    // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
    string language_code = 1;

    // Confidence of detected language. Range [0, 1].
    float confidence = 2;
  }

  // Detected start or end of a structural component.
  message DetectedBreak {
    // Enum to denote the type of break found: new line, space, etc.
    enum BreakType {
      // Unknown break label type.
      UNKNOWN = 0;

      // Regular space.
      SPACE = 1;

      // Sure space (very wide).
      SURE_SPACE = 2;

      // Line-wrapping break.
      EOL_SURE_SPACE = 3;

      // NOTE(review): the comment below names `LEADER_SPACE`, which is not a
      // value of this enum as declared here; confirm the intended value.

      // End-line hyphen that is not present in text; does not co-occur with
      // `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
      HYPHEN = 4;

      // Line break that ends a paragraph.
      LINE_BREAK = 5;
    }

    // Detected break type.
    BreakType type = 1;

    // True if break prepends the element.
    bool is_prefix = 2;
  }

  // Additional information detected on the structural component.
  message TextProperty {
    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 1;

    // Detected start or end of a text segment.
    DetectedBreak detected_break = 2;
  }

  // List of pages detected by OCR.
  repeated Page pages = 1;

  // UTF-8 text detected on the pages.
  string text = 2;
}

// Detected page from OCR.
message Page {
  // Additional information detected on the page.
  TextAnnotation.TextProperty property = 1;

  // Page width. For PDFs the unit is points. For images (including
  // TIFFs) the unit is pixels.
  int32 width = 2;

  // Page height. For PDFs the unit is points. For images (including
  // TIFFs) the unit is pixels.
  int32 height = 3;

  // List of blocks of text, images, etc. on this page.
  repeated Block blocks = 4;

  // Confidence of the OCR results on the page. Range [0, 1].
  float confidence = 5;
}

// Logical element on the page.
message Block {
  // Type of a block (text, image, etc.) as identified by OCR.
  enum BlockType {
    // Unknown block type.
    UNKNOWN = 0;

    // Regular text block.
    TEXT = 1;

    // Table block.
    TABLE = 2;

    // Image block.
    PICTURE = 3;

    // Horizontal/vertical line box.
    RULER = 4;

    // Barcode block.
    BARCODE = 5;
  }

  // Additional information detected for the block.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the block.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of paragraphs in this block (if this block is of type text).
  repeated Paragraph paragraphs = 3;

  // Detected block type (text, image, etc.) for this block.
  BlockType block_type = 4;

  // Confidence of the OCR results on the block. Range [0, 1].
  float confidence = 5;
}

// Structural unit of text representing a number of words in certain order.
message Paragraph {
  // Additional information detected for the paragraph.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the paragraph.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of all words in this paragraph.
  repeated Word words = 3;

  // Confidence of the OCR results for the paragraph. Range [0, 1].
  float confidence = 4;
}

// A word representation.
message Word {
  // Additional information detected for the word.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the word.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of symbols in the word.
  // The order of the symbols follows the natural reading order.
  repeated Symbol symbols = 3;

  // Confidence of the OCR results for the word. Range [0, 1].
  float confidence = 4;
}

// A single symbol representation.
message Symbol {
  // Additional information detected for the symbol.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the symbol.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // The actual UTF-8 representation of the symbol.
  string text = 3;

  // Confidence of the OCR results for the symbol. Range [0, 1].
  float confidence = 4;
}
