// xref: /aosp_15_r20/external/googleapis/google/cloud/vision/v1p1beta1/text_annotation.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1p1beta1;

// NOTE(review): `BoundingPoly`, used by the `bounding_box` fields in this
// file, is not defined here; presumably it is supplied by this import.
import "google/cloud/vision/v1p1beta1/geometry.proto";

// Per-language code generation options.
option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/v2/apiv1p1beta1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "TextAnnotationProto";
option java_package = "com.google.cloud.vision.v1p1beta1";

// TextAnnotation contains a structured representation of OCR extracted text.
// The hierarchy of an OCR extracted text structure is like this:
//     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
// Each structural component, starting from Page, may further have its own
// properties. Properties describe detected languages, breaks, etc. Please refer
// to the
// [TextAnnotation.TextProperty][google.cloud.vision.v1p1beta1.TextAnnotation.TextProperty]
// message definition below for more detail.
message TextAnnotation {
  // Detected language for a structural component.
  message DetectedLanguage {
    // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
    // information, see
    // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
    string language_code = 1;

    // Confidence of detected language. Range [0, 1].
    float confidence = 2;
  }

  // Detected start or end of a structural component.
  message DetectedBreak {
    // Enum to denote the type of break found. New line, space etc.
    enum BreakType {
      // Unknown break label type.
      UNKNOWN = 0;

      // Regular space.
      SPACE = 1;

      // Sure space (very wide).
      SURE_SPACE = 2;

      // Line-wrapping break.
      EOL_SURE_SPACE = 3;

      // NOTE(review): `LEADER_SPACE`, named in the comment below, is not a
      // value declared in this enum; confirm which value was intended.

      // End-line hyphen that is not present in text; does not co-occur with
      // `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
      HYPHEN = 4;

      // Line break that ends a paragraph.
      LINE_BREAK = 5;
    }

    // Detected break type.
    BreakType type = 1;

    // True if break prepends the element.
    bool is_prefix = 2;
  }

  // Additional information detected on the structural component.
  message TextProperty {
    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 1;

    // Detected start or end of a text segment.
    DetectedBreak detected_break = 2;
  }

  // List of pages detected by OCR.
  repeated Page pages = 1;

  // UTF-8 text detected on the pages.
  string text = 2;
}

// Detected page from OCR.
message Page {
  // Additional information detected on the page.
  TextAnnotation.TextProperty property = 1;

  // Page width in pixels.
  int32 width = 2;

  // Page height in pixels.
  int32 height = 3;

  // List of blocks of text, images, etc. on this page.
  repeated Block blocks = 4;

  // Confidence of the OCR results on the page. Range [0, 1].
  float confidence = 5;
}

// Logical element on the page.
message Block {
  // Type of a block (text, image etc) as identified by OCR.
  enum BlockType {
    // Unknown block type.
    UNKNOWN = 0;

    // Regular text block.
    TEXT = 1;

    // Table block.
    TABLE = 2;

    // Image block.
    PICTURE = 3;

    // Horizontal/vertical line box.
    RULER = 4;

    // Barcode block.
    BARCODE = 5;
  }

  // Additional information detected for the block.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the block.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of paragraphs in this block (if this block is of type text).
  repeated Paragraph paragraphs = 3;

  // Detected block type (text, image etc) for this block.
  BlockType block_type = 4;

  // Confidence of the OCR results on the block. Range [0, 1].
  float confidence = 5;
}

// Structural unit of text representing a number of words in certain order.
message Paragraph {
  // Additional information detected for the paragraph.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the paragraph.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of words in this paragraph.
  repeated Word words = 3;

  // Confidence of the OCR results for the paragraph. Range [0, 1].
  float confidence = 4;
}

// A word representation.
message Word {
  // Additional information detected for the word.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the word.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of symbols in the word.
  // The order of the symbols follows the natural reading order.
  repeated Symbol symbols = 3;

  // Confidence of the OCR results for the word. Range [0, 1].
  float confidence = 4;
}

// A single symbol representation.
message Symbol {
  // Additional information detected for the symbol.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the symbol.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // The actual UTF-8 representation of the symbol.
  string text = 3;

  // Confidence of the OCR results for the symbol. Range [0, 1].
  float confidence = 4;
}
