// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1;

import "google/cloud/vision/v1/geometry.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/v2/apiv1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "TextAnnotationProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// TextAnnotation contains a structured representation of OCR extracted text.
// The hierarchy of an OCR extracted text structure is like this:
//     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
// Each structural component, starting from Page, may further have its own
// properties. Properties describe detected languages, breaks etc. Please refer
// to the
// [TextAnnotation.TextProperty][google.cloud.vision.v1.TextAnnotation.TextProperty]
// message definition below for more detail.
message TextAnnotation {
  // Detected language for a structural component.
  message DetectedLanguage {
    // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
    // information, see
    // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
    string language_code = 1;

    // Confidence of detected language. Range [0, 1].
    float confidence = 2;
  }

  // Detected start or end of a structural component.
  message DetectedBreak {
    // Enum to denote the type of break found. New line, space etc.
    enum BreakType {
      // Unknown break label type.
      UNKNOWN = 0;

      // Regular space.
      SPACE = 1;

      // Sure space (very wide).
      SURE_SPACE = 2;

      // Line-wrapping break.
      EOL_SURE_SPACE = 3;

      // NOTE(review): `LEADER_SPACE`, named in the comment below, is not a
      // value of this enum; confirm which break type was intended.

      // End-line hyphen that is not present in text; does not co-occur with
      // `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
      HYPHEN = 4;

      // Line break that ends a paragraph.
      LINE_BREAK = 5;
    }

    // Detected break type.
    BreakType type = 1;

    // True if break prepends the element.
    bool is_prefix = 2;
  }

  // Additional information detected on the structural component.
  message TextProperty {
    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 1;

    // Detected start or end of a text segment.
    DetectedBreak detected_break = 2;
  }

  // List of pages detected by OCR.
  repeated Page pages = 1;

  // UTF-8 text detected on the pages.
  string text = 2;
}

// Detected page from OCR.
message Page {
  // Additional information detected on the page.
  TextAnnotation.TextProperty property = 1;

  // Page width. For PDFs the unit is points. For images (including
  // TIFFs) the unit is pixels.
  int32 width = 2;

  // Page height. For PDFs the unit is points. For images (including
  // TIFFs) the unit is pixels.
  int32 height = 3;

  // List of blocks of text, images etc on this page.
  repeated Block blocks = 4;

  // Confidence of the OCR results on the page. Range [0, 1].
  float confidence = 5;
}

// Logical element on the page.
message Block {
  // Type of a block (text, image etc) as identified by OCR.
  enum BlockType {
    // Unknown block type.
    UNKNOWN = 0;

    // Regular text block.
    TEXT = 1;

    // Table block.
    TABLE = 2;

    // Image block.
    PICTURE = 3;

    // Horizontal/vertical line box.
    RULER = 4;

    // Barcode block.
    BARCODE = 5;
  }

  // Additional information detected for the block.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the block.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of paragraphs in this block (if this block is of type text).
  repeated Paragraph paragraphs = 3;

  // Detected block type (text, image etc) for this block.
  BlockType block_type = 4;

  // Confidence of the OCR results on the block. Range [0, 1].
  float confidence = 5;
}

// Structural unit of text representing a number of words in certain order.
message Paragraph {
  // Additional information detected for the paragraph.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the paragraph.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of all words in this paragraph.
  repeated Word words = 3;

  // Confidence of the OCR results for the paragraph. Range [0, 1].
  float confidence = 4;
}

// A word representation.
message Word {
  // Additional information detected for the word.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the word.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of symbols in the word.
  // The order of the symbols follows the natural reading order.
  repeated Symbol symbols = 3;

  // Confidence of the OCR results for the word. Range [0, 1].
  float confidence = 4;
}

// A single symbol representation.
message Symbol {
  // Additional information detected for the symbol.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the symbol.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // The actual UTF-8 representation of the symbol.
  string text = 3;

  // Confidence of the OCR results for the symbol. Range [0, 1].
  float confidence = 4;
}