// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.vision.v1p4beta1;

import "google/cloud/vision/v1p4beta1/geometry.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/apiv1p4beta1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "TextAnnotationProto";
option java_package = "com.google.cloud.vision.v1p4beta1";
option objc_class_prefix = "GCVN";

// TextAnnotation contains a structured representation of OCR extracted text.
// The hierarchy of an OCR extracted text structure is like this:
//     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
// Each structural component, starting from Page, may further have their own
// properties. Properties describe detected languages, breaks etc. Please refer
// to the
// [TextAnnotation.TextProperty][google.cloud.vision.v1p4beta1.TextAnnotation.TextProperty]
// message definition below for more detail.
message TextAnnotation {
  // Detected language for a structural component.
  message DetectedLanguage {
    // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
    // information, see
    // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
    string language_code = 1;

    // Confidence of detected language. Range [0, 1].
    float confidence = 2;
  }

  // Detected start or end of a structural component.
  message DetectedBreak {
    // Enum to denote the type of break found. New line, space etc.
    enum BreakType {
      // Unknown break label type.
      UNKNOWN = 0;

      // Regular space.
      SPACE = 1;

      // Sure space (very wide).
      SURE_SPACE = 2;

      // Line-wrapping break.
      EOL_SURE_SPACE = 3;

      // NOTE(review): `LEADER_SPACE`, mentioned in the comment below, is not a
      // value of this enum as defined in this file; confirm which break type
      // is intended before changing the published text.

      // End-line hyphen that is not present in text; does not co-occur with
      // `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
      HYPHEN = 4;

      // Line break that ends a paragraph.
      LINE_BREAK = 5;
    }

    // Detected break type.
    BreakType type = 1;

    // True if break prepends the element.
    bool is_prefix = 2;
  }

  // Additional information detected on the structural component.
  message TextProperty {
    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 1;

    // Detected start or end of a text segment.
    DetectedBreak detected_break = 2;
  }

  // List of pages detected by OCR.
  repeated Page pages = 1;

  // UTF-8 text detected on the pages.
  string text = 2;
}

// Detected page from OCR.
message Page {
  // Additional information detected on the page.
  TextAnnotation.TextProperty property = 1;

  // Page width. For PDFs the unit is points. For images (including
  // TIFFs) the unit is pixels.
  int32 width = 2;

  // Page height. For PDFs the unit is points. For images (including
  // TIFFs) the unit is pixels.
  int32 height = 3;

  // List of blocks of text, images etc on this page.
  repeated Block blocks = 4;

  // Confidence of the OCR results on the page. Range [0, 1].
  float confidence = 5;
}

// Logical element on the page.
message Block {
  // Type of a block (text, image etc) as identified by OCR.
  enum BlockType {
    // Unknown block type.
    UNKNOWN = 0;

    // Regular text block.
    TEXT = 1;

    // Table block.
    TABLE = 2;

    // Image block.
    PICTURE = 3;

    // Horizontal/vertical line box.
    RULER = 4;

    // Barcode block.
    BARCODE = 5;
  }

  // Additional information detected for the block.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the block.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of paragraphs in this block (if this block is of type text).
  repeated Paragraph paragraphs = 3;

  // Detected block type (text, image etc) for this block.
  BlockType block_type = 4;

  // Confidence of the OCR results on the block. Range [0, 1].
  float confidence = 5;
}

// Structural unit of text representing a number of words in certain order.
message Paragraph {
  // Additional information detected for the paragraph.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the paragraph.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of all words in this paragraph.
  repeated Word words = 3;

  // Confidence of the OCR results for the paragraph. Range [0, 1].
  float confidence = 4;
}

// A word representation.
message Word {
  // Additional information detected for the word.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the word.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of symbols in the word.
  // The order of the symbols follows the natural reading order.
  repeated Symbol symbols = 3;

  // Confidence of the OCR results for the word. Range [0, 1].
  float confidence = 4;
}

// A single symbol representation.
message Symbol {
  // Additional information detected for the symbol.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the symbol.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // The actual UTF-8 representation of the symbol.
  string text = 3;

  // Confidence of the OCR results for the symbol. Range [0, 1].
  float confidence = 4;
}