// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1;

import "google/cloud/vision/v1/geometry.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/v2/apiv1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "TextAnnotationProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// TextAnnotation contains a structured representation of OCR extracted text.
// The hierarchy of an OCR extracted text structure is like this:
//     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
// Each structural component, starting from Page, may further have its own
// properties. Properties describe detected languages, breaks etc. Please refer
// to the
// [TextAnnotation.TextProperty][google.cloud.vision.v1.TextAnnotation.TextProperty]
// message definition below for more detail.
message TextAnnotation {
  // Detected language for a structural component.
  message DetectedLanguage {
    // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
    // information, see
    // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
    string language_code = 1;

    // Confidence of detected language. Range [0, 1].
    float confidence = 2;
  }

  // Detected start or end of a structural component.
  message DetectedBreak {
    // Enum to denote the type of break found. New line, space etc.
    enum BreakType {
      // Unknown break label type.
      UNKNOWN = 0;

      // Regular space.
      SPACE = 1;

      // Sure space (very wide).
      SURE_SPACE = 2;

      // Line-wrapping break.
      EOL_SURE_SPACE = 3;

      // NOTE(review): `LEADER_SPACE` named below is not a value declared in
      // this enum -- confirm whether the reference is stale.

      // End-line hyphen that is not present in text; does not co-occur with
      // `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
      HYPHEN = 4;

      // Line break that ends a paragraph.
      LINE_BREAK = 5;
    }

    // Detected break type.
    BreakType type = 1;

    // True if break prepends the element.
    bool is_prefix = 2;
  }

  // Additional information detected on the structural component.
  message TextProperty {
    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 1;

    // Detected start or end of a text segment.
    DetectedBreak detected_break = 2;
  }

  // List of pages detected by OCR.
  repeated Page pages = 1;

  // UTF-8 text detected on the pages.
  string text = 2;
}

// Detected page from OCR.
message Page {
  // Additional information detected on the page.
  TextAnnotation.TextProperty property = 1;

  // Page width. For PDFs the unit is points. For images (including
  // TIFFs) the unit is pixels.
  int32 width = 2;

  // Page height. For PDFs the unit is points. For images (including
  // TIFFs) the unit is pixels.
  int32 height = 3;

  // List of blocks of text, images etc. on this page.
  repeated Block blocks = 4;

  // Confidence of the OCR results on the page. Range [0, 1].
  float confidence = 5;
}

// Logical element on the page.
message Block {
  // Type of a block (text, image etc.) as identified by OCR.
  enum BlockType {
    // Unknown block type.
    UNKNOWN = 0;

    // Regular text block.
    TEXT = 1;

    // Table block.
    TABLE = 2;

    // Image block.
    PICTURE = 3;

    // Horizontal/vertical line box.
    RULER = 4;

    // Barcode block.
    BARCODE = 5;
  }

  // Additional information detected for the block.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the block.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of paragraphs in this block (if this block is of type text).
  repeated Paragraph paragraphs = 3;

  // Detected block type (text, image etc.) for this block.
  BlockType block_type = 4;

  // Confidence of the OCR results on the block. Range [0, 1].
  float confidence = 5;
}

// Structural unit of text representing a number of words in certain order.
message Paragraph {
  // Additional information detected for the paragraph.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the paragraph.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of all words in this paragraph.
  repeated Word words = 3;

  // Confidence of the OCR results for the paragraph. Range [0, 1].
  float confidence = 4;
}

// A word representation.
message Word {
  // Additional information detected for the word.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the word.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of symbols in the word.
  // The order of the symbols follows the natural reading order.
  repeated Symbol symbols = 3;

  // Confidence of the OCR results for the word. Range [0, 1].
  float confidence = 4;
}

// A single symbol representation.
message Symbol {
  // Additional information detected for the symbol.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the symbol.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //
  // * when the text is horizontal it might look like:
  //
  //         0----1
  //         |    |
  //         3----2
  //
  // * when it's rotated 180 degrees around the top-left corner it becomes:
  //
  //         2----3
  //         |    |
  //         1----0
  //
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // The actual UTF-8 representation of the symbol.
  string text = 3;

  // Confidence of the OCR results for the symbol. Range [0, 1].
  float confidence = 4;
}