// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1p1beta1;

import "google/cloud/vision/v1p1beta1/geometry.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/v2/apiv1p1beta1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "TextAnnotationProto";
option java_package = "com.google.cloud.vision.v1p1beta1";

// TextAnnotation contains a structured representation of OCR extracted text.
// The hierarchy of an OCR extracted text structure is like this:
//     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
// Each structural component, starting from Page, may further have its own
// properties. Properties describe detected languages, breaks etc. Please refer
// to the
// [TextAnnotation.TextProperty][google.cloud.vision.v1p1beta1.TextAnnotation.TextProperty]
// message definition below for more detail.
message TextAnnotation {
  // Detected language for a structural component.
  message DetectedLanguage {
    // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
    // information, see
    // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
    string language_code = 1;

    // Confidence of detected language. Range [0, 1].
    float confidence = 2;
  }

  // Detected start or end of a structural component.
  message DetectedBreak {
    // Enum to denote the type of break found. New line, space etc.
    enum BreakType {
      // Unknown break label type.
      UNKNOWN = 0;

      // Regular space.
      SPACE = 1;

      // Sure space (very wide).
      SURE_SPACE = 2;

      // Line-wrapping break.
      EOL_SURE_SPACE = 3;

      // NOTE(review): `LEADER_SPACE`, mentioned below, is not a value of
      // this enum as defined here; confirm the intended name.

      // End-line hyphen that is not present in text; does not co-occur with
      // `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
      HYPHEN = 4;

      // Line break that ends a paragraph.
      LINE_BREAK = 5;
    }

    // Detected break type.
    BreakType type = 1;

    // True if break prepends the element.
    bool is_prefix = 2;
  }

  // Additional information detected on the structural component.
  message TextProperty {
    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 1;

    // Detected start or end of a text segment.
    DetectedBreak detected_break = 2;
  }

  // List of pages detected by OCR.
  repeated Page pages = 1;

  // UTF-8 text detected on the pages.
  string text = 2;
}

// Detected page from OCR.
message Page {
  // Additional information detected on the page.
  TextAnnotation.TextProperty property = 1;

  // Page width in pixels.
  int32 width = 2;

  // Page height in pixels.
  int32 height = 3;

  // List of blocks of text, images etc on this page.
  repeated Block blocks = 4;

  // Confidence of the OCR results on the page. Range [0, 1].
  float confidence = 5;
}

// Logical element on the page.
message Block {
  // Type of a block (text, image etc) as identified by OCR.
  enum BlockType {
    // Unknown block type.
    UNKNOWN = 0;

    // Regular text block.
    TEXT = 1;

    // Table block.
    TABLE = 2;

    // Image block.
    PICTURE = 3;

    // Horizontal/vertical line box.
    RULER = 4;

    // Barcode block.
    BARCODE = 5;
  }

  // Additional information detected for the block.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the block.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of paragraphs in this block (if this block is of type text).
  repeated Paragraph paragraphs = 3;

  // Detected block type (text, image etc) for this block.
  BlockType block_type = 4;

  // Confidence of the OCR results on the block. Range [0, 1].
  float confidence = 5;
}

// Structural unit of text representing a number of words in certain order.
message Paragraph {
  // Additional information detected for the paragraph.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the paragraph.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of words in this paragraph.
  repeated Word words = 3;

  // Confidence of the OCR results for the paragraph. Range [0, 1].
  float confidence = 4;
}

// A word representation.
message Word {
  // Additional information detected for the word.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the word.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // List of symbols in the word.
  // The order of the symbols follows the natural reading order.
  repeated Symbol symbols = 3;

  // Confidence of the OCR results for the word. Range [0, 1].
  float confidence = 4;
}

// A single symbol representation.
message Symbol {
  // Additional information detected for the symbol.
  TextAnnotation.TextProperty property = 1;

  // The bounding box for the symbol.
  // The vertices are in the order of top-left, top-right, bottom-right,
  // bottom-left. When a rotation of the bounding box is detected the rotation
  // is represented as around the top-left corner as defined when the text is
  // read in the 'natural' orientation.
  // For example:
  //   * when the text is horizontal it might look like:
  //      0----1
  //      |    |
  //      3----2
  //   * when it's rotated 180 degrees around the top-left corner it becomes:
  //      2----3
  //      |    |
  //      1----0
  //   and the vertex order will still be (0, 1, 2, 3).
  BoundingPoly bounding_box = 2;

  // The actual UTF-8 representation of the symbol.
  string text = 3;

  // Confidence of the OCR results for the symbol. Range [0, 1].
  float confidence = 4;
}