1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Implementation of image ops.""" 16 17import functools 18import numpy as np 19 20from tensorflow.python.eager import context 21from tensorflow.python.eager import def_function 22from tensorflow.python.framework import config 23from tensorflow.python.framework import constant_op 24from tensorflow.python.framework import dtypes 25from tensorflow.python.framework import ops 26from tensorflow.python.framework import random_seed 27from tensorflow.python.framework import tensor_shape 28from tensorflow.python.framework import tensor_util 29from tensorflow.python.ops import array_ops 30from tensorflow.python.ops import check_ops 31from tensorflow.python.ops import control_flow_ops 32from tensorflow.python.ops import gen_image_ops 33from tensorflow.python.ops import math_ops 34from tensorflow.python.ops import nn 35from tensorflow.python.ops import nn_ops 36from tensorflow.python.ops import random_ops 37from tensorflow.python.ops import sort_ops 38from tensorflow.python.ops import stateless_random_ops 39from tensorflow.python.ops import string_ops 40from tensorflow.python.ops import variables 41from tensorflow.python.util import deprecation 42from tensorflow.python.util import dispatch 43from tensorflow.python.util.tf_export import tf_export 44 45ops.NotDifferentiable('RandomCrop') 46# TODO(b/31222613): This op may be differentiable, and there may be 47# latent bugs here. 48ops.NotDifferentiable('HSVToRGB') 49ops.NotDifferentiable('DrawBoundingBoxes') 50ops.NotDifferentiable('SampleDistortedBoundingBox') 51ops.NotDifferentiable('SampleDistortedBoundingBoxV2') 52# TODO(bsteiner): Implement the gradient function for extract_glimpse 53# TODO(b/31222613): This op may be differentiable, and there may be 54# latent bugs here. 55ops.NotDifferentiable('ExtractGlimpse') 56ops.NotDifferentiable('NonMaxSuppression') 57ops.NotDifferentiable('NonMaxSuppressionV2') 58ops.NotDifferentiable('NonMaxSuppressionWithOverlaps') 59ops.NotDifferentiable('GenerateBoundingBoxProposals') 60 61 62# pylint: disable=invalid-name 63def _assert(cond, ex_type, msg): 64 """A polymorphic assert, works with tensors and boolean expressions. 65 66 If `cond` is not a tensor, behave like an ordinary assert statement, except 67 that a empty list is returned. If `cond` is a tensor, return a list 68 containing a single TensorFlow assert op. 69 70 Args: 71 cond: Something evaluates to a boolean value. May be a tensor. 72 ex_type: The exception class to use. 73 msg: The error message. 74 75 Returns: 76 A list, containing at most one assert op. 77 """ 78 if _is_tensor(cond): 79 return [control_flow_ops.Assert(cond, [msg])] 80 else: 81 if not cond: 82 raise ex_type(msg) 83 else: 84 return [] 85 86 87def _is_tensor(x): 88 """Returns `True` if `x` is a symbolic tensor-like object. 89 90 Args: 91 x: A python object to check. 92 93 Returns: 94 `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`. 95 """ 96 return isinstance(x, (ops.Tensor, variables.Variable)) 97 98 99def _ImageDimensions(image, rank): 100 """Returns the dimensions of an image tensor. 101 102 Args: 103 image: A rank-D Tensor. For 3-D of shape: `[height, width, channels]`. 104 rank: The expected rank of the image 105 106 Returns: 107 A list of corresponding to the dimensions of the 108 input image. Dimensions that are statically known are python integers, 109 otherwise, they are integer scalar tensors. 110 """ 111 if image.get_shape().is_fully_defined(): 112 return image.get_shape().as_list() 113 else: 114 static_shape = image.get_shape().with_rank(rank).as_list() 115 dynamic_shape = array_ops.unstack(array_ops.shape(image), rank) 116 return [ 117 s if s is not None else d for s, d in zip(static_shape, dynamic_shape) 118 ] 119 120 121def _Check3DImage(image, require_static=True): 122 """Assert that we are working with a properly shaped image. 123 124 Args: 125 image: 3-D Tensor of shape [height, width, channels] 126 require_static: If `True`, requires that all dimensions of `image` are known 127 and non-zero. 128 129 Raises: 130 ValueError: if `image.shape` is not a 3-vector. 131 132 Returns: 133 An empty list, if `image` has fully defined dimensions. Otherwise, a list 134 containing an assert op is returned. 135 """ 136 try: 137 image_shape = image.get_shape().with_rank(3) 138 except ValueError: 139 raise ValueError("'image' (shape %s) must be three-dimensional." % 140 image.shape) 141 if require_static and not image_shape.is_fully_defined(): 142 raise ValueError("'image' (shape %s) must be fully defined." % image_shape) 143 if any(x == 0 for x in image_shape): 144 raise ValueError("all dims of 'image.shape' must be > 0: %s" % image_shape) 145 if not image_shape.is_fully_defined(): 146 return [ 147 check_ops.assert_positive( 148 array_ops.shape(image), 149 ["all dims of 'image.shape' " 150 'must be > 0.']) 151 ] 152 else: 153 return [] 154 155 156def _Assert3DImage(image): 157 """Assert that we are working with a properly shaped image. 158 159 Performs the check statically if possible (i.e. if the shape 160 is statically known). Otherwise adds a control dependency 161 to an assert op that checks the dynamic shape. 162 163 Args: 164 image: 3-D Tensor of shape [height, width, channels] 165 166 Raises: 167 ValueError: if `image.shape` is not a 3-vector. 168 169 Returns: 170 If the shape of `image` could be verified statically, `image` is 171 returned unchanged, otherwise there will be a control dependency 172 added that asserts the correct dynamic shape. 173 """ 174 return control_flow_ops.with_dependencies( 175 _Check3DImage(image, require_static=False), image) 176 177 178def _AssertAtLeast3DImage(image): 179 """Assert that we are working with a properly shaped image. 180 181 Performs the check statically if possible (i.e. if the shape 182 is statically known). Otherwise adds a control dependency 183 to an assert op that checks the dynamic shape. 184 185 Args: 186 image: >= 3-D Tensor of size [*, height, width, depth] 187 188 Raises: 189 ValueError: if image.shape is not a [>= 3] vector. 190 191 Returns: 192 If the shape of `image` could be verified statically, `image` is 193 returned unchanged, otherwise there will be a control dependency 194 added that asserts the correct dynamic shape. 195 """ 196 return control_flow_ops.with_dependencies( 197 _CheckAtLeast3DImage(image, require_static=False), image) 198 199 200def _CheckAtLeast3DImage(image, require_static=True): 201 """Assert that we are working with a properly shaped image. 202 203 Args: 204 image: >= 3-D Tensor of size [*, height, width, depth] 205 require_static: If `True`, requires that all dimensions of `image` are known 206 and non-zero. 207 208 Raises: 209 ValueError: if image.shape is not a [>= 3] vector. 210 211 Returns: 212 An empty list, if `image` has fully defined dimensions. Otherwise, a list 213 containing an assert op is returned. 214 """ 215 try: 216 if image.get_shape().ndims is None: 217 image_shape = image.get_shape().with_rank(3) 218 else: 219 image_shape = image.get_shape().with_rank_at_least(3) 220 except ValueError: 221 raise ValueError("'image' (shape %s) must be at least three-dimensional." % 222 image.shape) 223 if require_static and not image_shape.is_fully_defined(): 224 raise ValueError('\'image\' must be fully defined.') 225 if any(x == 0 for x in image_shape[-3:]): 226 raise ValueError('inner 3 dims of \'image.shape\' must be > 0: %s' % 227 image_shape) 228 if not image_shape[-3:].is_fully_defined(): 229 return [ 230 check_ops.assert_positive( 231 array_ops.shape(image)[-3:], 232 ["inner 3 dims of 'image.shape' " 233 'must be > 0.']), 234 check_ops.assert_greater_equal( 235 array_ops.rank(image), 236 3, 237 message="'image' must be at least three-dimensional.") 238 ] 239 else: 240 return [] 241 242 243def _AssertGrayscaleImage(image): 244 """Assert that we are working with a properly shaped grayscale image. 245 246 Performs the check statically if possible (i.e. if the shape 247 is statically known). Otherwise adds a control dependency 248 to an assert op that checks the dynamic shape. 249 250 Args: 251 image: >= 2-D Tensor of size [*, 1] 252 253 Raises: 254 ValueError: if image.shape is not a [>= 2] vector or if 255 last dimension is not size 1. 256 257 Returns: 258 If the shape of `image` could be verified statically, `image` is 259 returned unchanged, otherwise there will be a control dependency 260 added that asserts the correct dynamic shape. 261 """ 262 return control_flow_ops.with_dependencies( 263 _CheckGrayscaleImage(image, require_static=False), image) 264 265 266def _CheckGrayscaleImage(image, require_static=True): 267 """Assert that we are working with properly shaped grayscale image. 268 269 Args: 270 image: >= 2-D Tensor of size [*, 1] 271 require_static: Boolean, whether static shape is required. 272 273 Raises: 274 ValueError: if image.shape is not a [>= 2] vector or if 275 last dimension is not size 1. 276 277 Returns: 278 An empty list, if `image` has fully defined dimensions. Otherwise, a list 279 containing an assert op is returned. 280 """ 281 try: 282 if image.get_shape().ndims is None: 283 image_shape = image.get_shape().with_rank(2) 284 else: 285 image_shape = image.get_shape().with_rank_at_least(2) 286 except ValueError: 287 raise ValueError('A grayscale image (shape %s) must be at least ' 288 'two-dimensional.' % image.shape) 289 if require_static and not image_shape.is_fully_defined(): 290 raise ValueError('\'image\' must be fully defined.') 291 if image_shape.is_fully_defined(): 292 if image_shape[-1] != 1: 293 raise ValueError('Last dimension of a grayscale image should be size 1.') 294 if not image_shape.is_fully_defined(): 295 return [ 296 check_ops.assert_equal( 297 array_ops.shape(image)[-1], 298 1, 299 message='Last dimension of a grayscale image should be size 1.'), 300 check_ops.assert_greater_equal( 301 array_ops.rank(image), 302 3, 303 message='A grayscale image must be at least two-dimensional.') 304 ] 305 else: 306 return [] 307 308 309def fix_image_flip_shape(image, result): 310 """Set the shape to 3 dimensional if we don't know anything else. 311 312 Args: 313 image: original image size 314 result: flipped or transformed image 315 316 Returns: 317 An image whose shape is at least (None, None, None). 318 """ 319 320 image_shape = image.get_shape() 321 if image_shape == tensor_shape.unknown_shape(): 322 result.set_shape([None, None, None]) 323 else: 324 result.set_shape(image_shape) 325 return result 326 327 328@tf_export('image.random_flip_up_down') 329@dispatch.add_dispatch_support 330def random_flip_up_down(image, seed=None): 331 """Randomly flips an image vertically (upside down). 332 333 With a 1 in 2 chance, outputs the contents of `image` flipped along the first 334 dimension, which is `height`. Otherwise, output the image as-is. 335 When passing a batch of images, each image will be randomly flipped 336 independent of other images. 337 338 Example usage: 339 340 >>> image = np.array([[[1], [2]], [[3], [4]]]) 341 >>> tf.image.random_flip_up_down(image, 3).numpy().tolist() 342 [[[3], [4]], [[1], [2]]] 343 344 Randomly flip multiple images. 345 346 >>> images = np.array( 347 ... [ 348 ... [[[1], [2]], [[3], [4]]], 349 ... [[[5], [6]], [[7], [8]]] 350 ... ]) 351 >>> tf.image.random_flip_up_down(images, 4).numpy().tolist() 352 [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]] 353 354 For producing deterministic results given a `seed` value, use 355 `tf.image.stateless_random_flip_up_down`. Unlike using the `seed` param 356 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 357 same results given the same seed independent of how many times the function is 358 called, and independent of global seed settings (e.g. tf.random.set_seed). 359 360 Args: 361 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 362 of shape `[height, width, channels]`. 363 seed: A Python integer. Used to create a random seed. See 364 `tf.compat.v1.set_random_seed` for behavior. 365 366 Returns: 367 A tensor of the same type and shape as `image`. 368 Raises: 369 ValueError: if the shape of `image` not supported. 370 """ 371 random_func = functools.partial(random_ops.random_uniform, seed=seed) 372 return _random_flip(image, 0, random_func, 'random_flip_up_down') 373 374 375@tf_export('image.random_flip_left_right') 376@dispatch.add_dispatch_support 377def random_flip_left_right(image, seed=None): 378 """Randomly flip an image horizontally (left to right). 379 380 With a 1 in 2 chance, outputs the contents of `image` flipped along the 381 second dimension, which is `width`. Otherwise output the image as-is. 382 When passing a batch of images, each image will be randomly flipped 383 independent of other images. 384 385 Example usage: 386 387 >>> image = np.array([[[1], [2]], [[3], [4]]]) 388 >>> tf.image.random_flip_left_right(image, 5).numpy().tolist() 389 [[[2], [1]], [[4], [3]]] 390 391 Randomly flip multiple images. 392 393 >>> images = np.array( 394 ... [ 395 ... [[[1], [2]], [[3], [4]]], 396 ... [[[5], [6]], [[7], [8]]] 397 ... ]) 398 >>> tf.image.random_flip_left_right(images, 6).numpy().tolist() 399 [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]] 400 401 For producing deterministic results given a `seed` value, use 402 `tf.image.stateless_random_flip_left_right`. Unlike using the `seed` param 403 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 404 same results given the same seed independent of how many times the function is 405 called, and independent of global seed settings (e.g. tf.random.set_seed). 406 407 Args: 408 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 409 of shape `[height, width, channels]`. 410 seed: A Python integer. Used to create a random seed. See 411 `tf.compat.v1.set_random_seed` for behavior. 412 413 Returns: 414 A tensor of the same type and shape as `image`. 415 416 Raises: 417 ValueError: if the shape of `image` not supported. 418 """ 419 random_func = functools.partial(random_ops.random_uniform, seed=seed) 420 return _random_flip(image, 1, random_func, 'random_flip_left_right') 421 422 423@tf_export('image.stateless_random_flip_left_right', v1=[]) 424@dispatch.add_dispatch_support 425def stateless_random_flip_left_right(image, seed): 426 """Randomly flip an image horizontally (left to right) deterministically. 427 428 Guarantees the same results given the same `seed` independent of how many 429 times the function is called, and independent of global seed settings (e.g. 430 `tf.random.set_seed`). 431 432 Example usage: 433 434 >>> image = np.array([[[1], [2]], [[3], [4]]]) 435 >>> seed = (2, 3) 436 >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist() 437 [[[2], [1]], [[4], [3]]] 438 439 Args: 440 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 441 of shape `[height, width, channels]`. 442 seed: A shape [2] Tensor, the seed to the random number generator. Must have 443 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 444 445 Returns: 446 A tensor of the same type and shape as `image`. 447 """ 448 random_func = functools.partial( 449 stateless_random_ops.stateless_random_uniform, seed=seed) 450 return _random_flip( 451 image, 1, random_func, 'stateless_random_flip_left_right') 452 453 454@tf_export('image.stateless_random_flip_up_down', v1=[]) 455@dispatch.add_dispatch_support 456def stateless_random_flip_up_down(image, seed): 457 """Randomly flip an image vertically (upside down) deterministically. 458 459 Guarantees the same results given the same `seed` independent of how many 460 times the function is called, and independent of global seed settings (e.g. 461 `tf.random.set_seed`). 462 463 Example usage: 464 465 >>> image = np.array([[[1], [2]], [[3], [4]]]) 466 >>> seed = (2, 3) 467 >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist() 468 [[[3], [4]], [[1], [2]]] 469 470 Args: 471 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 472 of shape `[height, width, channels]`. 473 seed: A shape [2] Tensor, the seed to the random number generator. Must have 474 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 475 476 Returns: 477 A tensor of the same type and shape as `image`. 478 """ 479 random_func = functools.partial( 480 stateless_random_ops.stateless_random_uniform, seed=seed) 481 return _random_flip( 482 image, 0, random_func, 'stateless_random_flip_up_down') 483 484 485def _random_flip(image, flip_index, random_func, scope_name): 486 """Randomly (50% chance) flip an image along axis `flip_index`. 487 488 Args: 489 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 490 of shape `[height, width, channels]`. 491 flip_index: Dimension along which to flip the image. 492 Vertical is 0, Horizontal is 1. 493 random_func: partial function for calling either stateful or stateless 494 random ops with `seed` parameter specified. 495 scope_name: Name of the scope in which the ops are added. 496 497 Returns: 498 A tensor of the same type and shape as `image`. 499 500 Raises: 501 ValueError: if the shape of `image` not supported. 502 """ 503 with ops.name_scope(None, scope_name, [image]) as scope: 504 image = ops.convert_to_tensor(image, name='image') 505 image = _AssertAtLeast3DImage(image) 506 shape = image.get_shape() 507 508 def f_rank3(): 509 uniform_random = random_func(shape=[], minval=0, maxval=1.0) 510 mirror_cond = math_ops.less(uniform_random, .5) 511 result = control_flow_ops.cond( 512 mirror_cond, 513 lambda: array_ops.reverse(image, [flip_index]), 514 lambda: image, 515 name=scope) 516 return fix_image_flip_shape(image, result) 517 518 def f_rank4(): 519 batch_size = array_ops.shape(image)[0] 520 uniform_random = random_func(shape=[batch_size], minval=0, maxval=1.0) 521 flips = math_ops.round( 522 array_ops.reshape(uniform_random, [batch_size, 1, 1, 1])) 523 flips = math_ops.cast(flips, image.dtype) 524 flipped_input = array_ops.reverse(image, [flip_index + 1]) 525 return flips * flipped_input + (1 - flips) * image 526 527 if shape.ndims is None: 528 rank = array_ops.rank(image) 529 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 530 if shape.ndims == 3: 531 return f_rank3() 532 elif shape.ndims == 4: 533 return f_rank4() 534 else: 535 raise ValueError( 536 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 537 538 539@tf_export('image.flip_left_right') 540@dispatch.add_dispatch_support 541def flip_left_right(image): 542 """Flip an image horizontally (left to right). 543 544 Outputs the contents of `image` flipped along the width dimension. 545 546 See also `tf.reverse`. 547 548 Usage Example: 549 550 >>> x = [[[1.0, 2.0, 3.0], 551 ... [4.0, 5.0, 6.0]], 552 ... [[7.0, 8.0, 9.0], 553 ... [10.0, 11.0, 12.0]]] 554 >>> tf.image.flip_left_right(x) 555 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 556 array([[[ 4., 5., 6.], 557 [ 1., 2., 3.]], 558 [[10., 11., 12.], 559 [ 7., 8., 9.]]], dtype=float32)> 560 561 Args: 562 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 563 of shape `[height, width, channels]`. 564 565 Returns: 566 A tensor of the same type and shape as `image`. 567 568 Raises: 569 ValueError: if the shape of `image` not supported. 570 """ 571 return _flip(image, 1, 'flip_left_right') 572 573 574@tf_export('image.flip_up_down') 575@dispatch.add_dispatch_support 576def flip_up_down(image): 577 """Flip an image vertically (upside down). 578 579 Outputs the contents of `image` flipped along the height dimension. 580 581 See also `reverse()`. 582 583 Usage Example: 584 585 >>> x = [[[1.0, 2.0, 3.0], 586 ... [4.0, 5.0, 6.0]], 587 ... [[7.0, 8.0, 9.0], 588 ... [10.0, 11.0, 12.0]]] 589 >>> tf.image.flip_up_down(x) 590 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 591 array([[[ 7., 8., 9.], 592 [10., 11., 12.]], 593 [[ 1., 2., 3.], 594 [ 4., 5., 6.]]], dtype=float32)> 595 596 Args: 597 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 598 of shape `[height, width, channels]`. 599 600 Returns: 601 A `Tensor` of the same type and shape as `image`. 602 603 Raises: 604 ValueError: if the shape of `image` not supported. 605 """ 606 return _flip(image, 0, 'flip_up_down') 607 608 609def _flip(image, flip_index, scope_name): 610 """Flip an image either horizontally or vertically. 611 612 Outputs the contents of `image` flipped along the dimension `flip_index`. 613 614 See also `reverse()`. 615 616 Args: 617 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 618 of shape `[height, width, channels]`. 619 flip_index: 0 For vertical, 1 for horizontal. 620 scope_name: string, scope name. 621 622 Returns: 623 A `Tensor` of the same type and shape as `image`. 624 625 Raises: 626 ValueError: if the shape of `image` not supported. 627 """ 628 with ops.name_scope(None, scope_name, [image]): 629 image = ops.convert_to_tensor(image, name='image') 630 image = _AssertAtLeast3DImage(image) 631 shape = image.get_shape() 632 633 def f_rank3(): 634 return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index])) 635 636 def f_rank4(): 637 return array_ops.reverse(image, [flip_index + 1]) 638 639 if shape.ndims is None: 640 rank = array_ops.rank(image) 641 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 642 elif shape.ndims == 3: 643 return f_rank3() 644 elif shape.ndims == 4: 645 return f_rank4() 646 else: 647 raise ValueError( 648 '\'image\' (shape %s)must have either 3 or 4 dimensions.' % shape) 649 650 651@tf_export('image.rot90') 652@dispatch.add_dispatch_support 653def rot90(image, k=1, name=None): 654 """Rotate image(s) counter-clockwise by 90 degrees. 655 656 657 For example: 658 659 >>> a=tf.constant([[[1],[2]], 660 ... [[3],[4]]]) 661 >>> # rotating `a` counter clockwise by 90 degrees 662 >>> a_rot=tf.image.rot90(a) 663 >>> print(a_rot[...,0].numpy()) 664 [[2 4] 665 [1 3]] 666 >>> # rotating `a` counter clockwise by 270 degrees 667 >>> a_rot=tf.image.rot90(a, k=3) 668 >>> print(a_rot[...,0].numpy()) 669 [[3 1] 670 [4 2]] 671 672 Args: 673 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 674 of shape `[height, width, channels]`. 675 k: A scalar integer tensor. The number of times the image(s) are 676 rotated by 90 degrees. 677 name: A name for this operation (optional). 678 679 Returns: 680 A rotated tensor of the same type and shape as `image`. 681 682 Raises: 683 ValueError: if the shape of `image` not supported. 684 """ 685 with ops.name_scope(name, 'rot90', [image, k]) as scope: 686 image = ops.convert_to_tensor(image, name='image') 687 image = _AssertAtLeast3DImage(image) 688 k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k') 689 k.get_shape().assert_has_rank(0) 690 k = math_ops.mod(k, 4) 691 692 shape = image.get_shape() 693 if shape.ndims is None: 694 rank = array_ops.rank(image) 695 696 def f_rank3(): 697 return _rot90_3D(image, k, scope) 698 699 def f_rank4(): 700 return _rot90_4D(image, k, scope) 701 702 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 703 elif shape.ndims == 3: 704 return _rot90_3D(image, k, scope) 705 elif shape.ndims == 4: 706 return _rot90_4D(image, k, scope) 707 else: 708 raise ValueError( 709 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 710 711 712def _rot90_3D(image, k, name_scope): 713 """Rotate image counter-clockwise by 90 degrees `k` times. 714 715 Args: 716 image: 3-D Tensor of shape `[height, width, channels]`. 717 k: A scalar integer. The number of times the image is rotated by 90 degrees. 718 name_scope: A valid TensorFlow name scope. 719 720 Returns: 721 A 3-D tensor of the same type and shape as `image`. 722 723 """ 724 725 def _rot90(): 726 return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2]) 727 728 def _rot180(): 729 return array_ops.reverse_v2(image, [0, 1]) 730 731 def _rot270(): 732 return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1]) 733 734 cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180), 735 (math_ops.equal(k, 3), _rot270)] 736 737 result = control_flow_ops.case( 738 cases, default=lambda: image, exclusive=True, name=name_scope) 739 result.set_shape([None, None, image.get_shape()[2]]) 740 return result 741 742 743def _rot90_4D(images, k, name_scope): 744 """Rotate batch of images counter-clockwise by 90 degrees `k` times. 745 746 Args: 747 images: 4-D Tensor of shape `[height, width, channels]`. 748 k: A scalar integer. The number of times the images are rotated by 90 749 degrees. 750 name_scope: A valid TensorFlow name scope. 751 752 Returns: 753 A 4-D `Tensor` of the same type and shape as `images`. 754 """ 755 756 def _rot90(): 757 return array_ops.transpose(array_ops.reverse_v2(images, [2]), [0, 2, 1, 3]) 758 759 def _rot180(): 760 return array_ops.reverse_v2(images, [1, 2]) 761 762 def _rot270(): 763 return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2]) 764 765 cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180), 766 (math_ops.equal(k, 3), _rot270)] 767 768 result = control_flow_ops.case( 769 cases, default=lambda: images, exclusive=True, name=name_scope) 770 shape = result.get_shape() 771 result.set_shape([shape[0], None, None, shape[3]]) 772 return result 773 774 775@tf_export('image.transpose', v1=['image.transpose', 'image.transpose_image']) 776@dispatch.add_dispatch_support 777def transpose(image, name=None): 778 """Transpose image(s) by swapping the height and width dimension. 779 780 Usage Example: 781 782 >>> x = [[[1.0, 2.0, 3.0], 783 ... [4.0, 5.0, 6.0]], 784 ... [[7.0, 8.0, 9.0], 785 ... [10.0, 11.0, 12.0]]] 786 >>> tf.image.transpose(x) 787 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 788 array([[[ 1., 2., 3.], 789 [ 7., 8., 9.]], 790 [[ 4., 5., 6.], 791 [10., 11., 12.]]], dtype=float32)> 792 793 Args: 794 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 795 of shape `[height, width, channels]`. 796 name: A name for this operation (optional). 797 798 Returns: 799 If `image` was 4-D, a 4-D float Tensor of shape 800 `[batch, width, height, channels]` 801 If `image` was 3-D, a 3-D float Tensor of shape 802 `[width, height, channels]` 803 804 Raises: 805 ValueError: if the shape of `image` not supported. 806 807 Usage Example: 808 809 >>> image = [[[1, 2], [3, 4]], 810 ... [[5, 6], [7, 8]], 811 ... [[9, 10], [11, 12]]] 812 >>> image = tf.constant(image) 813 >>> tf.image.transpose(image) 814 <tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy= 815 array([[[ 1, 2], 816 [ 5, 6], 817 [ 9, 10]], 818 [[ 3, 4], 819 [ 7, 8], 820 [11, 12]]], dtype=int32)> 821 """ 822 with ops.name_scope(name, 'transpose', [image]): 823 image = ops.convert_to_tensor(image, name='image') 824 image = _AssertAtLeast3DImage(image) 825 shape = image.get_shape() 826 if shape.ndims is None: 827 rank = array_ops.rank(image) 828 829 def f_rank3(): 830 return array_ops.transpose(image, [1, 0, 2], name=name) 831 832 def f_rank4(): 833 return array_ops.transpose(image, [0, 2, 1, 3], name=name) 834 835 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 836 elif shape.ndims == 3: 837 return array_ops.transpose(image, [1, 0, 2], name=name) 838 elif shape.ndims == 4: 839 return array_ops.transpose(image, [0, 2, 1, 3], name=name) 840 else: 841 raise ValueError( 842 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 843 844 845@tf_export('image.central_crop') 846@dispatch.add_dispatch_support 847def central_crop(image, central_fraction): 848 """Crop the central region of the image(s). 849 850 Remove the outer parts of an image but retain the central region of the image 851 along each dimension. If we specify `central_fraction = 0.5`, this function 852 returns the region marked with "X" in the below diagram. The larger the value 853 of `central_fraction`, the larger the dimension of the region to be cropped 854 and retained. 855 856 -------- 857 | | 858 | XXXX | 859 | XXXX | 860 | | where "X" is the central 50% of the image. 861 -------- 862 863 This function works on either a single image (`image` is a 3-D Tensor), or a 864 batch of images (`image` is a 4-D Tensor). 865 866 Usage Example: 867 868 >>> x = [[[1.0, 2.0, 3.0], 869 ... [4.0, 5.0, 6.0], 870 ... [7.0, 8.0, 9.0], 871 ... [10.0, 11.0, 12.0]], 872 ... [[13.0, 14.0, 15.0], 873 ... [16.0, 17.0, 18.0], 874 ... [19.0, 20.0, 21.0], 875 ... [22.0, 23.0, 24.0]], 876 ... [[25.0, 26.0, 27.0], 877 ... [28.0, 29.0, 30.0], 878 ... [31.0, 32.0, 33.0], 879 ... [34.0, 35.0, 36.0]], 880 ... [[37.0, 38.0, 39.0], 881 ... [40.0, 41.0, 42.0], 882 ... [43.0, 44.0, 45.0], 883 ... [46.0, 47.0, 48.0]]] 884 >>> tf.image.central_crop(x, 0.5) 885 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 886 array([[[16., 17., 18.], 887 [19., 20., 21.]], 888 [[28., 29., 30.], 889 [31., 32., 33.]]], dtype=float32)> 890 891 Args: 892 image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D 893 Tensor of shape [batch_size, height, width, depth]. 894 central_fraction: float (0, 1], fraction of size to crop 895 896 Raises: 897 ValueError: if central_crop_fraction is not within (0, 1]. 898 899 Returns: 900 3-D / 4-D float Tensor, as per the input. 901 """ 902 with ops.name_scope(None, 'central_crop', [image]): 903 image = ops.convert_to_tensor(image, name='image') 904 central_fraction_static = tensor_util.constant_value(central_fraction) 905 if central_fraction_static is not None: 906 if central_fraction_static <= 0.0 or central_fraction_static > 1.0: 907 raise ValueError('central_fraction must be within (0, 1]') 908 if central_fraction_static == 1.0: 909 return image 910 else: 911 assert_ops = _assert( 912 math_ops.logical_or(central_fraction > 0.0, central_fraction <= 1.0), 913 ValueError, 'central_fraction must be within (0, 1]') 914 image = control_flow_ops.with_dependencies(assert_ops, image) 915 916 _AssertAtLeast3DImage(image) 917 rank = image.get_shape().ndims 918 if rank != 3 and rank != 4: 919 raise ValueError('`image` should either be a Tensor with rank = 3 or ' 920 'rank = 4. Had rank = {}.'.format(rank)) 921 922 # Helper method to return the `idx`-th dimension of `tensor`, along with 923 # a boolean signifying if the dimension is dynamic. 924 def _get_dim(tensor, idx): 925 static_shape = tensor.get_shape().dims[idx].value 926 if static_shape is not None: 927 return static_shape, False 928 return array_ops.shape(tensor)[idx], True 929 930 # Get the height, width, depth (and batch size, if the image is a 4-D 931 # tensor). 932 if rank == 3: 933 img_h, dynamic_h = _get_dim(image, 0) 934 img_w, dynamic_w = _get_dim(image, 1) 935 img_d = image.get_shape()[2] 936 else: 937 img_bs = image.get_shape()[0] 938 img_h, dynamic_h = _get_dim(image, 1) 939 img_w, dynamic_w = _get_dim(image, 2) 940 img_d = image.get_shape()[3] 941 942 dynamic_h = dynamic_h or (central_fraction_static is None) 943 dynamic_w = dynamic_w or (central_fraction_static is None) 944 945 # Compute the bounding boxes for the crop. The type and value of the 946 # bounding boxes depend on the `image` tensor's rank and whether / not the 947 # dimensions are statically defined. 948 if dynamic_h: 949 img_hd = math_ops.cast(img_h, dtypes.float64) 950 bbox_h_start = math_ops.cast( 951 (img_hd - img_hd * math_ops.cast(central_fraction, dtypes.float64)) / 952 2, dtypes.int32) 953 else: 954 img_hd = float(img_h) 955 bbox_h_start = int((img_hd - img_hd * central_fraction_static) / 2) 956 957 if dynamic_w: 958 img_wd = math_ops.cast(img_w, dtypes.float64) 959 bbox_w_start = math_ops.cast( 960 (img_wd - img_wd * math_ops.cast(central_fraction, dtypes.float64)) / 961 2, dtypes.int32) 962 else: 963 img_wd = float(img_w) 964 bbox_w_start = int((img_wd - img_wd * central_fraction_static) / 2) 965 966 bbox_h_size = img_h - bbox_h_start * 2 967 bbox_w_size = img_w - bbox_w_start * 2 968 969 if rank == 3: 970 bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0]) 971 bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1]) 972 else: 973 bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0]) 974 bbox_size = array_ops.stack([-1, bbox_h_size, bbox_w_size, -1]) 975 976 image = array_ops.slice(image, bbox_begin, bbox_size) 977 978 # Reshape the `image` tensor to the desired size. 979 if rank == 3: 980 image.set_shape([ 981 None if dynamic_h else bbox_h_size, 982 None if dynamic_w else bbox_w_size, img_d 983 ]) 984 else: 985 image.set_shape([ 986 img_bs, None if dynamic_h else bbox_h_size, 987 None if dynamic_w else bbox_w_size, img_d 988 ]) 989 return image 990 991 992@tf_export('image.pad_to_bounding_box') 993@dispatch.add_dispatch_support 994def pad_to_bounding_box(image, offset_height, offset_width, target_height, 995 target_width): 996 """Pad `image` with zeros to the specified `height` and `width`. 997 998 Adds `offset_height` rows of zeros on top, `offset_width` columns of 999 zeros on the left, and then pads the image on the bottom and right 1000 with zeros until it has dimensions `target_height`, `target_width`. 1001 1002 This op does nothing if `offset_*` is zero and the image already has size 1003 `target_height` by `target_width`. 1004 1005 Usage Example: 1006 1007 >>> x = [[[1., 2., 3.], 1008 ... [4., 5., 6.]], 1009 ... [[7., 8., 9.], 1010 ... [10., 11., 12.]]] 1011 >>> padded_image = tf.image.pad_to_bounding_box(x, 1, 1, 4, 4) 1012 >>> padded_image 1013 <tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy= 1014 array([[[ 0., 0., 0.], 1015 [ 0., 0., 0.], 1016 [ 0., 0., 0.], 1017 [ 0., 0., 0.]], 1018 [[ 0., 0., 0.], 1019 [ 1., 2., 3.], 1020 [ 4., 5., 6.], 1021 [ 0., 0., 0.]], 1022 [[ 0., 0., 0.], 1023 [ 7., 8., 9.], 1024 [10., 11., 12.], 1025 [ 0., 0., 0.]], 1026 [[ 0., 0., 0.], 1027 [ 0., 0., 0.], 1028 [ 0., 0., 0.], 1029 [ 0., 0., 0.]]], dtype=float32)> 1030 1031 Args: 1032 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1033 of shape `[height, width, channels]`. 1034 offset_height: Number of rows of zeros to add on top. 1035 offset_width: Number of columns of zeros to add on the left. 1036 target_height: Height of output image. 1037 target_width: Width of output image. 1038 1039 Returns: 1040 If `image` was 4-D, a 4-D float Tensor of shape 1041 `[batch, target_height, target_width, channels]` 1042 If `image` was 3-D, a 3-D float Tensor of shape 1043 `[target_height, target_width, channels]` 1044 1045 Raises: 1046 ValueError: If the shape of `image` is incompatible with the `offset_*` or 1047 `target_*` arguments, or either `offset_height` or `offset_width` is 1048 negative. 1049 """ 1050 return pad_to_bounding_box_internal( 1051 image, 1052 offset_height, 1053 offset_width, 1054 target_height, 1055 target_width, 1056 check_dims=True) 1057 1058 1059# TODO(b/190099338) Remove this internal method and remap call sites to call 1060# image_ops.pad_to_bounding_box when asserts are no longer serialized. See also 1061# b/204377079#comment6 for more context. 1062def pad_to_bounding_box_internal(image, offset_height, offset_width, 1063 target_height, target_width, check_dims): 1064 """Pad `image` with zeros to the specified `height` and `width`. 1065 1066 Adds `offset_height` rows of zeros on top, `offset_width` columns of 1067 zeros on the left, and then pads the image on the bottom and right 1068 with zeros until it has dimensions `target_height`, `target_width`. 1069 1070 This op does nothing if `offset_*` is zero and the image already has size 1071 `target_height` by `target_width`. 1072 1073 Args: 1074 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1075 of shape `[height, width, channels]`. 1076 offset_height: Number of rows of zeros to add on top. 1077 offset_width: Number of columns of zeros to add on the left. 1078 target_height: Height of output image. 1079 target_width: Width of output image. 1080 check_dims: If True, assert that dimensions are non-negative and in range. 1081 In multi-GPU distributed settings, assertions can cause program slowdown. 1082 Setting this parameter to `False` avoids this, resulting in faster speed 1083 in some situations, with the tradeoff being that some error checking is 1084 not happening. 1085 1086 Returns: 1087 If `image` was 4-D, a 4-D float Tensor of shape 1088 `[batch, target_height, target_width, channels]` 1089 If `image` was 3-D, a 3-D float Tensor of shape 1090 `[target_height, target_width, channels]` 1091 1092 Raises: 1093 ValueError: If the shape of `image` is incompatible with the `offset_*` or 1094 `target_*` arguments, or either `offset_height` or `offset_width` is 1095 negative. Not raised if `check_dims` is `False`. 1096 """ 1097 with ops.name_scope(None, 'pad_to_bounding_box', [image]): 1098 image = ops.convert_to_tensor(image, name='image') 1099 1100 is_batch = True 1101 image_shape = image.get_shape() 1102 if image_shape.ndims == 3: 1103 is_batch = False 1104 image = array_ops.expand_dims(image, 0) 1105 elif image_shape.ndims is None: 1106 is_batch = False 1107 image = array_ops.expand_dims(image, 0) 1108 image.set_shape([None] * 4) 1109 elif image_shape.ndims != 4: 1110 raise ValueError( 1111 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1112 image_shape) 1113 1114 batch, height, width, depth = _ImageDimensions(image, rank=4) 1115 1116 after_padding_width = target_width - offset_width - width 1117 1118 after_padding_height = target_height - offset_height - height 1119 1120 if check_dims: 1121 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1122 assert_ops += _assert(offset_height >= 0, ValueError, 1123 'offset_height must be >= 0') 1124 assert_ops += _assert(offset_width >= 0, ValueError, 1125 'offset_width must be >= 0') 1126 assert_ops += _assert(after_padding_width >= 0, ValueError, 1127 'width must be <= target - offset') 1128 assert_ops += _assert(after_padding_height >= 0, ValueError, 1129 'height must be <= target - offset') 1130 image = control_flow_ops.with_dependencies(assert_ops, image) 1131 1132 # Do not pad on the depth dimensions. 1133 paddings = array_ops.reshape( 1134 array_ops.stack([ 1135 0, 0, offset_height, after_padding_height, offset_width, 1136 after_padding_width, 0, 0 1137 ]), [4, 2]) 1138 padded = array_ops.pad(image, paddings) 1139 1140 padded_shape = [ 1141 None if _is_tensor(i) else i 1142 for i in [batch, target_height, target_width, depth] 1143 ] 1144 padded.set_shape(padded_shape) 1145 1146 if not is_batch: 1147 padded = array_ops.squeeze(padded, axis=[0]) 1148 1149 return padded 1150 1151 1152@tf_export('image.crop_to_bounding_box') 1153@dispatch.add_dispatch_support 1154def crop_to_bounding_box(image, offset_height, offset_width, target_height, 1155 target_width): 1156 """Crops an `image` to a specified bounding box. 1157 1158 This op cuts a rectangular bounding box out of `image`. The top-left corner 1159 of the bounding box is at `offset_height, offset_width` in `image`, and the 1160 lower-right corner is at 1161 `offset_height + target_height, offset_width + target_width`. 1162 1163 Example Usage: 1164 1165 >>> image = tf.constant(np.arange(1, 28, dtype=np.float32), shape=[3, 3, 3]) 1166 >>> image[:,:,0] # print the first channel of the 3-D tensor 1167 <tf.Tensor: shape=(3, 3), dtype=float32, numpy= 1168 array([[ 1., 4., 7.], 1169 [10., 13., 16.], 1170 [19., 22., 25.]], dtype=float32)> 1171 >>> cropped_image = tf.image.crop_to_bounding_box(image, 0, 0, 2, 2) 1172 >>> cropped_image[:,:,0] # print the first channel of the cropped 3-D tensor 1173 <tf.Tensor: shape=(2, 2), dtype=float32, numpy= 1174 array([[ 1., 4.], 1175 [10., 13.]], dtype=float32)> 1176 1177 Args: 1178 image: 4-D `Tensor` of shape `[batch, height, width, channels]` or 3-D 1179 `Tensor` of shape `[height, width, channels]`. 1180 offset_height: Vertical coordinate of the top-left corner of the bounding 1181 box in `image`. 1182 offset_width: Horizontal coordinate of the top-left corner of the bounding 1183 box in `image`. 1184 target_height: Height of the bounding box. 1185 target_width: Width of the bounding box. 1186 1187 Returns: 1188 If `image` was 4-D, a 4-D `Tensor` of shape 1189 `[batch, target_height, target_width, channels]`. 1190 If `image` was 3-D, a 3-D `Tensor` of shape 1191 `[target_height, target_width, channels]`. 1192 It has the same dtype with `image`. 1193 1194 Raises: 1195 ValueError: `image` is not a 3-D or 4-D `Tensor`. 1196 ValueError: `offset_width < 0` or `offset_height < 0`. 1197 ValueError: `target_width <= 0` or `target_height <= 0`. 1198 ValueError: `width < offset_width + target_width` or 1199 `height < offset_height + target_height`. 1200 """ 1201 with ops.name_scope(None, 'crop_to_bounding_box', [image]): 1202 image = ops.convert_to_tensor(image, name='image') 1203 1204 is_batch = True 1205 image_shape = image.get_shape() 1206 if image_shape.ndims == 3: 1207 is_batch = False 1208 image = array_ops.expand_dims(image, 0) 1209 elif image_shape.ndims is None: 1210 is_batch = False 1211 image = array_ops.expand_dims(image, 0) 1212 image.set_shape([None] * 4) 1213 elif image_shape.ndims != 4: 1214 raise ValueError( 1215 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1216 image_shape) 1217 1218 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1219 1220 batch, height, width, depth = _ImageDimensions(image, rank=4) 1221 1222 assert_ops += _assert(offset_width >= 0, ValueError, 1223 'offset_width must be >= 0.') 1224 assert_ops += _assert(offset_height >= 0, ValueError, 1225 'offset_height must be >= 0.') 1226 assert_ops += _assert(target_width > 0, ValueError, 1227 'target_width must be > 0.') 1228 assert_ops += _assert(target_height > 0, ValueError, 1229 'target_height must be > 0.') 1230 assert_ops += _assert(width >= (target_width + offset_width), ValueError, 1231 'width must be >= target + offset.') 1232 assert_ops += _assert(height >= (target_height + offset_height), ValueError, 1233 'height must be >= target + offset.') 1234 image = control_flow_ops.with_dependencies(assert_ops, image) 1235 1236 cropped = array_ops.slice( 1237 image, array_ops.stack([0, offset_height, offset_width, 0]), 1238 array_ops.stack([array_ops.shape(image)[0], target_height, target_width, 1239 array_ops.shape(image)[3]])) 1240 1241 cropped_shape = [ 1242 None if _is_tensor(i) else i 1243 for i in [batch, target_height, target_width, depth] 1244 ] 1245 cropped.set_shape(cropped_shape) 1246 1247 if not is_batch: 1248 cropped = array_ops.squeeze(cropped, axis=[0]) 1249 1250 return cropped 1251 1252 1253@tf_export( 1254 'image.resize_with_crop_or_pad', 1255 v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad']) 1256@dispatch.add_dispatch_support 1257def resize_image_with_crop_or_pad(image, target_height, target_width): 1258 """Crops and/or pads an image to a target width and height. 1259 1260 Resizes an image to a target width and height by either centrally 1261 cropping the image or padding it evenly with zeros. 1262 1263 If `width` or `height` is greater than the specified `target_width` or 1264 `target_height` respectively, this op centrally crops along that dimension. 1265 1266 For example: 1267 1268 >>> image = np.arange(75).reshape(5, 5, 3) # create 3-D image input 1269 >>> image[:,:,0] # print first channel just for demo purposes 1270 array([[ 0, 3, 6, 9, 12], 1271 [15, 18, 21, 24, 27], 1272 [30, 33, 36, 39, 42], 1273 [45, 48, 51, 54, 57], 1274 [60, 63, 66, 69, 72]]) 1275 >>> image = tf.image.resize_with_crop_or_pad(image, 3, 3) # crop 1276 >>> # print first channel for demo purposes; centrally cropped output 1277 >>> image[:,:,0] 1278 <tf.Tensor: shape=(3, 3), dtype=int64, numpy= 1279 array([[18, 21, 24], 1280 [33, 36, 39], 1281 [48, 51, 54]])> 1282 1283 If `width` or `height` is smaller than the specified `target_width` or 1284 `target_height` respectively, this op centrally pads with 0 along that 1285 dimension. 1286 1287 For example: 1288 1289 >>> image = np.arange(1, 28).reshape(3, 3, 3) # create 3-D image input 1290 >>> image[:,:,0] # print first channel just for demo purposes 1291 array([[ 1, 4, 7], 1292 [10, 13, 16], 1293 [19, 22, 25]]) 1294 >>> image = tf.image.resize_with_crop_or_pad(image, 5, 5) # pad 1295 >>> # print first channel for demo purposes; we should see 0 paddings 1296 >>> image[:,:,0] 1297 <tf.Tensor: shape=(5, 5), dtype=int64, numpy= 1298 array([[ 0, 0, 0, 0, 0], 1299 [ 0, 1, 4, 7, 0], 1300 [ 0, 10, 13, 16, 0], 1301 [ 0, 19, 22, 25, 0], 1302 [ 0, 0, 0, 0, 0]])> 1303 1304 Args: 1305 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1306 of shape `[height, width, channels]`. 1307 target_height: Target height. 1308 target_width: Target width. 1309 1310 Raises: 1311 ValueError: if `target_height` or `target_width` are zero or negative. 1312 1313 Returns: 1314 Cropped and/or padded image. 1315 If `images` was 4-D, a 4-D float Tensor of shape 1316 `[batch, new_height, new_width, channels]`. 1317 If `images` was 3-D, a 3-D float Tensor of shape 1318 `[new_height, new_width, channels]`. 1319 """ 1320 with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]): 1321 image = ops.convert_to_tensor(image, name='image') 1322 image_shape = image.get_shape() 1323 is_batch = True 1324 if image_shape.ndims == 3: 1325 is_batch = False 1326 image = array_ops.expand_dims(image, 0) 1327 elif image_shape.ndims is None: 1328 is_batch = False 1329 image = array_ops.expand_dims(image, 0) 1330 image.set_shape([None] * 4) 1331 elif image_shape.ndims != 4: 1332 raise ValueError( 1333 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1334 image_shape) 1335 1336 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1337 assert_ops += _assert(target_width > 0, ValueError, 1338 'target_width must be > 0.') 1339 assert_ops += _assert(target_height > 0, ValueError, 1340 'target_height must be > 0.') 1341 1342 image = control_flow_ops.with_dependencies(assert_ops, image) 1343 # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks. 1344 # Make sure our checks come first, so that error messages are clearer. 1345 if _is_tensor(target_height): 1346 target_height = control_flow_ops.with_dependencies( 1347 assert_ops, target_height) 1348 if _is_tensor(target_width): 1349 target_width = control_flow_ops.with_dependencies(assert_ops, 1350 target_width) 1351 1352 def max_(x, y): 1353 if _is_tensor(x) or _is_tensor(y): 1354 return math_ops.maximum(x, y) 1355 else: 1356 return max(x, y) 1357 1358 def min_(x, y): 1359 if _is_tensor(x) or _is_tensor(y): 1360 return math_ops.minimum(x, y) 1361 else: 1362 return min(x, y) 1363 1364 def equal_(x, y): 1365 if _is_tensor(x) or _is_tensor(y): 1366 return math_ops.equal(x, y) 1367 else: 1368 return x == y 1369 1370 _, height, width, _ = _ImageDimensions(image, rank=4) 1371 width_diff = target_width - width 1372 offset_crop_width = max_(-width_diff // 2, 0) 1373 offset_pad_width = max_(width_diff // 2, 0) 1374 1375 height_diff = target_height - height 1376 offset_crop_height = max_(-height_diff // 2, 0) 1377 offset_pad_height = max_(height_diff // 2, 0) 1378 1379 # Maybe crop if needed. 1380 cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width, 1381 min_(target_height, height), 1382 min_(target_width, width)) 1383 1384 # Maybe pad if needed. 1385 resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width, 1386 target_height, target_width) 1387 1388 # In theory all the checks below are redundant. 1389 if resized.get_shape().ndims is None: 1390 raise ValueError('resized contains no shape.') 1391 1392 _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4) 1393 1394 assert_ops = [] 1395 assert_ops += _assert( 1396 equal_(resized_height, target_height), ValueError, 1397 'resized height is not correct.') 1398 assert_ops += _assert( 1399 equal_(resized_width, target_width), ValueError, 1400 'resized width is not correct.') 1401 1402 resized = control_flow_ops.with_dependencies(assert_ops, resized) 1403 1404 if not is_batch: 1405 resized = array_ops.squeeze(resized, axis=[0]) 1406 1407 return resized 1408 1409 1410@tf_export(v1=['image.ResizeMethod']) 1411class ResizeMethodV1: 1412 """See `v1.image.resize` for details.""" 1413 BILINEAR = 0 1414 NEAREST_NEIGHBOR = 1 1415 BICUBIC = 2 1416 AREA = 3 1417 1418 1419@tf_export('image.ResizeMethod', v1=[]) 1420class ResizeMethod: 1421 """See `tf.image.resize` for details.""" 1422 BILINEAR = 'bilinear' 1423 NEAREST_NEIGHBOR = 'nearest' 1424 BICUBIC = 'bicubic' 1425 AREA = 'area' 1426 LANCZOS3 = 'lanczos3' 1427 LANCZOS5 = 'lanczos5' 1428 GAUSSIAN = 'gaussian' 1429 MITCHELLCUBIC = 'mitchellcubic' 1430 1431 1432def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name, 1433 skip_resize_if_same): 1434 """Core functionality for v1 and v2 resize functions.""" 1435 with ops.name_scope(name, 'resize', [images, size]): 1436 images = ops.convert_to_tensor(images, name='images') 1437 if images.get_shape().ndims is None: 1438 raise ValueError('\'images\' contains no shape.') 1439 # TODO(shlens): Migrate this functionality to the underlying Op's. 1440 is_batch = True 1441 if images.get_shape().ndims == 3: 1442 is_batch = False 1443 images = array_ops.expand_dims(images, 0) 1444 elif images.get_shape().ndims != 4: 1445 raise ValueError('\'images\' must have either 3 or 4 dimensions.') 1446 1447 _, height, width, _ = images.get_shape().as_list() 1448 1449 try: 1450 size = ops.convert_to_tensor(size, dtypes.int32, name='size') 1451 except (TypeError, ValueError): 1452 raise ValueError('\'size\' must be a 1-D int32 Tensor') 1453 if not size.get_shape().is_compatible_with([2]): 1454 raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: ' 1455 'new_height, new_width') 1456 1457 if preserve_aspect_ratio: 1458 # Get the current shapes of the image, even if dynamic. 1459 _, current_height, current_width, _ = _ImageDimensions(images, rank=4) 1460 1461 # do the computation to find the right scale and height/width. 1462 scale_factor_height = ( 1463 math_ops.cast(size[0], dtypes.float32) / 1464 math_ops.cast(current_height, dtypes.float32)) 1465 scale_factor_width = ( 1466 math_ops.cast(size[1], dtypes.float32) / 1467 math_ops.cast(current_width, dtypes.float32)) 1468 scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width) 1469 scaled_height_const = math_ops.cast( 1470 math_ops.round(scale_factor * 1471 math_ops.cast(current_height, dtypes.float32)), 1472 dtypes.int32) 1473 scaled_width_const = math_ops.cast( 1474 math_ops.round(scale_factor * 1475 math_ops.cast(current_width, dtypes.float32)), 1476 dtypes.int32) 1477 1478 # NOTE: Reset the size and other constants used later. 1479 size = ops.convert_to_tensor([scaled_height_const, scaled_width_const], 1480 dtypes.int32, 1481 name='size') 1482 1483 size_const_as_shape = tensor_util.constant_value_as_shape(size) 1484 new_height_const = tensor_shape.dimension_at_index(size_const_as_shape, 1485 0).value 1486 new_width_const = tensor_shape.dimension_at_index(size_const_as_shape, 1487 1).value 1488 1489 # If we can determine that the height and width will be unmodified by this 1490 # transformation, we avoid performing the resize. 1491 if skip_resize_if_same and all( 1492 x is not None 1493 for x in [new_width_const, width, new_height_const, height]) and ( 1494 width == new_width_const and height == new_height_const): 1495 if not is_batch: 1496 images = array_ops.squeeze(images, axis=[0]) 1497 return images 1498 1499 images = resizer_fn(images, size) 1500 1501 # NOTE(mrry): The shape functions for the resize ops cannot unpack 1502 # the packed values in `new_size`, so set the shape here. 1503 images.set_shape([None, new_height_const, new_width_const, None]) 1504 1505 if not is_batch: 1506 images = array_ops.squeeze(images, axis=[0]) 1507 return images 1508 1509 1510@tf_export(v1=['image.resize_images', 'image.resize']) 1511@dispatch.add_dispatch_support 1512def resize_images(images, 1513 size, 1514 method=ResizeMethodV1.BILINEAR, 1515 align_corners=False, 1516 preserve_aspect_ratio=False, 1517 name=None): 1518 """Resize `images` to `size` using the specified `method`. 1519 1520 Resized images will be distorted if their original aspect ratio is not 1521 the same as `size`. To avoid distortions see 1522 `tf.image.resize_with_pad` or `tf.image.resize_with_crop_or_pad`. 1523 1524 The `method` can be one of: 1525 1526 * <b>`tf.image.ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.]( 1527 https://en.wikipedia.org/wiki/Bilinear_interpolation) 1528 * <b>`tf.image.ResizeMethod.NEAREST_NEIGHBOR`</b>: [ 1529 Nearest neighbor interpolation.]( 1530 https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) 1531 * <b>`tf.image.ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.]( 1532 https://en.wikipedia.org/wiki/Bicubic_interpolation) 1533 * <b>`tf.image.ResizeMethod.AREA`</b>: Area interpolation. 1534 1535 The return value has the same type as `images` if `method` is 1536 `tf.image.ResizeMethod.NEAREST_NEIGHBOR`. It will also have the same type 1537 as `images` if the size of `images` can be statically determined to be the 1538 same as `size`, because `images` is returned in this case. Otherwise, the 1539 return value has type `float32`. 1540 1541 Args: 1542 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1543 of shape `[height, width, channels]`. 1544 size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new 1545 size for the images. 1546 method: ResizeMethod. Defaults to `tf.image.ResizeMethod.BILINEAR`. 1547 align_corners: bool. If True, the centers of the 4 corner pixels of the 1548 input and output tensors are aligned, preserving the values at the corner 1549 pixels. Defaults to `False`. 1550 preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set, 1551 then `images` will be resized to a size that fits in `size` while 1552 preserving the aspect ratio of the original image. Scales up the image if 1553 `size` is bigger than the current size of the `image`. Defaults to False. 1554 name: A name for this operation (optional). 1555 1556 Raises: 1557 ValueError: if the shape of `images` is incompatible with the 1558 shape arguments to this function 1559 ValueError: if `size` has invalid shape or type. 1560 ValueError: if an unsupported resize method is specified. 1561 1562 Returns: 1563 If `images` was 4-D, a 4-D float Tensor of shape 1564 `[batch, new_height, new_width, channels]`. 1565 If `images` was 3-D, a 3-D float Tensor of shape 1566 `[new_height, new_width, channels]`. 1567 """ 1568 1569 def resize_fn(images_t, new_size): 1570 """Legacy resize core function, passed to _resize_images_common.""" 1571 if method == ResizeMethodV1.BILINEAR or method == ResizeMethod.BILINEAR: 1572 return gen_image_ops.resize_bilinear( 1573 images_t, new_size, align_corners=align_corners) 1574 elif (method == ResizeMethodV1.NEAREST_NEIGHBOR or 1575 method == ResizeMethod.NEAREST_NEIGHBOR): 1576 return gen_image_ops.resize_nearest_neighbor( 1577 images_t, new_size, align_corners=align_corners) 1578 elif method == ResizeMethodV1.BICUBIC or method == ResizeMethod.BICUBIC: 1579 return gen_image_ops.resize_bicubic( 1580 images_t, new_size, align_corners=align_corners) 1581 elif method == ResizeMethodV1.AREA or method == ResizeMethod.AREA: 1582 return gen_image_ops.resize_area( 1583 images_t, new_size, align_corners=align_corners) 1584 else: 1585 raise ValueError('Resize method is not implemented: {}'.format(method)) 1586 1587 return _resize_images_common( 1588 images, 1589 resize_fn, 1590 size, 1591 preserve_aspect_ratio=preserve_aspect_ratio, 1592 name=name, 1593 skip_resize_if_same=True) 1594 1595 1596@tf_export('image.resize', v1=[]) 1597@dispatch.add_dispatch_support 1598def resize_images_v2(images, 1599 size, 1600 method=ResizeMethod.BILINEAR, 1601 preserve_aspect_ratio=False, 1602 antialias=False, 1603 name=None): 1604 """Resize `images` to `size` using the specified `method`. 1605 1606 Resized images will be distorted if their original aspect ratio is not 1607 the same as `size`. To avoid distortions see 1608 `tf.image.resize_with_pad`. 1609 1610 >>> image = tf.constant([ 1611 ... [1,0,0,0,0], 1612 ... [0,1,0,0,0], 1613 ... [0,0,1,0,0], 1614 ... [0,0,0,1,0], 1615 ... [0,0,0,0,1], 1616 ... ]) 1617 >>> # Add "batch" and "channels" dimensions 1618 >>> image = image[tf.newaxis, ..., tf.newaxis] 1619 >>> image.shape.as_list() # [batch, height, width, channels] 1620 [1, 5, 5, 1] 1621 >>> tf.image.resize(image, [3,5])[0,...,0].numpy() 1622 array([[0.6666667, 0.3333333, 0. , 0. , 0. ], 1623 [0. , 0. , 1. , 0. , 0. ], 1624 [0. , 0. , 0. , 0.3333335, 0.6666665]], 1625 dtype=float32) 1626 1627 It works equally well with a single image instead of a batch of images: 1628 1629 >>> tf.image.resize(image[0], [3,5]).shape.as_list() 1630 [3, 5, 1] 1631 1632 When `antialias` is true, the sampling filter will anti-alias the input image 1633 as well as interpolate. When downsampling an image with [anti-aliasing]( 1634 https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter 1635 kernel is scaled in order to properly anti-alias the input image signal. 1636 `antialias` has no effect when upsampling an image: 1637 1638 >>> a = tf.image.resize(image, [5,10]) 1639 >>> b = tf.image.resize(image, [5,10], antialias=True) 1640 >>> tf.reduce_max(abs(a - b)).numpy() 1641 0.0 1642 1643 The `method` argument expects an item from the `image.ResizeMethod` enum, or 1644 the string equivalent. The options are: 1645 1646 * <b>`bilinear`</b>: [Bilinear interpolation.]( 1647 https://en.wikipedia.org/wiki/Bilinear_interpolation) If `antialias` is 1648 true, becomes a hat/tent filter function with radius 1 when downsampling. 1649 * <b>`lanczos3`</b>: [Lanczos kernel]( 1650 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3. 1651 High-quality practical filter but may have some ringing, especially on 1652 synthetic images. 1653 * <b>`lanczos5`</b>: [Lanczos kernel] ( 1654 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5. 1655 Very-high-quality filter but may have stronger ringing. 1656 * <b>`bicubic`</b>: [Cubic interpolant]( 1657 https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to 1658 Catmull-Rom kernel. Reasonably good quality and faster than Lanczos3Kernel, 1659 particularly when upsampling. 1660 * <b>`gaussian`</b>: [Gaussian kernel]( 1661 https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3, 1662 sigma = 1.5 / 3.0. 1663 * <b>`nearest`</b>: [Nearest neighbor interpolation.]( 1664 https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) 1665 `antialias` has no effect when used with nearest neighbor interpolation. 1666 * <b>`area`</b>: Anti-aliased resampling with area interpolation. 1667 `antialias` has no effect when used with area interpolation; it 1668 always anti-aliases. 1669 * <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter. 1670 For synthetic images (especially those lacking proper prefiltering), less 1671 ringing than Keys cubic kernel but less sharp. 1672 1673 Note: Near image edges the filtering kernel may be partially outside the 1674 image boundaries. For these pixels, only input pixels inside the image will be 1675 included in the filter sum, and the output value will be appropriately 1676 normalized. 1677 1678 The return value has type `float32`, unless the `method` is 1679 `ResizeMethod.NEAREST_NEIGHBOR`, then the return dtype is the dtype 1680 of `images`: 1681 1682 >>> nn = tf.image.resize(image, [5,7], method='nearest') 1683 >>> nn[0,...,0].numpy() 1684 array([[1, 0, 0, 0, 0, 0, 0], 1685 [0, 1, 1, 0, 0, 0, 0], 1686 [0, 0, 0, 1, 0, 0, 0], 1687 [0, 0, 0, 0, 1, 1, 0], 1688 [0, 0, 0, 0, 0, 0, 1]], dtype=int32) 1689 1690 With `preserve_aspect_ratio=True`, the aspect ratio is preserved, so `size` 1691 is the maximum for each dimension: 1692 1693 >>> max_10_20 = tf.image.resize(image, [10,20], preserve_aspect_ratio=True) 1694 >>> max_10_20.shape.as_list() 1695 [1, 10, 10, 1] 1696 1697 Args: 1698 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1699 of shape `[height, width, channels]`. 1700 size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new 1701 size for the images. 1702 method: An `image.ResizeMethod`, or string equivalent. Defaults to 1703 `bilinear`. 1704 preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set, 1705 then `images` will be resized to a size that fits in `size` while 1706 preserving the aspect ratio of the original image. Scales up the image if 1707 `size` is bigger than the current size of the `image`. Defaults to False. 1708 antialias: Whether to use an anti-aliasing filter when downsampling an 1709 image. 1710 name: A name for this operation (optional). 1711 1712 Raises: 1713 ValueError: if the shape of `images` is incompatible with the 1714 shape arguments to this function 1715 ValueError: if `size` has an invalid shape or type. 1716 ValueError: if an unsupported resize method is specified. 1717 1718 Returns: 1719 If `images` was 4-D, a 4-D float Tensor of shape 1720 `[batch, new_height, new_width, channels]`. 1721 If `images` was 3-D, a 3-D float Tensor of shape 1722 `[new_height, new_width, channels]`. 1723 """ 1724 1725 def resize_fn(images_t, new_size): 1726 """Resize core function, passed to _resize_images_common.""" 1727 scale_and_translate_methods = [ 1728 ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN, 1729 ResizeMethod.MITCHELLCUBIC 1730 ] 1731 1732 def resize_with_scale_and_translate(method): 1733 scale = ( 1734 math_ops.cast(new_size, dtype=dtypes.float32) / 1735 math_ops.cast(array_ops.shape(images_t)[1:3], dtype=dtypes.float32)) 1736 return gen_image_ops.scale_and_translate( 1737 images_t, 1738 new_size, 1739 scale, 1740 array_ops.zeros([2]), 1741 kernel_type=method, 1742 antialias=antialias) 1743 1744 if method == ResizeMethod.BILINEAR: 1745 if antialias: 1746 return resize_with_scale_and_translate('triangle') 1747 else: 1748 return gen_image_ops.resize_bilinear( 1749 images_t, new_size, half_pixel_centers=True) 1750 elif method == ResizeMethod.NEAREST_NEIGHBOR: 1751 return gen_image_ops.resize_nearest_neighbor( 1752 images_t, new_size, half_pixel_centers=True) 1753 elif method == ResizeMethod.BICUBIC: 1754 if antialias: 1755 return resize_with_scale_and_translate('keyscubic') 1756 else: 1757 return gen_image_ops.resize_bicubic( 1758 images_t, new_size, half_pixel_centers=True) 1759 elif method == ResizeMethod.AREA: 1760 return gen_image_ops.resize_area(images_t, new_size) 1761 elif method in scale_and_translate_methods: 1762 return resize_with_scale_and_translate(method) 1763 else: 1764 raise ValueError('Resize method is not implemented: {}'.format(method)) 1765 1766 return _resize_images_common( 1767 images, 1768 resize_fn, 1769 size, 1770 preserve_aspect_ratio=preserve_aspect_ratio, 1771 name=name, 1772 skip_resize_if_same=False) 1773 1774 1775def _resize_image_with_pad_common(image, target_height, target_width, 1776 resize_fn): 1777 """Core functionality for v1 and v2 resize_image_with_pad functions.""" 1778 with ops.name_scope(None, 'resize_image_with_pad', [image]): 1779 image = ops.convert_to_tensor(image, name='image') 1780 image_shape = image.get_shape() 1781 is_batch = True 1782 if image_shape.ndims == 3: 1783 is_batch = False 1784 image = array_ops.expand_dims(image, 0) 1785 elif image_shape.ndims is None: 1786 is_batch = False 1787 image = array_ops.expand_dims(image, 0) 1788 image.set_shape([None] * 4) 1789 elif image_shape.ndims != 4: 1790 raise ValueError( 1791 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1792 image_shape) 1793 1794 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1795 assert_ops += _assert(target_width > 0, ValueError, 1796 'target_width must be > 0.') 1797 assert_ops += _assert(target_height > 0, ValueError, 1798 'target_height must be > 0.') 1799 1800 image = control_flow_ops.with_dependencies(assert_ops, image) 1801 1802 def max_(x, y): 1803 if _is_tensor(x) or _is_tensor(y): 1804 return math_ops.maximum(x, y) 1805 else: 1806 return max(x, y) 1807 1808 _, height, width, _ = _ImageDimensions(image, rank=4) 1809 1810 # convert values to float, to ease divisions 1811 f_height = math_ops.cast(height, dtype=dtypes.float32) 1812 f_width = math_ops.cast(width, dtype=dtypes.float32) 1813 f_target_height = math_ops.cast(target_height, dtype=dtypes.float32) 1814 f_target_width = math_ops.cast(target_width, dtype=dtypes.float32) 1815 1816 # Find the ratio by which the image must be adjusted 1817 # to fit within the target 1818 ratio = max_(f_width / f_target_width, f_height / f_target_height) 1819 resized_height_float = f_height / ratio 1820 resized_width_float = f_width / ratio 1821 resized_height = math_ops.cast( 1822 math_ops.floor(resized_height_float), dtype=dtypes.int32) 1823 resized_width = math_ops.cast( 1824 math_ops.floor(resized_width_float), dtype=dtypes.int32) 1825 1826 padding_height = (f_target_height - resized_height_float) / 2 1827 padding_width = (f_target_width - resized_width_float) / 2 1828 f_padding_height = math_ops.floor(padding_height) 1829 f_padding_width = math_ops.floor(padding_width) 1830 p_height = max_(0, math_ops.cast(f_padding_height, dtype=dtypes.int32)) 1831 p_width = max_(0, math_ops.cast(f_padding_width, dtype=dtypes.int32)) 1832 1833 # Resize first, then pad to meet requested dimensions 1834 resized = resize_fn(image, [resized_height, resized_width]) 1835 1836 padded = pad_to_bounding_box(resized, p_height, p_width, target_height, 1837 target_width) 1838 1839 if padded.get_shape().ndims is None: 1840 raise ValueError('padded contains no shape.') 1841 1842 _ImageDimensions(padded, rank=4) 1843 1844 if not is_batch: 1845 padded = array_ops.squeeze(padded, axis=[0]) 1846 1847 return padded 1848 1849 1850@tf_export(v1=['image.resize_image_with_pad']) 1851@dispatch.add_dispatch_support 1852def resize_image_with_pad_v1(image, 1853 target_height, 1854 target_width, 1855 method=ResizeMethodV1.BILINEAR, 1856 align_corners=False): 1857 """Resizes and pads an image to a target width and height. 1858 1859 Resizes an image to a target width and height by keeping 1860 the aspect ratio the same without distortion. If the target 1861 dimensions don't match the image dimensions, the image 1862 is resized and then padded with zeroes to match requested 1863 dimensions. 1864 1865 Args: 1866 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1867 of shape `[height, width, channels]`. 1868 target_height: Target height. 1869 target_width: Target width. 1870 method: Method to use for resizing image. See `resize_images()` 1871 align_corners: bool. If True, the centers of the 4 corner pixels of the 1872 input and output tensors are aligned, preserving the values at the corner 1873 pixels. Defaults to `False`. 1874 1875 Raises: 1876 ValueError: if `target_height` or `target_width` are zero or negative. 1877 1878 Returns: 1879 Resized and padded image. 1880 If `images` was 4-D, a 4-D float Tensor of shape 1881 `[batch, new_height, new_width, channels]`. 1882 If `images` was 3-D, a 3-D float Tensor of shape 1883 `[new_height, new_width, channels]`. 1884 """ 1885 1886 def _resize_fn(im, new_size): 1887 return resize_images(im, new_size, method, align_corners=align_corners) 1888 1889 return _resize_image_with_pad_common(image, target_height, target_width, 1890 _resize_fn) 1891 1892 1893@tf_export('image.resize_with_pad', v1=[]) 1894@dispatch.add_dispatch_support 1895def resize_image_with_pad_v2(image, 1896 target_height, 1897 target_width, 1898 method=ResizeMethod.BILINEAR, 1899 antialias=False): 1900 """Resizes and pads an image to a target width and height. 1901 1902 Resizes an image to a target width and height by keeping 1903 the aspect ratio the same without distortion. If the target 1904 dimensions don't match the image dimensions, the image 1905 is resized and then padded with zeroes to match requested 1906 dimensions. 1907 1908 Args: 1909 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1910 of shape `[height, width, channels]`. 1911 target_height: Target height. 1912 target_width: Target width. 1913 method: Method to use for resizing image. See `image.resize()` 1914 antialias: Whether to use anti-aliasing when resizing. See 'image.resize()'. 1915 1916 Raises: 1917 ValueError: if `target_height` or `target_width` are zero or negative. 1918 1919 Returns: 1920 Resized and padded image. 1921 If `images` was 4-D, a 4-D float Tensor of shape 1922 `[batch, new_height, new_width, channels]`. 1923 If `images` was 3-D, a 3-D float Tensor of shape 1924 `[new_height, new_width, channels]`. 1925 """ 1926 1927 def _resize_fn(im, new_size): 1928 return resize_images_v2(im, new_size, method, antialias=antialias) 1929 1930 return _resize_image_with_pad_common(image, target_height, target_width, 1931 _resize_fn) 1932 1933 1934@tf_export('image.per_image_standardization') 1935@dispatch.add_dispatch_support 1936def per_image_standardization(image): 1937 """Linearly scales each image in `image` to have mean 0 and variance 1. 1938 1939 For each 3-D image `x` in `image`, computes `(x - mean) / adjusted_stddev`, 1940 where 1941 1942 - `mean` is the average of all values in `x` 1943 - `adjusted_stddev = max(stddev, 1.0/sqrt(N))` is capped away from 0 to 1944 protect against division by 0 when handling uniform images 1945 - `N` is the number of elements in `x` 1946 - `stddev` is the standard deviation of all values in `x` 1947 1948 Example Usage: 1949 1950 >>> image = tf.constant(np.arange(1, 13, dtype=np.int32), shape=[2, 2, 3]) 1951 >>> image # 3-D tensor 1952 <tf.Tensor: shape=(2, 2, 3), dtype=int32, numpy= 1953 array([[[ 1, 2, 3], 1954 [ 4, 5, 6]], 1955 [[ 7, 8, 9], 1956 [10, 11, 12]]], dtype=int32)> 1957 >>> new_image = tf.image.per_image_standardization(image) 1958 >>> new_image # 3-D tensor with mean ~= 0 and variance ~= 1 1959 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 1960 array([[[-1.593255 , -1.3035723 , -1.0138896 ], 1961 [-0.7242068 , -0.4345241 , -0.14484136]], 1962 [[ 0.14484136, 0.4345241 , 0.7242068 ], 1963 [ 1.0138896 , 1.3035723 , 1.593255 ]]], dtype=float32)> 1964 1965 Args: 1966 image: An n-D `Tensor` with at least 3 dimensions, the last 3 of which are 1967 the dimensions of each image. 1968 1969 Returns: 1970 A `Tensor` with the same shape as `image` and its dtype is `float32`. 1971 1972 Raises: 1973 ValueError: The shape of `image` has fewer than 3 dimensions. 1974 """ 1975 with ops.name_scope(None, 'per_image_standardization', [image]) as scope: 1976 image = ops.convert_to_tensor(image, name='image') 1977 image = _AssertAtLeast3DImage(image) 1978 1979 image = math_ops.cast(image, dtype=dtypes.float32) 1980 num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:]) 1981 image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True) 1982 1983 # Apply a minimum normalization that protects us against uniform images. 1984 stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True) 1985 min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32)) 1986 adjusted_stddev = math_ops.maximum(stddev, min_stddev) 1987 1988 image -= image_mean 1989 image = math_ops.divide(image, adjusted_stddev, name=scope) 1990 return image 1991 1992 1993@tf_export('image.random_brightness') 1994@dispatch.register_unary_elementwise_api 1995@dispatch.add_dispatch_support 1996def random_brightness(image, max_delta, seed=None): 1997 """Adjust the brightness of images by a random factor. 1998 1999 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the 2000 interval `[-max_delta, max_delta)`. 2001 2002 For producing deterministic results given a `seed` value, use 2003 `tf.image.stateless_random_brightness`. Unlike using the `seed` param 2004 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 2005 same results given the same seed independent of how many times the function is 2006 called, and independent of global seed settings (e.g. tf.random.set_seed). 2007 2008 Args: 2009 image: An image or images to adjust. 2010 max_delta: float, must be non-negative. 2011 seed: A Python integer. Used to create a random seed. See 2012 `tf.compat.v1.set_random_seed` for behavior. 2013 2014 Usage Example: 2015 2016 >>> x = [[[1.0, 2.0, 3.0], 2017 ... [4.0, 5.0, 6.0]], 2018 ... [[7.0, 8.0, 9.0], 2019 ... [10.0, 11.0, 12.0]]] 2020 >>> tf.image.random_brightness(x, 0.2) 2021 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 2022 2023 Returns: 2024 The brightness-adjusted image(s). 2025 2026 Raises: 2027 ValueError: if `max_delta` is negative. 2028 """ 2029 if max_delta < 0: 2030 raise ValueError('max_delta must be non-negative.') 2031 2032 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed) 2033 return adjust_brightness(image, delta) 2034 2035 2036@tf_export('image.stateless_random_brightness', v1=[]) 2037@dispatch.register_unary_elementwise_api 2038@dispatch.add_dispatch_support 2039def stateless_random_brightness(image, max_delta, seed): 2040 """Adjust the brightness of images by a random factor deterministically. 2041 2042 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the 2043 interval `[-max_delta, max_delta)`. 2044 2045 Guarantees the same results given the same `seed` independent of how many 2046 times the function is called, and independent of global seed settings (e.g. 2047 `tf.random.set_seed`). 2048 2049 Usage Example: 2050 2051 >>> x = [[[1.0, 2.0, 3.0], 2052 ... [4.0, 5.0, 6.0]], 2053 ... [[7.0, 8.0, 9.0], 2054 ... [10.0, 11.0, 12.0]]] 2055 >>> seed = (1, 2) 2056 >>> tf.image.stateless_random_brightness(x, 0.2, seed) 2057 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2058 array([[[ 1.1376241, 2.1376243, 3.1376243], 2059 [ 4.1376243, 5.1376243, 6.1376243]], 2060 [[ 7.1376243, 8.137624 , 9.137624 ], 2061 [10.137624 , 11.137624 , 12.137624 ]]], dtype=float32)> 2062 2063 Args: 2064 image: An image or images to adjust. 2065 max_delta: float, must be non-negative. 2066 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2067 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2068 2069 Returns: 2070 The brightness-adjusted image(s). 2071 2072 Raises: 2073 ValueError: if `max_delta` is negative. 2074 """ 2075 if max_delta < 0: 2076 raise ValueError('max_delta must be non-negative.') 2077 2078 delta = stateless_random_ops.stateless_random_uniform( 2079 shape=[], minval=-max_delta, maxval=max_delta, seed=seed) 2080 return adjust_brightness(image, delta) 2081 2082 2083@tf_export('image.random_contrast') 2084@dispatch.add_dispatch_support 2085def random_contrast(image, lower, upper, seed=None): 2086 """Adjust the contrast of an image or images by a random factor. 2087 2088 Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly 2089 picked in the interval `[lower, upper)`. 2090 2091 For producing deterministic results given a `seed` value, use 2092 `tf.image.stateless_random_contrast`. Unlike using the `seed` param 2093 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 2094 same results given the same seed independent of how many times the function is 2095 called, and independent of global seed settings (e.g. tf.random.set_seed). 2096 2097 Args: 2098 image: An image tensor with 3 or more dimensions. 2099 lower: float. Lower bound for the random contrast factor. 2100 upper: float. Upper bound for the random contrast factor. 2101 seed: A Python integer. Used to create a random seed. See 2102 `tf.compat.v1.set_random_seed` for behavior. 2103 2104 Usage Example: 2105 2106 >>> x = [[[1.0, 2.0, 3.0], 2107 ... [4.0, 5.0, 6.0]], 2108 ... [[7.0, 8.0, 9.0], 2109 ... [10.0, 11.0, 12.0]]] 2110 >>> tf.image.random_contrast(x, 0.2, 0.5) 2111 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 2112 2113 Returns: 2114 The contrast-adjusted image(s). 2115 2116 Raises: 2117 ValueError: if `upper <= lower` or if `lower < 0`. 2118 """ 2119 if upper <= lower: 2120 raise ValueError('upper must be > lower.') 2121 2122 if lower < 0: 2123 raise ValueError('lower must be non-negative.') 2124 2125 contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed) 2126 return adjust_contrast(image, contrast_factor) 2127 2128 2129@tf_export('image.stateless_random_contrast', v1=[]) 2130@dispatch.add_dispatch_support 2131def stateless_random_contrast(image, lower, upper, seed): 2132 """Adjust the contrast of images by a random factor deterministically. 2133 2134 Guarantees the same results given the same `seed` independent of how many 2135 times the function is called, and independent of global seed settings (e.g. 2136 `tf.random.set_seed`). 2137 2138 Args: 2139 image: An image tensor with 3 or more dimensions. 2140 lower: float. Lower bound for the random contrast factor. 2141 upper: float. Upper bound for the random contrast factor. 2142 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2143 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2144 2145 Usage Example: 2146 2147 >>> x = [[[1.0, 2.0, 3.0], 2148 ... [4.0, 5.0, 6.0]], 2149 ... [[7.0, 8.0, 9.0], 2150 ... [10.0, 11.0, 12.0]]] 2151 >>> seed = (1, 2) 2152 >>> tf.image.stateless_random_contrast(x, 0.2, 0.5, seed) 2153 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2154 array([[[3.4605184, 4.4605184, 5.4605184], 2155 [4.820173 , 5.820173 , 6.820173 ]], 2156 [[6.179827 , 7.179827 , 8.179828 ], 2157 [7.5394816, 8.539482 , 9.539482 ]]], dtype=float32)> 2158 2159 Returns: 2160 The contrast-adjusted image(s). 2161 2162 Raises: 2163 ValueError: if `upper <= lower` or if `lower < 0`. 2164 """ 2165 if upper <= lower: 2166 raise ValueError('upper must be > lower.') 2167 2168 if lower < 0: 2169 raise ValueError('lower must be non-negative.') 2170 2171 contrast_factor = stateless_random_ops.stateless_random_uniform( 2172 shape=[], minval=lower, maxval=upper, seed=seed) 2173 return adjust_contrast(image, contrast_factor) 2174 2175 2176@tf_export('image.adjust_brightness') 2177@dispatch.register_unary_elementwise_api 2178@dispatch.add_dispatch_support 2179def adjust_brightness(image, delta): 2180 """Adjust the brightness of RGB or Grayscale images. 2181 2182 This is a convenience method that converts RGB images to float 2183 representation, adjusts their brightness, and then converts them back to the 2184 original data type. If several adjustments are chained, it is advisable to 2185 minimize the number of redundant conversions. 2186 2187 The value `delta` is added to all components of the tensor `image`. `image` is 2188 converted to `float` and scaled appropriately if it is in fixed-point 2189 representation, and `delta` is converted to the same data type. For regular 2190 images, `delta` should be in the range `(-1,1)`, as it is added to the image 2191 in floating point representation, where pixel values are in the `[0,1)` range. 2192 2193 Usage Example: 2194 2195 >>> x = [[[1.0, 2.0, 3.0], 2196 ... [4.0, 5.0, 6.0]], 2197 ... [[7.0, 8.0, 9.0], 2198 ... [10.0, 11.0, 12.0]]] 2199 >>> tf.image.adjust_brightness(x, delta=0.1) 2200 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2201 array([[[ 1.1, 2.1, 3.1], 2202 [ 4.1, 5.1, 6.1]], 2203 [[ 7.1, 8.1, 9.1], 2204 [10.1, 11.1, 12.1]]], dtype=float32)> 2205 2206 Args: 2207 image: RGB image or images to adjust. 2208 delta: A scalar. Amount to add to the pixel values. 2209 2210 Returns: 2211 A brightness-adjusted tensor of the same shape and type as `image`. 2212 """ 2213 with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name: 2214 image = ops.convert_to_tensor(image, name='image') 2215 # Remember original dtype to so we can convert back if needed 2216 orig_dtype = image.dtype 2217 2218 if orig_dtype in [dtypes.float16, dtypes.float32]: 2219 flt_image = image 2220 else: 2221 flt_image = convert_image_dtype(image, dtypes.float32) 2222 2223 adjusted = math_ops.add( 2224 flt_image, math_ops.cast(delta, flt_image.dtype), name=name) 2225 2226 return convert_image_dtype(adjusted, orig_dtype, saturate=True) 2227 2228 2229@tf_export('image.adjust_contrast') 2230@dispatch.add_dispatch_support 2231def adjust_contrast(images, contrast_factor): 2232 """Adjust contrast of RGB or grayscale images. 2233 2234 This is a convenience method that converts RGB images to float 2235 representation, adjusts their contrast, and then converts them back to the 2236 original data type. If several adjustments are chained, it is advisable to 2237 minimize the number of redundant conversions. 2238 2239 `images` is a tensor of at least 3 dimensions. The last 3 dimensions are 2240 interpreted as `[height, width, channels]`. The other dimensions only 2241 represent a collection of images, such as `[batch, height, width, channels].` 2242 2243 Contrast is adjusted independently for each channel of each image. 2244 2245 For each channel, this Op computes the mean of the image pixels in the 2246 channel and then adjusts each component `x` of each pixel to 2247 `(x - mean) * contrast_factor + mean`. 2248 2249 `contrast_factor` must be in the interval `(-inf, inf)`. 2250 2251 Usage Example: 2252 2253 >>> x = [[[1.0, 2.0, 3.0], 2254 ... [4.0, 5.0, 6.0]], 2255 ... [[7.0, 8.0, 9.0], 2256 ... [10.0, 11.0, 12.0]]] 2257 >>> tf.image.adjust_contrast(x, 2.) 2258 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2259 array([[[-3.5, -2.5, -1.5], 2260 [ 2.5, 3.5, 4.5]], 2261 [[ 8.5, 9.5, 10.5], 2262 [14.5, 15.5, 16.5]]], dtype=float32)> 2263 2264 Args: 2265 images: Images to adjust. At least 3-D. 2266 contrast_factor: A float multiplier for adjusting contrast. 2267 2268 Returns: 2269 The contrast-adjusted image or images. 2270 """ 2271 with ops.name_scope(None, 'adjust_contrast', 2272 [images, contrast_factor]) as name: 2273 images = ops.convert_to_tensor(images, name='images') 2274 # Remember original dtype to so we can convert back if needed 2275 orig_dtype = images.dtype 2276 2277 if orig_dtype in (dtypes.float16, dtypes.float32): 2278 flt_images = images 2279 else: 2280 flt_images = convert_image_dtype(images, dtypes.float32) 2281 2282 adjusted = gen_image_ops.adjust_contrastv2( 2283 flt_images, contrast_factor=contrast_factor, name=name) 2284 2285 return convert_image_dtype(adjusted, orig_dtype, saturate=True) 2286 2287 2288@tf_export('image.adjust_gamma') 2289@dispatch.register_unary_elementwise_api 2290@dispatch.add_dispatch_support 2291def adjust_gamma(image, gamma=1, gain=1): 2292 """Performs [Gamma Correction](http://en.wikipedia.org/wiki/Gamma_correction). 2293 2294 on the input image. 2295 2296 Also known as Power Law Transform. This function converts the 2297 input images at first to float representation, then transforms them 2298 pixelwise according to the equation `Out = gain * In**gamma`, 2299 and then converts the back to the original data type. 2300 2301 Usage Example: 2302 2303 >>> x = [[[1.0, 2.0, 3.0], 2304 ... [4.0, 5.0, 6.0]], 2305 ... [[7.0, 8.0, 9.0], 2306 ... [10.0, 11.0, 12.0]]] 2307 >>> tf.image.adjust_gamma(x, 0.2) 2308 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2309 array([[[1. , 1.1486983, 1.2457309], 2310 [1.319508 , 1.3797297, 1.4309691]], 2311 [[1.4757731, 1.5157166, 1.5518456], 2312 [1.5848932, 1.6153942, 1.6437519]]], dtype=float32)> 2313 2314 Args: 2315 image : RGB image or images to adjust. 2316 gamma : A scalar or tensor. Non-negative real number. 2317 gain : A scalar or tensor. The constant multiplier. 2318 2319 Returns: 2320 A Tensor. A Gamma-adjusted tensor of the same shape and type as `image`. 2321 2322 Raises: 2323 ValueError: If gamma is negative. 2324 Notes: 2325 For gamma greater than 1, the histogram will shift towards left and 2326 the output image will be darker than the input image. 2327 For gamma less than 1, the histogram will shift towards right and 2328 the output image will be brighter than the input image. 2329 References: 2330 [Wikipedia](http://en.wikipedia.org/wiki/Gamma_correction) 2331 """ 2332 2333 with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name: 2334 image = ops.convert_to_tensor(image, name='image') 2335 # Remember original dtype to so we can convert back if needed 2336 orig_dtype = image.dtype 2337 2338 if orig_dtype in [dtypes.float16, dtypes.float32]: 2339 flt_image = image 2340 else: 2341 flt_image = convert_image_dtype(image, dtypes.float32) 2342 2343 assert_op = _assert(gamma >= 0, ValueError, 2344 'Gamma should be a non-negative real number.') 2345 if assert_op: 2346 gamma = control_flow_ops.with_dependencies(assert_op, gamma) 2347 2348 # According to the definition of gamma correction. 2349 adjusted_img = gain * flt_image**gamma 2350 2351 return convert_image_dtype(adjusted_img, orig_dtype, saturate=True) 2352 2353 2354@tf_export('image.convert_image_dtype') 2355@dispatch.register_unary_elementwise_api 2356@dispatch.add_dispatch_support 2357def convert_image_dtype(image, dtype, saturate=False, name=None): 2358 """Convert `image` to `dtype`, scaling its values if needed. 2359 2360 The operation supports data types (for `image` and `dtype`) of 2361 `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`, 2362 `float16`, `float32`, `float64`, `bfloat16`. 2363 2364 Images that are represented using floating point values are expected to have 2365 values in the range [0,1). Image data stored in integer data types are 2366 expected to have values in the range `[0,MAX]`, where `MAX` is the largest 2367 positive representable number for the data type. 2368 2369 This op converts between data types, scaling the values appropriately before 2370 casting. 2371 2372 Usage Example: 2373 2374 >>> x = [[[1, 2, 3], [4, 5, 6]], 2375 ... [[7, 8, 9], [10, 11, 12]]] 2376 >>> x_int8 = tf.convert_to_tensor(x, dtype=tf.int8) 2377 >>> tf.image.convert_image_dtype(x_int8, dtype=tf.float16, saturate=False) 2378 <tf.Tensor: shape=(2, 2, 3), dtype=float16, numpy= 2379 array([[[0.00787, 0.01575, 0.02362], 2380 [0.0315 , 0.03937, 0.04724]], 2381 [[0.0551 , 0.063 , 0.07086], 2382 [0.07874, 0.0866 , 0.0945 ]]], dtype=float16)> 2383 2384 Converting integer types to floating point types returns normalized floating 2385 point values in the range [0, 1); the values are normalized by the `MAX` value 2386 of the input dtype. Consider the following two examples: 2387 2388 >>> a = [[[1], [2]], [[3], [4]]] 2389 >>> a_int8 = tf.convert_to_tensor(a, dtype=tf.int8) 2390 >>> tf.image.convert_image_dtype(a_int8, dtype=tf.float32) 2391 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 2392 array([[[0.00787402], 2393 [0.01574803]], 2394 [[0.02362205], 2395 [0.03149606]]], dtype=float32)> 2396 2397 >>> a_int32 = tf.convert_to_tensor(a, dtype=tf.int32) 2398 >>> tf.image.convert_image_dtype(a_int32, dtype=tf.float32) 2399 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 2400 array([[[4.6566129e-10], 2401 [9.3132257e-10]], 2402 [[1.3969839e-09], 2403 [1.8626451e-09]]], dtype=float32)> 2404 2405 Despite having identical values of `a` and output dtype of `float32`, the 2406 outputs differ due to the different input dtypes (`int8` vs. `int32`). This 2407 is, again, because the values are normalized by the `MAX` value of the input 2408 dtype. 2409 2410 Note that converting floating point values to integer type may lose precision. 2411 In the example below, an image tensor `b` of dtype `float32` is converted to 2412 `int8` and back to `float32`. The final output, however, is different from 2413 the original input `b` due to precision loss. 2414 2415 >>> b = [[[0.12], [0.34]], [[0.56], [0.78]]] 2416 >>> b_float32 = tf.convert_to_tensor(b, dtype=tf.float32) 2417 >>> b_int8 = tf.image.convert_image_dtype(b_float32, dtype=tf.int8) 2418 >>> tf.image.convert_image_dtype(b_int8, dtype=tf.float32) 2419 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 2420 array([[[0.11811024], 2421 [0.33858266]], 2422 [[0.5590551 ], 2423 [0.77952754]]], dtype=float32)> 2424 2425 Scaling up from an integer type (input dtype) to another integer type (output 2426 dtype) will not map input dtype's `MAX` to output dtype's `MAX` but converting 2427 back and forth should result in no change. For example, as shown below, the 2428 `MAX` value of int8 (=127) is not mapped to the `MAX` value of int16 (=32,767) 2429 but, when scaled back, we get the same, original values of `c`. 2430 2431 >>> c = [[[1], [2]], [[127], [127]]] 2432 >>> c_int8 = tf.convert_to_tensor(c, dtype=tf.int8) 2433 >>> c_int16 = tf.image.convert_image_dtype(c_int8, dtype=tf.int16) 2434 >>> print(c_int16) 2435 tf.Tensor( 2436 [[[ 256] 2437 [ 512]] 2438 [[32512] 2439 [32512]]], shape=(2, 2, 1), dtype=int16) 2440 >>> c_int8_back = tf.image.convert_image_dtype(c_int16, dtype=tf.int8) 2441 >>> print(c_int8_back) 2442 tf.Tensor( 2443 [[[ 1] 2444 [ 2]] 2445 [[127] 2446 [127]]], shape=(2, 2, 1), dtype=int8) 2447 2448 Scaling down from an integer type to another integer type can be a lossy 2449 conversion. Notice in the example below that converting `int16` to `uint8` and 2450 back to `int16` has lost precision. 2451 2452 >>> d = [[[1000], [2000]], [[3000], [4000]]] 2453 >>> d_int16 = tf.convert_to_tensor(d, dtype=tf.int16) 2454 >>> d_uint8 = tf.image.convert_image_dtype(d_int16, dtype=tf.uint8) 2455 >>> d_int16_back = tf.image.convert_image_dtype(d_uint8, dtype=tf.int16) 2456 >>> print(d_int16_back) 2457 tf.Tensor( 2458 [[[ 896] 2459 [1920]] 2460 [[2944] 2461 [3968]]], shape=(2, 2, 1), dtype=int16) 2462 2463 Note that converting from floating point inputs to integer types may lead to 2464 over/underflow problems. Set saturate to `True` to avoid such problem in 2465 problematic conversions. If enabled, saturation will clip the output into the 2466 allowed range before performing a potentially dangerous cast (and only before 2467 performing such a cast, i.e., when casting from a floating point to an integer 2468 type, and when casting from a signed to an unsigned type; `saturate` has no 2469 effect on casts between floats, or on casts that increase the type's range). 2470 2471 Args: 2472 image: An image. 2473 dtype: A `DType` to convert `image` to. 2474 saturate: If `True`, clip the input before casting (if necessary). 2475 name: A name for this operation (optional). 2476 2477 Returns: 2478 `image`, converted to `dtype`. 2479 2480 Raises: 2481 AttributeError: Raises an attribute error when dtype is neither 2482 float nor integer. 2483 """ 2484 image = ops.convert_to_tensor(image, name='image') 2485 dtype = dtypes.as_dtype(dtype) 2486 if not dtype.is_floating and not dtype.is_integer: 2487 raise AttributeError('dtype must be either floating point or integer') 2488 if dtype == image.dtype: 2489 return array_ops.identity(image, name=name) 2490 2491 with ops.name_scope(name, 'convert_image', [image]) as name: 2492 # Both integer: use integer multiplication in the larger range 2493 if image.dtype.is_integer and dtype.is_integer: 2494 scale_in = image.dtype.max 2495 scale_out = dtype.max 2496 if scale_in > scale_out: 2497 # Scaling down, scale first, then cast. The scaling factor will 2498 # cause in.max to be mapped to above out.max but below out.max+1, 2499 # so that the output is safely in the supported range. 2500 scale = (scale_in + 1) // (scale_out + 1) 2501 scaled = math_ops.floordiv(image, scale) 2502 2503 if saturate: 2504 return math_ops.saturate_cast(scaled, dtype, name=name) 2505 else: 2506 return math_ops.cast(scaled, dtype, name=name) 2507 else: 2508 # Scaling up, cast first, then scale. The scale will not map in.max to 2509 # out.max, but converting back and forth should result in no change. 2510 if saturate: 2511 cast = math_ops.saturate_cast(image, dtype) 2512 else: 2513 cast = math_ops.cast(image, dtype) 2514 scale = (scale_out + 1) // (scale_in + 1) 2515 return math_ops.multiply(cast, scale, name=name) 2516 elif image.dtype.is_floating and dtype.is_floating: 2517 # Both float: Just cast, no possible overflows in the allowed ranges. 2518 # Note: We're ignoring float overflows. If your image dynamic range 2519 # exceeds float range, you're on your own. 2520 return math_ops.cast(image, dtype, name=name) 2521 else: 2522 if image.dtype.is_integer: 2523 # Converting to float: first cast, then scale. No saturation possible. 2524 cast = math_ops.cast(image, dtype) 2525 scale = 1. / image.dtype.max 2526 return math_ops.multiply(cast, scale, name=name) 2527 else: 2528 # Converting from float: first scale, then cast 2529 scale = dtype.max + 0.5 # avoid rounding problems in the cast 2530 scaled = math_ops.multiply(image, scale) 2531 if saturate: 2532 return math_ops.saturate_cast(scaled, dtype, name=name) 2533 else: 2534 return math_ops.cast(scaled, dtype, name=name) 2535 2536 2537@tf_export('image.rgb_to_grayscale') 2538@dispatch.add_dispatch_support 2539def rgb_to_grayscale(images, name=None): 2540 """Converts one or more images from RGB to Grayscale. 2541 2542 Outputs a tensor of the same `DType` and rank as `images`. The size of the 2543 last dimension of the output is 1, containing the Grayscale value of the 2544 pixels. 2545 2546 >>> original = tf.constant([[[1.0, 2.0, 3.0]]]) 2547 >>> converted = tf.image.rgb_to_grayscale(original) 2548 >>> print(converted.numpy()) 2549 [[[1.81...]]] 2550 2551 Args: 2552 images: The RGB tensor to convert. The last dimension must have size 3 and 2553 should contain RGB values. 2554 name: A name for the operation (optional). 2555 2556 Returns: 2557 The converted grayscale image(s). 2558 """ 2559 with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name: 2560 images = ops.convert_to_tensor(images, name='images') 2561 # Remember original dtype to so we can convert back if needed 2562 orig_dtype = images.dtype 2563 flt_image = convert_image_dtype(images, dtypes.float32) 2564 2565 # Reference for converting between RGB and grayscale. 2566 # https://en.wikipedia.org/wiki/Luma_%28video%29 2567 rgb_weights = [0.2989, 0.5870, 0.1140] 2568 gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1]) 2569 gray_float = array_ops.expand_dims(gray_float, -1) 2570 return convert_image_dtype(gray_float, orig_dtype, name=name) 2571 2572 2573@tf_export('image.grayscale_to_rgb') 2574@dispatch.add_dispatch_support 2575def grayscale_to_rgb(images, name=None): 2576 """Converts one or more images from Grayscale to RGB. 2577 2578 Outputs a tensor of the same `DType` and rank as `images`. The size of the 2579 last dimension of the output is 3, containing the RGB value of the pixels. 2580 The input images' last dimension must be size 1. 2581 2582 >>> original = tf.constant([[[1.0], [2.0], [3.0]]]) 2583 >>> converted = tf.image.grayscale_to_rgb(original) 2584 >>> print(converted.numpy()) 2585 [[[1. 1. 1.] 2586 [2. 2. 2.] 2587 [3. 3. 3.]]] 2588 2589 Args: 2590 images: The Grayscale tensor to convert. The last dimension must be size 1. 2591 name: A name for the operation (optional). 2592 2593 Returns: 2594 The converted grayscale image(s). 2595 """ 2596 with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name: 2597 images = _AssertGrayscaleImage(images) 2598 2599 images = ops.convert_to_tensor(images, name='images') 2600 rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) 2601 shape_list = ([array_ops.ones(rank_1, dtype=dtypes.int32)] + 2602 [array_ops.expand_dims(3, 0)]) 2603 multiples = array_ops.concat(shape_list, 0) 2604 rgb = array_ops.tile(images, multiples, name=name) 2605 rgb.set_shape(images.get_shape()[:-1].concatenate([3])) 2606 return rgb 2607 2608 2609# pylint: disable=invalid-name 2610@tf_export('image.random_hue') 2611@dispatch.add_dispatch_support 2612def random_hue(image, max_delta, seed=None): 2613 """Adjust the hue of RGB images by a random factor. 2614 2615 Equivalent to `adjust_hue()` but uses a `delta` randomly 2616 picked in the interval `[-max_delta, max_delta)`. 2617 2618 `max_delta` must be in the interval `[0, 0.5]`. 2619 2620 Usage Example: 2621 2622 >>> x = [[[1.0, 2.0, 3.0], 2623 ... [4.0, 5.0, 6.0]], 2624 ... [[7.0, 8.0, 9.0], 2625 ... [10.0, 11.0, 12.0]]] 2626 >>> tf.image.random_hue(x, 0.2) 2627 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 2628 2629 For producing deterministic results given a `seed` value, use 2630 `tf.image.stateless_random_hue`. Unlike using the `seed` param with 2631 `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the same 2632 results given the same seed independent of how many times the function is 2633 called, and independent of global seed settings (e.g. tf.random.set_seed). 2634 2635 Args: 2636 image: RGB image or images. The size of the last dimension must be 3. 2637 max_delta: float. The maximum value for the random delta. 2638 seed: An operation-specific seed. It will be used in conjunction with the 2639 graph-level seed to determine the real seeds that will be used in this 2640 operation. Please see the documentation of set_random_seed for its 2641 interaction with the graph-level random seed. 2642 2643 Returns: 2644 Adjusted image(s), same shape and DType as `image`. 2645 2646 Raises: 2647 ValueError: if `max_delta` is invalid. 2648 """ 2649 if max_delta > 0.5: 2650 raise ValueError('max_delta must be <= 0.5.') 2651 2652 if max_delta < 0: 2653 raise ValueError('max_delta must be non-negative.') 2654 2655 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed) 2656 return adjust_hue(image, delta) 2657 2658 2659@tf_export('image.stateless_random_hue', v1=[]) 2660@dispatch.add_dispatch_support 2661def stateless_random_hue(image, max_delta, seed): 2662 """Adjust the hue of RGB images by a random factor deterministically. 2663 2664 Equivalent to `adjust_hue()` but uses a `delta` randomly picked in the 2665 interval `[-max_delta, max_delta)`. 2666 2667 Guarantees the same results given the same `seed` independent of how many 2668 times the function is called, and independent of global seed settings (e.g. 2669 `tf.random.set_seed`). 2670 2671 `max_delta` must be in the interval `[0, 0.5]`. 2672 2673 Usage Example: 2674 2675 >>> x = [[[1.0, 2.0, 3.0], 2676 ... [4.0, 5.0, 6.0]], 2677 ... [[7.0, 8.0, 9.0], 2678 ... [10.0, 11.0, 12.0]]] 2679 >>> seed = (1, 2) 2680 >>> tf.image.stateless_random_hue(x, 0.2, seed) 2681 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2682 array([[[ 1.6514902, 1. , 3. ], 2683 [ 4.65149 , 4. , 6. ]], 2684 [[ 7.65149 , 7. , 9. ], 2685 [10.65149 , 10. , 12. ]]], dtype=float32)> 2686 2687 Args: 2688 image: RGB image or images. The size of the last dimension must be 3. 2689 max_delta: float. The maximum value for the random delta. 2690 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2691 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2692 2693 Returns: 2694 Adjusted image(s), same shape and DType as `image`. 2695 2696 Raises: 2697 ValueError: if `max_delta` is invalid. 2698 """ 2699 if max_delta > 0.5: 2700 raise ValueError('max_delta must be <= 0.5.') 2701 2702 if max_delta < 0: 2703 raise ValueError('max_delta must be non-negative.') 2704 2705 delta = stateless_random_ops.stateless_random_uniform( 2706 shape=[], minval=-max_delta, maxval=max_delta, seed=seed) 2707 return adjust_hue(image, delta) 2708 2709 2710@tf_export('image.adjust_hue') 2711@dispatch.add_dispatch_support 2712def adjust_hue(image, delta, name=None): 2713 """Adjust hue of RGB images. 2714 2715 This is a convenience method that converts an RGB image to float 2716 representation, converts it to HSV, adds an offset to the 2717 hue channel, converts back to RGB and then back to the original 2718 data type. If several adjustments are chained it is advisable to minimize 2719 the number of redundant conversions. 2720 2721 `image` is an RGB image. The image hue is adjusted by converting the 2722 image(s) to HSV and rotating the hue channel (H) by 2723 `delta`. The image is then converted back to RGB. 2724 2725 `delta` must be in the interval `[-1, 1]`. 2726 2727 Usage Example: 2728 2729 >>> x = [[[1.0, 2.0, 3.0], 2730 ... [4.0, 5.0, 6.0]], 2731 ... [[7.0, 8.0, 9.0], 2732 ... [10.0, 11.0, 12.0]]] 2733 >>> tf.image.adjust_hue(x, 0.2) 2734 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2735 array([[[ 2.3999996, 1. , 3. ], 2736 [ 5.3999996, 4. , 6. ]], 2737 [[ 8.4 , 7. , 9. ], 2738 [11.4 , 10. , 12. ]]], dtype=float32)> 2739 2740 Args: 2741 image: RGB image or images. The size of the last dimension must be 3. 2742 delta: float. How much to add to the hue channel. 2743 name: A name for this operation (optional). 2744 2745 Returns: 2746 Adjusted image(s), same shape and DType as `image`. 2747 2748 Raises: 2749 InvalidArgumentError: image must have at least 3 dimensions. 2750 InvalidArgumentError: The size of the last dimension must be 3. 2751 ValueError: if `delta` is not in the interval of `[-1, 1]`. 2752 2753 Usage Example: 2754 2755 >>> image = [[[1, 2, 3], [4, 5, 6]], 2756 ... [[7, 8, 9], [10, 11, 12]], 2757 ... [[13, 14, 15], [16, 17, 18]]] 2758 >>> image = tf.constant(image) 2759 >>> tf.image.adjust_hue(image, 0.2) 2760 <tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy= 2761 array([[[ 2, 1, 3], 2762 [ 5, 4, 6]], 2763 [[ 8, 7, 9], 2764 [11, 10, 12]], 2765 [[14, 13, 15], 2766 [17, 16, 18]]], dtype=int32)> 2767 """ 2768 with ops.name_scope(name, 'adjust_hue', [image]) as name: 2769 if context.executing_eagerly(): 2770 if delta < -1 or delta > 1: 2771 raise ValueError('delta must be in the interval [-1, 1]') 2772 image = ops.convert_to_tensor(image, name='image') 2773 # Remember original dtype to so we can convert back if needed 2774 orig_dtype = image.dtype 2775 if orig_dtype in (dtypes.float16, dtypes.float32): 2776 flt_image = image 2777 else: 2778 flt_image = convert_image_dtype(image, dtypes.float32) 2779 2780 rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) 2781 2782 return convert_image_dtype(rgb_altered, orig_dtype) 2783 2784 2785# pylint: disable=invalid-name 2786@tf_export('image.random_jpeg_quality') 2787@dispatch.add_dispatch_support 2788def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None): 2789 """Randomly changes jpeg encoding quality for inducing jpeg noise. 2790 2791 `min_jpeg_quality` must be in the interval `[0, 100]` and less than 2792 `max_jpeg_quality`. 2793 `max_jpeg_quality` must be in the interval `[0, 100]`. 2794 2795 Usage Example: 2796 2797 >>> x = tf.constant([[[1, 2, 3], 2798 ... [4, 5, 6]], 2799 ... [[7, 8, 9], 2800 ... [10, 11, 12]]], dtype=tf.uint8) 2801 >>> tf.image.random_jpeg_quality(x, 75, 95) 2802 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=...> 2803 2804 For producing deterministic results given a `seed` value, use 2805 `tf.image.stateless_random_jpeg_quality`. Unlike using the `seed` param 2806 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 2807 same results given the same seed independent of how many times the function is 2808 called, and independent of global seed settings (e.g. tf.random.set_seed). 2809 2810 Args: 2811 image: 3D image. Size of the last dimension must be 1 or 3. 2812 min_jpeg_quality: Minimum jpeg encoding quality to use. 2813 max_jpeg_quality: Maximum jpeg encoding quality to use. 2814 seed: An operation-specific seed. It will be used in conjunction with the 2815 graph-level seed to determine the real seeds that will be used in this 2816 operation. Please see the documentation of set_random_seed for its 2817 interaction with the graph-level random seed. 2818 2819 Returns: 2820 Adjusted image(s), same shape and DType as `image`. 2821 2822 Raises: 2823 ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid. 2824 """ 2825 if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or 2826 max_jpeg_quality > 100): 2827 raise ValueError('jpeg encoding range must be between 0 and 100.') 2828 2829 if min_jpeg_quality >= max_jpeg_quality: 2830 raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.') 2831 2832 jpeg_quality = random_ops.random_uniform([], 2833 min_jpeg_quality, 2834 max_jpeg_quality, 2835 seed=seed, 2836 dtype=dtypes.int32) 2837 return adjust_jpeg_quality(image, jpeg_quality) 2838 2839 2840@tf_export('image.stateless_random_jpeg_quality', v1=[]) 2841@dispatch.add_dispatch_support 2842def stateless_random_jpeg_quality(image, 2843 min_jpeg_quality, 2844 max_jpeg_quality, 2845 seed): 2846 """Deterministically radomize jpeg encoding quality for inducing jpeg noise. 2847 2848 Guarantees the same results given the same `seed` independent of how many 2849 times the function is called, and independent of global seed settings (e.g. 2850 `tf.random.set_seed`). 2851 2852 `min_jpeg_quality` must be in the interval `[0, 100]` and less than 2853 `max_jpeg_quality`. 2854 `max_jpeg_quality` must be in the interval `[0, 100]`. 2855 2856 Usage Example: 2857 2858 >>> x = tf.constant([[[1, 2, 3], 2859 ... [4, 5, 6]], 2860 ... [[7, 8, 9], 2861 ... [10, 11, 12]]], dtype=tf.uint8) 2862 >>> seed = (1, 2) 2863 >>> tf.image.stateless_random_jpeg_quality(x, 75, 95, seed) 2864 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy= 2865 array([[[ 0, 4, 5], 2866 [ 1, 5, 6]], 2867 [[ 5, 9, 10], 2868 [ 5, 9, 10]]], dtype=uint8)> 2869 2870 Args: 2871 image: 3D image. Size of the last dimension must be 1 or 3. 2872 min_jpeg_quality: Minimum jpeg encoding quality to use. 2873 max_jpeg_quality: Maximum jpeg encoding quality to use. 2874 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2875 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2876 2877 Returns: 2878 Adjusted image(s), same shape and DType as `image`. 2879 2880 Raises: 2881 ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid. 2882 """ 2883 if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or 2884 max_jpeg_quality > 100): 2885 raise ValueError('jpeg encoding range must be between 0 and 100.') 2886 2887 if min_jpeg_quality >= max_jpeg_quality: 2888 raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.') 2889 2890 jpeg_quality = stateless_random_ops.stateless_random_uniform( 2891 shape=[], minval=min_jpeg_quality, maxval=max_jpeg_quality, seed=seed, 2892 dtype=dtypes.int32) 2893 return adjust_jpeg_quality(image, jpeg_quality) 2894 2895 2896@tf_export('image.adjust_jpeg_quality') 2897@dispatch.add_dispatch_support 2898def adjust_jpeg_quality(image, jpeg_quality, name=None): 2899 """Adjust jpeg encoding quality of an image. 2900 2901 This is a convenience method that converts an image to uint8 representation, 2902 encodes it to jpeg with `jpeg_quality`, decodes it, and then converts back 2903 to the original data type. 2904 2905 `jpeg_quality` must be in the interval `[0, 100]`. 2906 2907 Usage Examples: 2908 2909 >>> x = [[[0.01, 0.02, 0.03], 2910 ... [0.04, 0.05, 0.06]], 2911 ... [[0.07, 0.08, 0.09], 2912 ... [0.10, 0.11, 0.12]]] 2913 >>> x_jpeg = tf.image.adjust_jpeg_quality(x, 75) 2914 >>> x_jpeg.numpy() 2915 array([[[0.00392157, 0.01960784, 0.03137255], 2916 [0.02745098, 0.04313726, 0.05490196]], 2917 [[0.05882353, 0.07450981, 0.08627451], 2918 [0.08235294, 0.09803922, 0.10980393]]], dtype=float32) 2919 2920 Note that floating point values are expected to have values in the range 2921 [0,1) and values outside this range are clipped. 2922 2923 >>> x = [[[1.0, 2.0, 3.0], 2924 ... [4.0, 5.0, 6.0]], 2925 ... [[7.0, 8.0, 9.0], 2926 ... [10.0, 11.0, 12.0]]] 2927 >>> tf.image.adjust_jpeg_quality(x, 75) 2928 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2929 array([[[1., 1., 1.], 2930 [1., 1., 1.]], 2931 [[1., 1., 1.], 2932 [1., 1., 1.]]], dtype=float32)> 2933 2934 Note that `jpeg_quality` 100 is still lossy compresson. 2935 2936 >>> x = tf.constant([[[1, 2, 3], 2937 ... [4, 5, 6]], 2938 ... [[7, 8, 9], 2939 ... [10, 11, 12]]], dtype=tf.uint8) 2940 >>> tf.image.adjust_jpeg_quality(x, 100) 2941 <tf.Tensor: shape(2, 2, 3), dtype=uint8, numpy= 2942 array([[[ 0, 1, 3], 2943 [ 3, 4, 6]], 2944 [[ 6, 7, 9], 2945 [ 9, 10, 12]]], dtype=uint8)> 2946 2947 Args: 2948 image: 3D image. The size of the last dimension must be None, 1 or 3. 2949 jpeg_quality: Python int or Tensor of type int32. jpeg encoding quality. 2950 name: A name for this operation (optional). 2951 2952 Returns: 2953 Adjusted image, same shape and DType as `image`. 2954 2955 Raises: 2956 InvalidArgumentError: quality must be in [0,100] 2957 InvalidArgumentError: image must have 1 or 3 channels 2958 """ 2959 with ops.name_scope(name, 'adjust_jpeg_quality', [image]): 2960 image = ops.convert_to_tensor(image, name='image') 2961 channels = image.shape.as_list()[-1] 2962 # Remember original dtype to so we can convert back if needed 2963 orig_dtype = image.dtype 2964 image = convert_image_dtype(image, dtypes.uint8, saturate=True) 2965 if not _is_tensor(jpeg_quality): 2966 # If jpeg_quality is a int (not tensor). 2967 jpeg_quality = ops.convert_to_tensor(jpeg_quality, dtype=dtypes.int32) 2968 image = gen_image_ops.encode_jpeg_variable_quality(image, jpeg_quality) 2969 2970 image = gen_image_ops.decode_jpeg(image, channels=channels) 2971 return convert_image_dtype(image, orig_dtype, saturate=True) 2972 2973 2974@tf_export('image.random_saturation') 2975@dispatch.add_dispatch_support 2976def random_saturation(image, lower, upper, seed=None): 2977 """Adjust the saturation of RGB images by a random factor. 2978 2979 Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly 2980 picked in the interval `[lower, upper)`. 2981 2982 Usage Example: 2983 2984 >>> x = [[[1.0, 2.0, 3.0], 2985 ... [4.0, 5.0, 6.0]], 2986 ... [[7.0, 8.0, 9.0], 2987 ... [10.0, 11.0, 12.0]]] 2988 >>> tf.image.random_saturation(x, 5, 10) 2989 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2990 array([[[ 0. , 1.5, 3. ], 2991 [ 0. , 3. , 6. ]], 2992 [[ 0. , 4.5, 9. ], 2993 [ 0. , 6. , 12. ]]], dtype=float32)> 2994 2995 For producing deterministic results given a `seed` value, use 2996 `tf.image.stateless_random_saturation`. Unlike using the `seed` param 2997 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 2998 same results given the same seed independent of how many times the function is 2999 called, and independent of global seed settings (e.g. tf.random.set_seed). 3000 3001 Args: 3002 image: RGB image or images. The size of the last dimension must be 3. 3003 lower: float. Lower bound for the random saturation factor. 3004 upper: float. Upper bound for the random saturation factor. 3005 seed: An operation-specific seed. It will be used in conjunction with the 3006 graph-level seed to determine the real seeds that will be used in this 3007 operation. Please see the documentation of set_random_seed for its 3008 interaction with the graph-level random seed. 3009 3010 Returns: 3011 Adjusted image(s), same shape and DType as `image`. 3012 3013 Raises: 3014 ValueError: if `upper <= lower` or if `lower < 0`. 3015 """ 3016 if upper <= lower: 3017 raise ValueError('upper must be > lower.') 3018 3019 if lower < 0: 3020 raise ValueError('lower must be non-negative.') 3021 3022 saturation_factor = random_ops.random_uniform([], lower, upper, seed=seed) 3023 return adjust_saturation(image, saturation_factor) 3024 3025 3026@tf_export('image.stateless_random_saturation', v1=[]) 3027@dispatch.add_dispatch_support 3028def stateless_random_saturation(image, lower, upper, seed=None): 3029 """Adjust the saturation of RGB images by a random factor deterministically. 3030 3031 Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly 3032 picked in the interval `[lower, upper)`. 3033 3034 Guarantees the same results given the same `seed` independent of how many 3035 times the function is called, and independent of global seed settings (e.g. 3036 `tf.random.set_seed`). 3037 3038 Usage Example: 3039 3040 >>> x = [[[1.0, 2.0, 3.0], 3041 ... [4.0, 5.0, 6.0]], 3042 ... [[7.0, 8.0, 9.0], 3043 ... [10.0, 11.0, 12.0]]] 3044 >>> seed = (1, 2) 3045 >>> tf.image.stateless_random_saturation(x, 0.5, 1.0, seed) 3046 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 3047 array([[[ 1.1559395, 2.0779698, 3. ], 3048 [ 4.1559396, 5.07797 , 6. ]], 3049 [[ 7.1559396, 8.07797 , 9. ], 3050 [10.155939 , 11.07797 , 12. ]]], dtype=float32)> 3051 3052 Args: 3053 image: RGB image or images. The size of the last dimension must be 3. 3054 lower: float. Lower bound for the random saturation factor. 3055 upper: float. Upper bound for the random saturation factor. 3056 seed: A shape [2] Tensor, the seed to the random number generator. Must have 3057 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 3058 3059 Returns: 3060 Adjusted image(s), same shape and DType as `image`. 3061 3062 Raises: 3063 ValueError: if `upper <= lower` or if `lower < 0`. 3064 """ 3065 if upper <= lower: 3066 raise ValueError('upper must be > lower.') 3067 3068 if lower < 0: 3069 raise ValueError('lower must be non-negative.') 3070 3071 saturation_factor = stateless_random_ops.stateless_random_uniform( 3072 shape=[], minval=lower, maxval=upper, seed=seed) 3073 return adjust_saturation(image, saturation_factor) 3074 3075 3076@tf_export('image.adjust_saturation') 3077@dispatch.add_dispatch_support 3078def adjust_saturation(image, saturation_factor, name=None): 3079 """Adjust saturation of RGB images. 3080 3081 This is a convenience method that converts RGB images to float 3082 representation, converts them to HSV, adds an offset to the 3083 saturation channel, converts back to RGB and then back to the original 3084 data type. If several adjustments are chained it is advisable to minimize 3085 the number of redundant conversions. 3086 3087 `image` is an RGB image or images. The image saturation is adjusted by 3088 converting the images to HSV and multiplying the saturation (S) channel by 3089 `saturation_factor` and clipping. The images are then converted back to RGB. 3090 3091 `saturation_factor` must be in the interval `[0, inf)`. 3092 3093 Usage Example: 3094 3095 >>> x = [[[1.0, 2.0, 3.0], 3096 ... [4.0, 5.0, 6.0]], 3097 ... [[7.0, 8.0, 9.0], 3098 ... [10.0, 11.0, 12.0]]] 3099 >>> tf.image.adjust_saturation(x, 0.5) 3100 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 3101 array([[[ 2. , 2.5, 3. ], 3102 [ 5. , 5.5, 6. ]], 3103 [[ 8. , 8.5, 9. ], 3104 [11. , 11.5, 12. ]]], dtype=float32)> 3105 3106 Args: 3107 image: RGB image or images. The size of the last dimension must be 3. 3108 saturation_factor: float. Factor to multiply the saturation by. 3109 name: A name for this operation (optional). 3110 3111 Returns: 3112 Adjusted image(s), same shape and DType as `image`. 3113 3114 Raises: 3115 InvalidArgumentError: input must have 3 channels 3116 """ 3117 with ops.name_scope(name, 'adjust_saturation', [image]) as name: 3118 image = ops.convert_to_tensor(image, name='image') 3119 # Remember original dtype to so we can convert back if needed 3120 orig_dtype = image.dtype 3121 if orig_dtype in (dtypes.float16, dtypes.float32): 3122 flt_image = image 3123 else: 3124 flt_image = convert_image_dtype(image, dtypes.float32) 3125 3126 adjusted = gen_image_ops.adjust_saturation(flt_image, saturation_factor) 3127 3128 return convert_image_dtype(adjusted, orig_dtype) 3129 3130 3131@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg']) 3132def is_jpeg(contents, name=None): 3133 r"""Convenience function to check if the 'contents' encodes a JPEG image. 3134 3135 Args: 3136 contents: 0-D `string`. The encoded image bytes. 3137 name: A name for the operation (optional) 3138 3139 Returns: 3140 A scalar boolean tensor indicating if 'contents' may be a JPEG image. 3141 is_jpeg is susceptible to false positives. 3142 """ 3143 # Normal JPEGs start with \xff\xd8\xff\xe0 3144 # JPEG with EXIF starts with \xff\xd8\xff\xe1 3145 # Use \xff\xd8\xff to cover both. 3146 with ops.name_scope(name, 'is_jpeg'): 3147 substr = string_ops.substr(contents, 0, 3) 3148 return math_ops.equal(substr, b'\xff\xd8\xff', name=name) 3149 3150 3151def _is_png(contents, name=None): 3152 r"""Convenience function to check if the 'contents' encodes a PNG image. 3153 3154 Args: 3155 contents: 0-D `string`. The encoded image bytes. 3156 name: A name for the operation (optional) 3157 3158 Returns: 3159 A scalar boolean tensor indicating if 'contents' may be a PNG image. 3160 is_png is susceptible to false positives. 3161 """ 3162 with ops.name_scope(name, 'is_png'): 3163 substr = string_ops.substr(contents, 0, 3) 3164 return math_ops.equal(substr, b'\211PN', name=name) 3165 3166 3167tf_export( 3168 'io.decode_and_crop_jpeg', 3169 'image.decode_and_crop_jpeg', 3170 v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])( 3171 dispatch.add_dispatch_support(gen_image_ops.decode_and_crop_jpeg)) 3172 3173tf_export( 3174 'io.decode_bmp', 3175 'image.decode_bmp', 3176 v1=['io.decode_bmp', 'image.decode_bmp'])( 3177 dispatch.add_dispatch_support(gen_image_ops.decode_bmp)) 3178tf_export( 3179 'io.decode_gif', 3180 'image.decode_gif', 3181 v1=['io.decode_gif', 'image.decode_gif'])( 3182 dispatch.add_dispatch_support(gen_image_ops.decode_gif)) 3183tf_export( 3184 'io.decode_jpeg', 3185 'image.decode_jpeg', 3186 v1=['io.decode_jpeg', 'image.decode_jpeg'])( 3187 dispatch.add_dispatch_support(gen_image_ops.decode_jpeg)) 3188tf_export( 3189 'io.decode_png', 3190 'image.decode_png', 3191 v1=['io.decode_png', 'image.decode_png'])( 3192 dispatch.add_dispatch_support(gen_image_ops.decode_png)) 3193 3194tf_export( 3195 'io.encode_jpeg', 3196 'image.encode_jpeg', 3197 v1=['io.encode_jpeg', 'image.encode_jpeg'])( 3198 dispatch.add_dispatch_support(gen_image_ops.encode_jpeg)) 3199tf_export( 3200 'io.extract_jpeg_shape', 3201 'image.extract_jpeg_shape', 3202 v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])( 3203 dispatch.add_dispatch_support(gen_image_ops.extract_jpeg_shape)) 3204 3205 3206@tf_export('io.encode_png', 'image.encode_png') 3207@dispatch.add_dispatch_support 3208def encode_png(image, compression=-1, name=None): 3209 r"""PNG-encode an image. 3210 3211 `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` 3212 where `channels` is: 3213 3214 * 1: for grayscale. 3215 * 2: for grayscale + alpha. 3216 * 3: for RGB. 3217 * 4: for RGBA. 3218 3219 The ZLIB compression level, `compression`, can be -1 for the PNG-encoder 3220 default or a value from 0 to 9. 9 is the highest compression level, 3221 generating the smallest output, but is slower. 3222 3223 Args: 3224 image: A `Tensor`. Must be one of the following types: `uint8`, `uint16`. 3225 3-D with shape `[height, width, channels]`. 3226 compression: An optional `int`. Defaults to `-1`. Compression level. 3227 name: A name for the operation (optional). 3228 3229 Returns: 3230 A `Tensor` of type `string`. 3231 """ 3232 return gen_image_ops.encode_png( 3233 ops.convert_to_tensor(image), compression, name) 3234 3235 3236@tf_export( 3237 'io.decode_image', 3238 'image.decode_image', 3239 v1=['io.decode_image', 'image.decode_image']) 3240@dispatch.add_dispatch_support 3241def decode_image(contents, 3242 channels=None, 3243 dtype=dtypes.uint8, 3244 name=None, 3245 expand_animations=True): 3246 """Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`. 3247 3248 Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the 3249 appropriate operation to convert the input bytes `string` into a `Tensor` 3250 of type `dtype`. 3251 3252 Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as 3253 opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D 3254 arrays `[height, width, num_channels]`. Make sure to take this into account 3255 when constructing your graph if you are intermixing GIF files with BMP, JPEG, 3256 and/or PNG files. Alternately, set the `expand_animations` argument of this 3257 function to `False`, in which case the op will return 3-dimensional tensors 3258 and will truncate animated GIF files to the first frame. 3259 3260 NOTE: If the first frame of an animated GIF does not occupy the entire 3261 canvas (maximum frame width x maximum frame height), then it fills the 3262 unoccupied areas (in the first frame) with zeros (black). For frames after the 3263 first frame that does not occupy the entire canvas, it uses the previous 3264 frame to fill the unoccupied areas. 3265 3266 Args: 3267 contents: A `Tensor` of type `string`. 0-D. The encoded image bytes. 3268 channels: An optional `int`. Defaults to `0`. Number of color channels for 3269 the decoded image. 3270 dtype: The desired DType of the returned `Tensor`. 3271 name: A name for the operation (optional) 3272 expand_animations: An optional `bool`. Defaults to `True`. Controls the 3273 shape of the returned op's output. If `True`, the returned op will produce 3274 a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for all GIFs, 3275 whether animated or not. If, `False`, the returned op will produce a 3-D 3276 tensor for all file types and will truncate animated GIFs to the first 3277 frame. 3278 3279 Returns: 3280 `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on 3281 the file type and the value of the `expand_animations` parameter. 3282 3283 Raises: 3284 ValueError: On incorrect number of channels. 3285 """ 3286 with ops.name_scope(name, 'decode_image'): 3287 channels = 0 if channels is None else channels 3288 if dtype not in [dtypes.float32, dtypes.uint8, dtypes.uint16]: 3289 dest_dtype = dtype 3290 dtype = dtypes.uint16 3291 return convert_image_dtype( 3292 gen_image_ops.decode_image( 3293 contents=contents, 3294 channels=channels, 3295 expand_animations=expand_animations, 3296 dtype=dtype), dest_dtype) 3297 else: 3298 return gen_image_ops.decode_image( 3299 contents=contents, 3300 channels=channels, 3301 expand_animations=expand_animations, 3302 dtype=dtype) 3303 3304 3305@tf_export('image.total_variation') 3306@dispatch.add_dispatch_support 3307def total_variation(images, name=None): 3308 """Calculate and return the total variation for one or more images. 3309 3310 The total variation is the sum of the absolute differences for neighboring 3311 pixel-values in the input images. This measures how much noise is in the 3312 images. 3313 3314 This can be used as a loss-function during optimization so as to suppress 3315 noise in images. If you have a batch of images, then you should calculate 3316 the scalar loss-value as the sum: 3317 `loss = tf.reduce_sum(tf.image.total_variation(images))` 3318 3319 This implements the anisotropic 2-D version of the formula described here: 3320 3321 https://en.wikipedia.org/wiki/Total_variation_denoising 3322 3323 Args: 3324 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 3325 of shape `[height, width, channels]`. 3326 name: A name for the operation (optional). 3327 3328 Raises: 3329 ValueError: if images.shape is not a 3-D or 4-D vector. 3330 3331 Returns: 3332 The total variation of `images`. 3333 3334 If `images` was 4-D, return a 1-D float Tensor of shape `[batch]` with the 3335 total variation for each image in the batch. 3336 If `images` was 3-D, return a scalar float with the total variation for 3337 that image. 3338 """ 3339 3340 with ops.name_scope(name, 'total_variation'): 3341 ndims = images.get_shape().ndims 3342 3343 if ndims == 3: 3344 # The input is a single image with shape [height, width, channels]. 3345 3346 # Calculate the difference of neighboring pixel-values. 3347 # The images are shifted one pixel along the height and width by slicing. 3348 pixel_dif1 = images[1:, :, :] - images[:-1, :, :] 3349 pixel_dif2 = images[:, 1:, :] - images[:, :-1, :] 3350 3351 # Sum for all axis. (None is an alias for all axis.) 3352 sum_axis = None 3353 elif ndims == 4: 3354 # The input is a batch of images with shape: 3355 # [batch, height, width, channels]. 3356 3357 # Calculate the difference of neighboring pixel-values. 3358 # The images are shifted one pixel along the height and width by slicing. 3359 pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :] 3360 pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :] 3361 3362 # Only sum for the last 3 axis. 3363 # This results in a 1-D tensor with the total variation for each image. 3364 sum_axis = [1, 2, 3] 3365 else: 3366 raise ValueError('\'images\' must be either 3 or 4-dimensional.') 3367 3368 # Calculate the total variation by taking the absolute value of the 3369 # pixel-differences and summing over the appropriate axis. 3370 tot_var = ( 3371 math_ops.reduce_sum(math_ops.abs(pixel_dif1), axis=sum_axis) + 3372 math_ops.reduce_sum(math_ops.abs(pixel_dif2), axis=sum_axis)) 3373 3374 return tot_var 3375 3376 3377@tf_export('image.sample_distorted_bounding_box', v1=[]) 3378@dispatch.add_dispatch_support 3379def sample_distorted_bounding_box_v2(image_size, 3380 bounding_boxes, 3381 seed=0, 3382 min_object_covered=0.1, 3383 aspect_ratio_range=None, 3384 area_range=None, 3385 max_attempts=None, 3386 use_image_if_no_bounding_boxes=None, 3387 name=None): 3388 """Generate a single randomly distorted bounding box for an image. 3389 3390 Bounding box annotations are often supplied in addition to ground-truth labels 3391 in image recognition or object localization tasks. A common technique for 3392 training such a system is to randomly distort an image while preserving 3393 its content, i.e. *data augmentation*. This Op outputs a randomly distorted 3394 localization of an object, i.e. bounding box, given an `image_size`, 3395 `bounding_boxes` and a series of constraints. 3396 3397 The output of this Op is a single bounding box that may be used to crop the 3398 original image. The output is returned as 3 tensors: `begin`, `size` and 3399 `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the 3400 image. The latter may be supplied to `tf.image.draw_bounding_boxes` to 3401 visualize what the bounding box looks like. 3402 3403 Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. 3404 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 3405 and the height of the underlying image. 3406 3407 For example, 3408 3409 ```python 3410 # Generate a single distorted bounding box. 3411 begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( 3412 tf.shape(image), 3413 bounding_boxes=bounding_boxes, 3414 min_object_covered=0.1) 3415 3416 # Draw the bounding box in an image summary. 3417 image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), 3418 bbox_for_draw) 3419 tf.compat.v1.summary.image('images_with_box', image_with_box) 3420 3421 # Employ the bounding box to distort the image. 3422 distorted_image = tf.slice(image, begin, size) 3423 ``` 3424 3425 Note that if no bounding box information is available, setting 3426 `use_image_if_no_bounding_boxes = true` will assume there is a single implicit 3427 bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is 3428 false and no bounding boxes are supplied, an error is raised. 3429 3430 For producing deterministic results given a `seed` value, use 3431 `tf.image.stateless_sample_distorted_bounding_box`. Unlike using the `seed` 3432 param with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops 3433 guarantee the same results given the same seed independent of how many times 3434 the function is called, and independent of global seed settings 3435 (e.g. tf.random.set_seed). 3436 3437 Args: 3438 image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, 3439 `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. 3440 bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` 3441 describing the N bounding boxes associated with the image. 3442 seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero, the 3443 random number generator is seeded by the given `seed`. Otherwise, it is 3444 seeded by a random seed. 3445 min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The 3446 cropped area of the image must contain at least this fraction of any 3447 bounding box supplied. The value of this parameter should be non-negative. 3448 In the case of 0, the cropped area does not need to overlap any of the 3449 bounding boxes supplied. 3450 aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, 3451 1.33]`. The cropped area of the image must have an aspect `ratio = width / 3452 height` within this range. 3453 area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The 3454 cropped area of the image must contain a fraction of the supplied image 3455 within this range. 3456 max_attempts: An optional `int`. Defaults to `100`. Number of attempts at 3457 generating a cropped region of the image of the specified constraints. 3458 After `max_attempts` failures, return the entire image. 3459 use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`. 3460 Controls behavior if no bounding boxes supplied. If true, assume an 3461 implicit bounding box covering the whole input. If false, raise an error. 3462 name: A name for the operation (optional). 3463 3464 Returns: 3465 A tuple of `Tensor` objects (begin, size, bboxes). 3466 3467 begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3468 `[offset_height, offset_width, 0]`. Provide as input to 3469 `tf.slice`. 3470 size: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3471 `[target_height, target_width, -1]`. Provide as input to 3472 `tf.slice`. 3473 bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing 3474 the distorted bounding box. 3475 Provide as input to `tf.image.draw_bounding_boxes`. 3476 3477 Raises: 3478 ValueError: If no seed is specified and op determinism is enabled. 3479 """ 3480 if seed: 3481 seed1, seed2 = random_seed.get_seed(seed) 3482 else: 3483 if config.is_op_determinism_enabled(): 3484 raise ValueError( 3485 f'tf.image.sample_distorted_bounding_box requires a non-zero seed to ' 3486 f'be passed in when determinism is enabled, but got seed={seed}. ' 3487 f'Please pass in a non-zero seed, e.g. by passing "seed=1".') 3488 seed1, seed2 = (0, 0) 3489 with ops.name_scope(name, 'sample_distorted_bounding_box'): 3490 return gen_image_ops.sample_distorted_bounding_box_v2( 3491 image_size, 3492 bounding_boxes, 3493 seed=seed1, 3494 seed2=seed2, 3495 min_object_covered=min_object_covered, 3496 aspect_ratio_range=aspect_ratio_range, 3497 area_range=area_range, 3498 max_attempts=max_attempts, 3499 use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, 3500 name=name) 3501 3502 3503@tf_export('image.stateless_sample_distorted_bounding_box', v1=[]) 3504@dispatch.add_dispatch_support 3505def stateless_sample_distorted_bounding_box(image_size, 3506 bounding_boxes, 3507 seed, 3508 min_object_covered=0.1, 3509 aspect_ratio_range=None, 3510 area_range=None, 3511 max_attempts=None, 3512 use_image_if_no_bounding_boxes=None, 3513 name=None): 3514 """Generate a randomly distorted bounding box for an image deterministically. 3515 3516 Bounding box annotations are often supplied in addition to ground-truth labels 3517 in image recognition or object localization tasks. A common technique for 3518 training such a system is to randomly distort an image while preserving 3519 its content, i.e. *data augmentation*. This Op, given the same `seed`, 3520 deterministically outputs a randomly distorted localization of an object, i.e. 3521 bounding box, given an `image_size`, `bounding_boxes` and a series of 3522 constraints. 3523 3524 The output of this Op is a single bounding box that may be used to crop the 3525 original image. The output is returned as 3 tensors: `begin`, `size` and 3526 `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the 3527 image. The latter may be supplied to `tf.image.draw_bounding_boxes` to 3528 visualize what the bounding box looks like. 3529 3530 Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. 3531 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 3532 and the height of the underlying image. 3533 3534 The output of this Op is guaranteed to be the same given the same `seed` and 3535 is independent of how many times the function is called, and independent of 3536 global seed settings (e.g. `tf.random.set_seed`). 3537 3538 Example usage: 3539 3540 >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]]) 3541 >>> bbox = tf.constant( 3542 ... [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) 3543 >>> seed = (1, 2) 3544 >>> # Generate a single distorted bounding box. 3545 >>> bbox_begin, bbox_size, bbox_draw = ( 3546 ... tf.image.stateless_sample_distorted_bounding_box( 3547 ... tf.shape(image), bounding_boxes=bbox, seed=seed)) 3548 >>> # Employ the bounding box to distort the image. 3549 >>> tf.slice(image, bbox_begin, bbox_size) 3550 <tf.Tensor: shape=(2, 2, 1), dtype=int64, numpy= 3551 array([[[1], 3552 [2]], 3553 [[4], 3554 [5]]])> 3555 >>> # Draw the bounding box in an image summary. 3556 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 3557 >>> tf.image.draw_bounding_boxes( 3558 ... tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors) 3559 <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy= 3560 array([[[[1.], 3561 [1.], 3562 [3.]], 3563 [[1.], 3564 [1.], 3565 [6.]], 3566 [[7.], 3567 [8.], 3568 [9.]]]], dtype=float32)> 3569 3570 Note that if no bounding box information is available, setting 3571 `use_image_if_no_bounding_boxes = true` will assume there is a single implicit 3572 bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is 3573 false and no bounding boxes are supplied, an error is raised. 3574 3575 Args: 3576 image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, 3577 `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. 3578 bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` 3579 describing the N bounding boxes associated with the image. 3580 seed: A shape [2] Tensor, the seed to the random number generator. Must have 3581 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 3582 min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The 3583 cropped area of the image must contain at least this fraction of any 3584 bounding box supplied. The value of this parameter should be non-negative. 3585 In the case of 0, the cropped area does not need to overlap any of the 3586 bounding boxes supplied. 3587 aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, 3588 1.33]`. The cropped area of the image must have an aspect `ratio = width / 3589 height` within this range. 3590 area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The 3591 cropped area of the image must contain a fraction of the supplied image 3592 within this range. 3593 max_attempts: An optional `int`. Defaults to `100`. Number of attempts at 3594 generating a cropped region of the image of the specified constraints. 3595 After `max_attempts` failures, return the entire image. 3596 use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`. 3597 Controls behavior if no bounding boxes supplied. If true, assume an 3598 implicit bounding box covering the whole input. If false, raise an error. 3599 name: A name for the operation (optional). 3600 3601 Returns: 3602 A tuple of `Tensor` objects (begin, size, bboxes). 3603 3604 begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3605 `[offset_height, offset_width, 0]`. Provide as input to 3606 `tf.slice`. 3607 size: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3608 `[target_height, target_width, -1]`. Provide as input to 3609 `tf.slice`. 3610 bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing 3611 the distorted bounding box. 3612 Provide as input to `tf.image.draw_bounding_boxes`. 3613 """ 3614 with ops.name_scope(name, 'stateless_sample_distorted_bounding_box'): 3615 return gen_image_ops.stateless_sample_distorted_bounding_box( 3616 image_size=image_size, 3617 bounding_boxes=bounding_boxes, 3618 seed=seed, 3619 min_object_covered=min_object_covered, 3620 aspect_ratio_range=aspect_ratio_range, 3621 area_range=area_range, 3622 max_attempts=max_attempts, 3623 use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, 3624 name=name) 3625 3626 3627@tf_export(v1=['image.sample_distorted_bounding_box']) 3628@dispatch.add_dispatch_support 3629@deprecation.deprecated( 3630 date=None, 3631 instructions='`seed2` arg is deprecated.' 3632 'Use sample_distorted_bounding_box_v2 instead.') 3633def sample_distorted_bounding_box(image_size, 3634 bounding_boxes, 3635 seed=None, 3636 seed2=None, 3637 min_object_covered=0.1, 3638 aspect_ratio_range=None, 3639 area_range=None, 3640 max_attempts=None, 3641 use_image_if_no_bounding_boxes=None, 3642 name=None): 3643 """Generate a single randomly distorted bounding box for an image. 3644 3645 Bounding box annotations are often supplied in addition to ground-truth labels 3646 in image recognition or object localization tasks. A common technique for 3647 training such a system is to randomly distort an image while preserving 3648 its content, i.e. *data augmentation*. This Op outputs a randomly distorted 3649 localization of an object, i.e. bounding box, given an `image_size`, 3650 `bounding_boxes` and a series of constraints. 3651 3652 The output of this Op is a single bounding box that may be used to crop the 3653 original image. The output is returned as 3 tensors: `begin`, `size` and 3654 `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the 3655 image. The latter may be supplied to `tf.image.draw_bounding_boxes` to 3656 visualize what the bounding box looks like. 3657 3658 Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. 3659 The 3660 bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and 3661 height of the underlying image. 3662 3663 For example, 3664 3665 ```python 3666 # Generate a single distorted bounding box. 3667 begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( 3668 tf.shape(image), 3669 bounding_boxes=bounding_boxes, 3670 min_object_covered=0.1) 3671 3672 # Draw the bounding box in an image summary. 3673 image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), 3674 bbox_for_draw) 3675 tf.compat.v1.summary.image('images_with_box', image_with_box) 3676 3677 # Employ the bounding box to distort the image. 3678 distorted_image = tf.slice(image, begin, size) 3679 ``` 3680 3681 Note that if no bounding box information is available, setting 3682 `use_image_if_no_bounding_boxes = True` will assume there is a single implicit 3683 bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is 3684 false and no bounding boxes are supplied, an error is raised. 3685 3686 Args: 3687 image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, 3688 `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. 3689 bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` 3690 describing the N bounding boxes associated with the image. 3691 seed: An optional `int`. Defaults to `0`. If either `seed` or `seed2` are 3692 set to non-zero, the random number generator is seeded by the given 3693 `seed`. Otherwise, it is seeded by a random seed. 3694 seed2: An optional `int`. Defaults to `0`. A second seed to avoid seed 3695 collision. 3696 min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The 3697 cropped area of the image must contain at least this fraction of any 3698 bounding box supplied. The value of this parameter should be non-negative. 3699 In the case of 0, the cropped area does not need to overlap any of the 3700 bounding boxes supplied. 3701 aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, 3702 1.33]`. The cropped area of the image must have an aspect ratio = width / 3703 height within this range. 3704 area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The 3705 cropped area of the image must contain a fraction of the supplied image 3706 within this range. 3707 max_attempts: An optional `int`. Defaults to `100`. Number of attempts at 3708 generating a cropped region of the image of the specified constraints. 3709 After `max_attempts` failures, return the entire image. 3710 use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`. 3711 Controls behavior if no bounding boxes supplied. If true, assume an 3712 implicit bounding box covering the whole input. If false, raise an error. 3713 name: A name for the operation (optional). 3714 3715 Returns: 3716 A tuple of `Tensor` objects (begin, size, bboxes). 3717 3718 begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3719 `[offset_height, offset_width, 0]`. Provide as input to 3720 `tf.slice`. 3721 size: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3722 `[target_height, target_width, -1]`. Provide as input to 3723 `tf.slice`. 3724 bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing 3725 the distorted bounding box. 3726 Provide as input to `tf.image.draw_bounding_boxes`. 3727 3728 Raises: 3729 ValueError: If no seed is specified and op determinism is enabled. 3730 """ 3731 if not seed and not seed2 and config.is_op_determinism_enabled(): 3732 raise ValueError( 3733 f'tf.compat.v1.image.sample_distorted_bounding_box requires "seed" or ' 3734 f'"seed2" to be non-zero when determinism is enabled. Please pass in ' 3735 f'a non-zero seed, e.g. by passing "seed=1". Got seed={seed} and ' 3736 f"seed2={seed2}") 3737 with ops.name_scope(name, 'sample_distorted_bounding_box'): 3738 return gen_image_ops.sample_distorted_bounding_box_v2( 3739 image_size, 3740 bounding_boxes, 3741 seed=seed, 3742 seed2=seed2, 3743 min_object_covered=min_object_covered, 3744 aspect_ratio_range=aspect_ratio_range, 3745 area_range=area_range, 3746 max_attempts=max_attempts, 3747 use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, 3748 name=name) 3749 3750 3751@tf_export('image.non_max_suppression') 3752@dispatch.add_dispatch_support 3753def non_max_suppression(boxes, 3754 scores, 3755 max_output_size, 3756 iou_threshold=0.5, 3757 score_threshold=float('-inf'), 3758 name=None): 3759 """Greedily selects a subset of bounding boxes in descending order of score. 3760 3761 Prunes away boxes that have high intersection-over-union (IOU) overlap 3762 with previously selected boxes. Bounding boxes are supplied as 3763 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any 3764 diagonal pair of box corners and the coordinates can be provided as normalized 3765 (i.e., lying in the interval `[0, 1]`) or absolute. Note that this algorithm 3766 is agnostic to where the origin is in the coordinate system. Note that this 3767 algorithm is invariant to orthogonal transformations and translations 3768 of the coordinate system; thus translating or reflections of the coordinate 3769 system result in the same boxes being selected by the algorithm. 3770 The output of this operation is a set of integers indexing into the input 3771 collection of bounding boxes representing the selected boxes. The bounding 3772 box coordinates corresponding to the selected indices can then be obtained 3773 using the `tf.gather` operation. For example: 3774 ```python 3775 selected_indices = tf.image.non_max_suppression( 3776 boxes, scores, max_output_size, iou_threshold) 3777 selected_boxes = tf.gather(boxes, selected_indices) 3778 ``` 3779 3780 Args: 3781 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`. 3782 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 3783 score corresponding to each box (each row of boxes). 3784 max_output_size: A scalar integer `Tensor` representing the maximum number 3785 of boxes to be selected by non-max suppression. 3786 iou_threshold: A 0-D float tensor representing the threshold for deciding 3787 whether boxes overlap too much with respect to IOU. 3788 score_threshold: A 0-D float tensor representing the threshold for deciding 3789 when to remove boxes based on score. 3790 name: A name for the operation (optional). 3791 3792 Returns: 3793 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 3794 selected indices from the boxes tensor, where `M <= max_output_size`. 3795 """ 3796 with ops.name_scope(name, 'non_max_suppression'): 3797 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') 3798 score_threshold = ops.convert_to_tensor( 3799 score_threshold, name='score_threshold') 3800 return gen_image_ops.non_max_suppression_v3(boxes, scores, max_output_size, 3801 iou_threshold, score_threshold) 3802 3803 3804@tf_export('image.non_max_suppression_with_scores') 3805@dispatch.add_dispatch_support 3806def non_max_suppression_with_scores(boxes, 3807 scores, 3808 max_output_size, 3809 iou_threshold=0.5, 3810 score_threshold=float('-inf'), 3811 soft_nms_sigma=0.0, 3812 name=None): 3813 """Greedily selects a subset of bounding boxes in descending order of score. 3814 3815 Prunes away boxes that have high intersection-over-union (IOU) overlap 3816 with previously selected boxes. Bounding boxes are supplied as 3817 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any 3818 diagonal pair of box corners and the coordinates can be provided as normalized 3819 (i.e., lying in the interval `[0, 1]`) or absolute. Note that this algorithm 3820 is agnostic to where the origin is in the coordinate system. Note that this 3821 algorithm is invariant to orthogonal transformations and translations 3822 of the coordinate system; thus translating or reflections of the coordinate 3823 system result in the same boxes being selected by the algorithm. 3824 The output of this operation is a set of integers indexing into the input 3825 collection of bounding boxes representing the selected boxes. The bounding 3826 box coordinates corresponding to the selected indices can then be obtained 3827 using the `tf.gather` operation. For example: 3828 ```python 3829 selected_indices, selected_scores = tf.image.non_max_suppression_padded( 3830 boxes, scores, max_output_size, iou_threshold=1.0, score_threshold=0.1, 3831 soft_nms_sigma=0.5) 3832 selected_boxes = tf.gather(boxes, selected_indices) 3833 ``` 3834 3835 This function generalizes the `tf.image.non_max_suppression` op by also 3836 supporting a Soft-NMS (with Gaussian weighting) mode (c.f. 3837 Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score 3838 of other overlapping boxes instead of directly causing them to be pruned. 3839 Consequently, in contrast to `tf.image.non_max_suppression`, 3840 `tf.image.non_max_suppression_with_scores` returns the new scores of each 3841 input box in the second output, `selected_scores`. 3842 3843 To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be 3844 larger than 0. When `soft_nms_sigma` equals 0, the behavior of 3845 `tf.image.non_max_suppression_with_scores` is identical to that of 3846 `tf.image.non_max_suppression` (except for the extra output) both in function 3847 and in running time. 3848 3849 Note that when `soft_nms_sigma` > 0, Soft-NMS is performed and `iou_threshold` 3850 is ignored. `iou_threshold` is only used for standard NMS. 3851 3852 Args: 3853 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`. 3854 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 3855 score corresponding to each box (each row of boxes). 3856 max_output_size: A scalar integer `Tensor` representing the maximum number 3857 of boxes to be selected by non-max suppression. 3858 iou_threshold: A 0-D float tensor representing the threshold for deciding 3859 whether boxes overlap too much with respect to IOU. 3860 score_threshold: A 0-D float tensor representing the threshold for deciding 3861 when to remove boxes based on score. 3862 soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft 3863 NMS; see Bodla et al (c.f. https://arxiv.org/abs/1704.04503). When 3864 `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard) 3865 NMS. 3866 name: A name for the operation (optional). 3867 3868 Returns: 3869 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 3870 selected indices from the boxes tensor, where `M <= max_output_size`. 3871 selected_scores: A 1-D float tensor of shape `[M]` representing the 3872 corresponding scores for each selected box, where `M <= max_output_size`. 3873 Scores only differ from corresponding input scores when using Soft NMS 3874 (i.e. when `soft_nms_sigma>0`) 3875 """ 3876 with ops.name_scope(name, 'non_max_suppression_with_scores'): 3877 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') 3878 score_threshold = ops.convert_to_tensor( 3879 score_threshold, name='score_threshold') 3880 soft_nms_sigma = ops.convert_to_tensor( 3881 soft_nms_sigma, name='soft_nms_sigma') 3882 (selected_indices, selected_scores, 3883 _) = gen_image_ops.non_max_suppression_v5( 3884 boxes, 3885 scores, 3886 max_output_size, 3887 iou_threshold, 3888 score_threshold, 3889 soft_nms_sigma, 3890 pad_to_max_output_size=False) 3891 return selected_indices, selected_scores 3892 3893 3894@tf_export('image.non_max_suppression_overlaps') 3895@dispatch.add_dispatch_support 3896def non_max_suppression_with_overlaps(overlaps, 3897 scores, 3898 max_output_size, 3899 overlap_threshold=0.5, 3900 score_threshold=float('-inf'), 3901 name=None): 3902 """Greedily selects a subset of bounding boxes in descending order of score. 3903 3904 Prunes away boxes that have high overlap with previously selected boxes. 3905 N-by-n overlap values are supplied as square matrix. 3906 The output of this operation is a set of integers indexing into the input 3907 collection of bounding boxes representing the selected boxes. The bounding 3908 box coordinates corresponding to the selected indices can then be obtained 3909 using the `tf.gather` operation. For example: 3910 ```python 3911 selected_indices = tf.image.non_max_suppression_overlaps( 3912 overlaps, scores, max_output_size, iou_threshold) 3913 selected_boxes = tf.gather(boxes, selected_indices) 3914 ``` 3915 3916 Args: 3917 overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]` 3918 representing the n-by-n box overlap values. 3919 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 3920 score corresponding to each box (each row of boxes). 3921 max_output_size: A scalar integer `Tensor` representing the maximum number 3922 of boxes to be selected by non-max suppression. 3923 overlap_threshold: A 0-D float tensor representing the threshold for 3924 deciding whether boxes overlap too much with respect to the provided 3925 overlap values. 3926 score_threshold: A 0-D float tensor representing the threshold for deciding 3927 when to remove boxes based on score. 3928 name: A name for the operation (optional). 3929 3930 Returns: 3931 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 3932 selected indices from the overlaps tensor, where `M <= max_output_size`. 3933 """ 3934 with ops.name_scope(name, 'non_max_suppression_overlaps'): 3935 overlap_threshold = ops.convert_to_tensor( 3936 overlap_threshold, name='overlap_threshold') 3937 # pylint: disable=protected-access 3938 return gen_image_ops.non_max_suppression_with_overlaps( 3939 overlaps, scores, max_output_size, overlap_threshold, score_threshold) 3940 # pylint: enable=protected-access 3941 3942 3943_rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115], 3944 [0.587, -0.27455667, -0.52273617], 3945 [0.114, -0.32134392, 0.31119955]] 3946 3947 3948@tf_export('image.rgb_to_yiq') 3949@dispatch.add_dispatch_support 3950def rgb_to_yiq(images): 3951 """Converts one or more images from RGB to YIQ. 3952 3953 Outputs a tensor of the same shape as the `images` tensor, containing the YIQ 3954 value of the pixels. 3955 The output is only well defined if the value in images are in [0,1]. 3956 3957 Usage Example: 3958 3959 >>> x = tf.constant([[[1.0, 2.0, 3.0]]]) 3960 >>> tf.image.rgb_to_yiq(x) 3961 <tf.Tensor: shape=(1, 1, 3), dtype=float32, 3962 numpy=array([[[ 1.815 , -0.91724455, 0.09962624]]], dtype=float32)> 3963 3964 Args: 3965 images: 2-D or higher rank. Image data to convert. Last dimension must be 3966 size 3. 3967 3968 Returns: 3969 images: tensor with the same shape as `images`. 3970 """ 3971 images = ops.convert_to_tensor(images, name='images') 3972 kernel = ops.convert_to_tensor( 3973 _rgb_to_yiq_kernel, dtype=images.dtype, name='kernel') 3974 ndims = images.get_shape().ndims 3975 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 3976 3977 3978_yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021], 3979 [0.6208248, -0.64720424, 1.70423049]] 3980 3981 3982@tf_export('image.yiq_to_rgb') 3983@dispatch.add_dispatch_support 3984def yiq_to_rgb(images): 3985 """Converts one or more images from YIQ to RGB. 3986 3987 Outputs a tensor of the same shape as the `images` tensor, containing the RGB 3988 value of the pixels. 3989 The output is only well defined if the Y value in images are in [0,1], 3990 I value are in [-0.5957,0.5957] and Q value are in [-0.5226,0.5226]. 3991 3992 Args: 3993 images: 2-D or higher rank. Image data to convert. Last dimension must be 3994 size 3. 3995 3996 Returns: 3997 images: tensor with the same shape as `images`. 3998 """ 3999 images = ops.convert_to_tensor(images, name='images') 4000 kernel = ops.convert_to_tensor( 4001 _yiq_to_rgb_kernel, dtype=images.dtype, name='kernel') 4002 ndims = images.get_shape().ndims 4003 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 4004 4005 4006_rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538], 4007 [0.587, -0.28886916, -0.51496512], 4008 [0.114, 0.43601035, -0.10001026]] 4009 4010 4011@tf_export('image.rgb_to_yuv') 4012@dispatch.add_dispatch_support 4013def rgb_to_yuv(images): 4014 """Converts one or more images from RGB to YUV. 4015 4016 Outputs a tensor of the same shape as the `images` tensor, containing the YUV 4017 value of the pixels. 4018 The output is only well defined if the value in images are in [0, 1]. 4019 There are two ways of representing an image: [0, 255] pixel values range or 4020 [0, 1] (as float) pixel values range. Users need to convert the input image 4021 into a float [0, 1] range. 4022 4023 Args: 4024 images: 2-D or higher rank. Image data to convert. Last dimension must be 4025 size 3. 4026 4027 Returns: 4028 images: tensor with the same shape as `images`. 4029 """ 4030 images = ops.convert_to_tensor(images, name='images') 4031 kernel = ops.convert_to_tensor( 4032 _rgb_to_yuv_kernel, dtype=images.dtype, name='kernel') 4033 ndims = images.get_shape().ndims 4034 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 4035 4036 4037_yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185], 4038 [1.13988303, -0.58062185, 0]] 4039 4040 4041@tf_export('image.yuv_to_rgb') 4042@dispatch.add_dispatch_support 4043def yuv_to_rgb(images): 4044 """Converts one or more images from YUV to RGB. 4045 4046 Outputs a tensor of the same shape as the `images` tensor, containing the RGB 4047 value of the pixels. 4048 The output is only well defined if the Y value in images are in [0,1], 4049 U and V value are in [-0.5,0.5]. 4050 4051 As per the above description, you need to scale your YUV images if their 4052 pixel values are not in the required range. Below given example illustrates 4053 preprocessing of each channel of images before feeding them to `yuv_to_rgb`. 4054 4055 ```python 4056 yuv_images = tf.random.uniform(shape=[100, 64, 64, 3], maxval=255) 4057 last_dimension_axis = len(yuv_images.shape) - 1 4058 yuv_tensor_images = tf.truediv( 4059 tf.subtract( 4060 yuv_images, 4061 tf.reduce_min(yuv_images) 4062 ), 4063 tf.subtract( 4064 tf.reduce_max(yuv_images), 4065 tf.reduce_min(yuv_images) 4066 ) 4067 ) 4068 y, u, v = tf.split(yuv_tensor_images, 3, axis=last_dimension_axis) 4069 target_uv_min, target_uv_max = -0.5, 0.5 4070 u = u * (target_uv_max - target_uv_min) + target_uv_min 4071 v = v * (target_uv_max - target_uv_min) + target_uv_min 4072 preprocessed_yuv_images = tf.concat([y, u, v], axis=last_dimension_axis) 4073 rgb_tensor_images = tf.image.yuv_to_rgb(preprocessed_yuv_images) 4074 ``` 4075 4076 Args: 4077 images: 2-D or higher rank. Image data to convert. Last dimension must be 4078 size 3. 4079 4080 Returns: 4081 images: tensor with the same shape as `images`. 4082 """ 4083 images = ops.convert_to_tensor(images, name='images') 4084 kernel = ops.convert_to_tensor( 4085 _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel') 4086 ndims = images.get_shape().ndims 4087 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 4088 4089 4090def _verify_compatible_image_shapes(img1, img2): 4091 """Checks if two image tensors are compatible for applying SSIM or PSNR. 4092 4093 This function checks if two sets of images have ranks at least 3, and if the 4094 last three dimensions match. 4095 4096 Args: 4097 img1: Tensor containing the first image batch. 4098 img2: Tensor containing the second image batch. 4099 4100 Returns: 4101 A tuple containing: the first tensor shape, the second tensor shape, and a 4102 list of control_flow_ops.Assert() ops implementing the checks. 4103 4104 Raises: 4105 ValueError: When static shape check fails. 4106 """ 4107 shape1 = img1.get_shape().with_rank_at_least(3) 4108 shape2 = img2.get_shape().with_rank_at_least(3) 4109 shape1[-3:].assert_is_compatible_with(shape2[-3:]) 4110 4111 if shape1.ndims is not None and shape2.ndims is not None: 4112 for dim1, dim2 in zip( 4113 reversed(shape1.dims[:-3]), reversed(shape2.dims[:-3])): 4114 if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)): 4115 raise ValueError('Two images are not compatible: %s and %s' % 4116 (shape1, shape2)) 4117 4118 # Now assign shape tensors. 4119 shape1, shape2 = array_ops.shape_n([img1, img2]) 4120 4121 # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable. 4122 checks = [] 4123 checks.append( 4124 control_flow_ops.Assert( 4125 math_ops.greater_equal(array_ops.size(shape1), 3), [shape1, shape2], 4126 summarize=10)) 4127 checks.append( 4128 control_flow_ops.Assert( 4129 math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])), 4130 [shape1, shape2], 4131 summarize=10)) 4132 return shape1, shape2, checks 4133 4134 4135@tf_export('image.psnr') 4136@dispatch.add_dispatch_support 4137def psnr(a, b, max_val, name=None): 4138 """Returns the Peak Signal-to-Noise Ratio between a and b. 4139 4140 This is intended to be used on signals (or images). Produces a PSNR value for 4141 each image in batch. 4142 4143 The last three dimensions of input are expected to be [height, width, depth]. 4144 4145 Example: 4146 4147 ```python 4148 # Read images from file. 4149 im1 = tf.decode_png('path/to/im1.png') 4150 im2 = tf.decode_png('path/to/im2.png') 4151 # Compute PSNR over tf.uint8 Tensors. 4152 psnr1 = tf.image.psnr(im1, im2, max_val=255) 4153 4154 # Compute PSNR over tf.float32 Tensors. 4155 im1 = tf.image.convert_image_dtype(im1, tf.float32) 4156 im2 = tf.image.convert_image_dtype(im2, tf.float32) 4157 psnr2 = tf.image.psnr(im1, im2, max_val=1.0) 4158 # psnr1 and psnr2 both have type tf.float32 and are almost equal. 4159 ``` 4160 4161 Args: 4162 a: First set of images. 4163 b: Second set of images. 4164 max_val: The dynamic range of the images (i.e., the difference between the 4165 maximum the and minimum allowed values). 4166 name: Namespace to embed the computation in. 4167 4168 Returns: 4169 The scalar PSNR between a and b. The returned tensor has type `tf.float32` 4170 and shape [batch_size, 1]. 4171 """ 4172 with ops.name_scope(name, 'PSNR', [a, b]): 4173 # Need to convert the images to float32. Scale max_val accordingly so that 4174 # PSNR is computed correctly. 4175 max_val = math_ops.cast(max_val, a.dtype) 4176 max_val = convert_image_dtype(max_val, dtypes.float32) 4177 a = convert_image_dtype(a, dtypes.float32) 4178 b = convert_image_dtype(b, dtypes.float32) 4179 mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1]) 4180 psnr_val = math_ops.subtract( 4181 20 * math_ops.log(max_val) / math_ops.log(10.0), 4182 np.float32(10 / np.log(10)) * math_ops.log(mse), 4183 name='psnr') 4184 4185 _, _, checks = _verify_compatible_image_shapes(a, b) 4186 with ops.control_dependencies(checks): 4187 return array_ops.identity(psnr_val) 4188 4189 4190def _ssim_helper(x, y, reducer, max_val, compensation=1.0, k1=0.01, k2=0.03): 4191 r"""Helper function for computing SSIM. 4192 4193 SSIM estimates covariances with weighted sums. The default parameters 4194 use a biased estimate of the covariance: 4195 Suppose `reducer` is a weighted sum, then the mean estimators are 4196 \mu_x = \sum_i w_i x_i, 4197 \mu_y = \sum_i w_i y_i, 4198 where w_i's are the weighted-sum weights, and covariance estimator is 4199 cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y) 4200 with assumption \sum_i w_i = 1. This covariance estimator is biased, since 4201 E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y). 4202 For SSIM measure with unbiased covariance estimators, pass as `compensation` 4203 argument (1 - \sum_i w_i ^ 2). 4204 4205 Args: 4206 x: First set of images. 4207 y: Second set of images. 4208 reducer: Function that computes 'local' averages from the set of images. For 4209 non-convolutional version, this is usually tf.reduce_mean(x, [1, 2]), and 4210 for convolutional version, this is usually tf.nn.avg_pool2d or 4211 tf.nn.conv2d with weighted-sum kernel. 4212 max_val: The dynamic range (i.e., the difference between the maximum 4213 possible allowed value and the minimum allowed value). 4214 compensation: Compensation factor. See above. 4215 k1: Default value 0.01 4216 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 4217 it would be better if we took the values in the range of 0 < K2 < 0.4). 4218 4219 Returns: 4220 A pair containing the luminance measure, and the contrast-structure measure. 4221 """ 4222 4223 c1 = (k1 * max_val)**2 4224 c2 = (k2 * max_val)**2 4225 4226 # SSIM luminance measure is 4227 # (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1). 4228 mean0 = reducer(x) 4229 mean1 = reducer(y) 4230 num0 = mean0 * mean1 * 2.0 4231 den0 = math_ops.square(mean0) + math_ops.square(mean1) 4232 luminance = (num0 + c1) / (den0 + c1) 4233 4234 # SSIM contrast-structure measure is 4235 # (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2). 4236 # Note that `reducer` is a weighted sum with weight w_k, \sum_i w_i = 1, then 4237 # cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y) 4238 # = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j). 4239 num1 = reducer(x * y) * 2.0 4240 den1 = reducer(math_ops.square(x) + math_ops.square(y)) 4241 c2 *= compensation 4242 cs = (num1 - num0 + c2) / (den1 - den0 + c2) 4243 4244 # SSIM score is the product of the luminance and contrast-structure measures. 4245 return luminance, cs 4246 4247 4248def _fspecial_gauss(size, sigma): 4249 """Function to mimic the 'fspecial' gaussian MATLAB function.""" 4250 size = ops.convert_to_tensor(size, dtypes.int32) 4251 sigma = ops.convert_to_tensor(sigma) 4252 4253 coords = math_ops.cast(math_ops.range(size), sigma.dtype) 4254 coords -= math_ops.cast(size - 1, sigma.dtype) / 2.0 4255 4256 g = math_ops.square(coords) 4257 g *= -0.5 / math_ops.square(sigma) 4258 4259 g = array_ops.reshape(g, shape=[1, -1]) + array_ops.reshape(g, shape=[-1, 1]) 4260 g = array_ops.reshape(g, shape=[1, -1]) # For tf.nn.softmax(). 4261 g = nn_ops.softmax(g) 4262 return array_ops.reshape(g, shape=[size, size, 1, 1]) 4263 4264 4265def _ssim_per_channel(img1, 4266 img2, 4267 max_val=1.0, 4268 filter_size=11, 4269 filter_sigma=1.5, 4270 k1=0.01, 4271 k2=0.03, 4272 return_index_map=False): 4273 """Computes SSIM index between img1 and img2 per color channel. 4274 4275 This function matches the standard SSIM implementation from: 4276 Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image 4277 quality assessment: from error visibility to structural similarity. IEEE 4278 transactions on image processing. 4279 4280 Details: 4281 - 11x11 Gaussian filter of width 1.5 is used. 4282 - k1 = 0.01, k2 = 0.03 as in the original paper. 4283 4284 Args: 4285 img1: First image batch. 4286 img2: Second image batch. 4287 max_val: The dynamic range of the images (i.e., the difference between the 4288 maximum the and minimum allowed values). 4289 filter_size: Default value 11 (size of gaussian filter). 4290 filter_sigma: Default value 1.5 (width of gaussian filter). 4291 k1: Default value 0.01 4292 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 4293 it would be better if we took the values in the range of 0 < K2 < 0.4). 4294 return_index_map: If True returns local SSIM map instead of the global mean. 4295 4296 Returns: 4297 A pair of tensors containing and channel-wise SSIM and contrast-structure 4298 values. The shape is [..., channels]. 4299 """ 4300 filter_size = constant_op.constant(filter_size, dtype=dtypes.int32) 4301 filter_sigma = constant_op.constant(filter_sigma, dtype=img1.dtype) 4302 4303 shape1, shape2 = array_ops.shape_n([img1, img2]) 4304 checks = [ 4305 control_flow_ops.Assert( 4306 math_ops.reduce_all( 4307 math_ops.greater_equal(shape1[-3:-1], filter_size)), 4308 [shape1, filter_size], 4309 summarize=8), 4310 control_flow_ops.Assert( 4311 math_ops.reduce_all( 4312 math_ops.greater_equal(shape2[-3:-1], filter_size)), 4313 [shape2, filter_size], 4314 summarize=8) 4315 ] 4316 4317 # Enforce the check to run before computation. 4318 with ops.control_dependencies(checks): 4319 img1 = array_ops.identity(img1) 4320 4321 # TODO(sjhwang): Try to cache kernels and compensation factor. 4322 kernel = _fspecial_gauss(filter_size, filter_sigma) 4323 kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1]) 4324 4325 # The correct compensation factor is `1.0 - tf.reduce_sum(tf.square(kernel))`, 4326 # but to match MATLAB implementation of MS-SSIM, we use 1.0 instead. 4327 compensation = 1.0 4328 4329 # TODO(sjhwang): Try FFT. 4330 # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying 4331 # 1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter. 4332 def reducer(x): 4333 shape = array_ops.shape(x) 4334 x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0)) 4335 y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID') 4336 return array_ops.reshape( 4337 y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0)) 4338 4339 luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation, k1, 4340 k2) 4341 4342 # Average over the second and the third from the last: height, width. 4343 if return_index_map: 4344 ssim_val = luminance * cs 4345 else: 4346 axes = constant_op.constant([-3, -2], dtype=dtypes.int32) 4347 ssim_val = math_ops.reduce_mean(luminance * cs, axes) 4348 cs = math_ops.reduce_mean(cs, axes) 4349 return ssim_val, cs 4350 4351 4352@tf_export('image.ssim') 4353@dispatch.add_dispatch_support 4354def ssim(img1, 4355 img2, 4356 max_val, 4357 filter_size=11, 4358 filter_sigma=1.5, 4359 k1=0.01, 4360 k2=0.03, 4361 return_index_map=False): 4362 """Computes SSIM index between img1 and img2. 4363 4364 This function is based on the standard SSIM implementation from: 4365 Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image 4366 quality assessment: from error visibility to structural similarity. IEEE 4367 transactions on image processing. 4368 4369 Note: The true SSIM is only defined on grayscale. This function does not 4370 perform any colorspace transform. (If the input is already YUV, then it will 4371 compute YUV SSIM average.) 4372 4373 Details: 4374 - 11x11 Gaussian filter of width 1.5 is used. 4375 - k1 = 0.01, k2 = 0.03 as in the original paper. 4376 4377 The image sizes must be at least 11x11 because of the filter size. 4378 4379 Example: 4380 4381 ```python 4382 # Read images (of size 255 x 255) from file. 4383 im1 = tf.image.decode_image(tf.io.read_file('path/to/im1.png')) 4384 im2 = tf.image.decode_image(tf.io.read_file('path/to/im2.png')) 4385 tf.shape(im1) # `img1.png` has 3 channels; shape is `(255, 255, 3)` 4386 tf.shape(im2) # `img2.png` has 3 channels; shape is `(255, 255, 3)` 4387 # Add an outer batch for each image. 4388 im1 = tf.expand_dims(im1, axis=0) 4389 im2 = tf.expand_dims(im2, axis=0) 4390 # Compute SSIM over tf.uint8 Tensors. 4391 ssim1 = tf.image.ssim(im1, im2, max_val=255, filter_size=11, 4392 filter_sigma=1.5, k1=0.01, k2=0.03) 4393 4394 # Compute SSIM over tf.float32 Tensors. 4395 im1 = tf.image.convert_image_dtype(im1, tf.float32) 4396 im2 = tf.image.convert_image_dtype(im2, tf.float32) 4397 ssim2 = tf.image.ssim(im1, im2, max_val=1.0, filter_size=11, 4398 filter_sigma=1.5, k1=0.01, k2=0.03) 4399 # ssim1 and ssim2 both have type tf.float32 and are almost equal. 4400 ``` 4401 4402 Args: 4403 img1: First image batch. 4-D Tensor of shape `[batch, height, width, 4404 channels]` with only Positive Pixel Values. 4405 img2: Second image batch. 4-D Tensor of shape `[batch, height, width, 4406 channels]` with only Positive Pixel Values. 4407 max_val: The dynamic range of the images (i.e., the difference between the 4408 maximum the and minimum allowed values). 4409 filter_size: Default value 11 (size of gaussian filter). 4410 filter_sigma: Default value 1.5 (width of gaussian filter). 4411 k1: Default value 0.01 4412 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 4413 it would be better if we took the values in the range of 0 < K2 < 0.4). 4414 return_index_map: If True returns local SSIM map instead of the global mean. 4415 4416 Returns: 4417 A tensor containing an SSIM value for each image in batch or a tensor 4418 containing an SSIM value for each pixel for each image in batch if 4419 return_index_map is True. Returned SSIM values are in range (-1, 1], when 4420 pixel values are non-negative. Returns a tensor with shape: 4421 broadcast(img1.shape[:-3], img2.shape[:-3]) or broadcast(img1.shape[:-1], 4422 img2.shape[:-1]). 4423 """ 4424 with ops.name_scope(None, 'SSIM', [img1, img2]): 4425 # Convert to tensor if needed. 4426 img1 = ops.convert_to_tensor(img1, name='img1') 4427 img2 = ops.convert_to_tensor(img2, name='img2') 4428 # Shape checking. 4429 _, _, checks = _verify_compatible_image_shapes(img1, img2) 4430 with ops.control_dependencies(checks): 4431 img1 = array_ops.identity(img1) 4432 4433 # Need to convert the images to float32. Scale max_val accordingly so that 4434 # SSIM is computed correctly. 4435 max_val = math_ops.cast(max_val, img1.dtype) 4436 max_val = convert_image_dtype(max_val, dtypes.float32) 4437 img1 = convert_image_dtype(img1, dtypes.float32) 4438 img2 = convert_image_dtype(img2, dtypes.float32) 4439 ssim_per_channel, _ = _ssim_per_channel(img1, img2, max_val, filter_size, 4440 filter_sigma, k1, k2, 4441 return_index_map) 4442 # Compute average over color channels. 4443 return math_ops.reduce_mean(ssim_per_channel, [-1]) 4444 4445 4446# Default values obtained by Wang et al. 4447_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333) 4448 4449 4450@tf_export('image.ssim_multiscale') 4451@dispatch.add_dispatch_support 4452def ssim_multiscale(img1, 4453 img2, 4454 max_val, 4455 power_factors=_MSSSIM_WEIGHTS, 4456 filter_size=11, 4457 filter_sigma=1.5, 4458 k1=0.01, 4459 k2=0.03): 4460 """Computes the MS-SSIM between img1 and img2. 4461 4462 This function assumes that `img1` and `img2` are image batches, i.e. the last 4463 three dimensions are [height, width, channels]. 4464 4465 Note: The true SSIM is only defined on grayscale. This function does not 4466 perform any colorspace transform. (If the input is already YUV, then it will 4467 compute YUV SSIM average.) 4468 4469 Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale 4470 structural similarity for image quality assessment." Signals, Systems and 4471 Computers, 2004. 4472 4473 Args: 4474 img1: First image batch with only Positive Pixel Values. 4475 img2: Second image batch with only Positive Pixel Values. Must have the 4476 same rank as img1. 4477 max_val: The dynamic range of the images (i.e., the difference between the 4478 maximum the and minimum allowed values). 4479 power_factors: Iterable of weights for each of the scales. The number of 4480 scales used is the length of the list. Index 0 is the unscaled 4481 resolution's weight and each increasing scale corresponds to the image 4482 being downsampled by 2. Defaults to (0.0448, 0.2856, 0.3001, 0.2363, 4483 0.1333), which are the values obtained in the original paper. 4484 filter_size: Default value 11 (size of gaussian filter). 4485 filter_sigma: Default value 1.5 (width of gaussian filter). 4486 k1: Default value 0.01 4487 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 4488 it would be better if we took the values in the range of 0 < K2 < 0.4). 4489 4490 Returns: 4491 A tensor containing an MS-SSIM value for each image in batch. The values 4492 are in range [0, 1]. Returns a tensor with shape: 4493 broadcast(img1.shape[:-3], img2.shape[:-3]). 4494 """ 4495 with ops.name_scope(None, 'MS-SSIM', [img1, img2]): 4496 # Convert to tensor if needed. 4497 img1 = ops.convert_to_tensor(img1, name='img1') 4498 img2 = ops.convert_to_tensor(img2, name='img2') 4499 # Shape checking. 4500 shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2) 4501 with ops.control_dependencies(checks): 4502 img1 = array_ops.identity(img1) 4503 4504 # Need to convert the images to float32. Scale max_val accordingly so that 4505 # SSIM is computed correctly. 4506 max_val = math_ops.cast(max_val, img1.dtype) 4507 max_val = convert_image_dtype(max_val, dtypes.float32) 4508 img1 = convert_image_dtype(img1, dtypes.float32) 4509 img2 = convert_image_dtype(img2, dtypes.float32) 4510 4511 imgs = [img1, img2] 4512 shapes = [shape1, shape2] 4513 4514 # img1 and img2 are assumed to be a (multi-dimensional) batch of 4515 # 3-dimensional images (height, width, channels). `heads` contain the batch 4516 # dimensions, and `tails` contain the image dimensions. 4517 heads = [s[:-3] for s in shapes] 4518 tails = [s[-3:] for s in shapes] 4519 4520 divisor = [1, 2, 2, 1] 4521 divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32) 4522 4523 def do_pad(images, remainder): 4524 padding = array_ops.expand_dims(remainder, -1) 4525 padding = array_ops.pad(padding, [[1, 0], [1, 0]]) 4526 return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images] 4527 4528 mcs = [] 4529 for k in range(len(power_factors)): 4530 with ops.name_scope(None, 'Scale%d' % k, imgs): 4531 if k > 0: 4532 # Avg pool takes rank 4 tensors. Flatten leading dimensions. 4533 flat_imgs = [ 4534 array_ops.reshape(x, array_ops.concat([[-1], t], 0)) 4535 for x, t in zip(imgs, tails) 4536 ] 4537 4538 remainder = tails[0] % divisor_tensor 4539 need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0)) 4540 # pylint: disable=cell-var-from-loop 4541 padded = control_flow_ops.cond(need_padding, 4542 lambda: do_pad(flat_imgs, remainder), 4543 lambda: flat_imgs) 4544 # pylint: enable=cell-var-from-loop 4545 4546 downscaled = [ 4547 nn_ops.avg_pool( 4548 x, ksize=divisor, strides=divisor, padding='VALID') 4549 for x in padded 4550 ] 4551 tails = [x[1:] for x in array_ops.shape_n(downscaled)] 4552 imgs = [ 4553 array_ops.reshape(x, array_ops.concat([h, t], 0)) 4554 for x, h, t in zip(downscaled, heads, tails) 4555 ] 4556 4557 # Overwrite previous ssim value since we only need the last one. 4558 ssim_per_channel, cs = _ssim_per_channel( 4559 *imgs, 4560 max_val=max_val, 4561 filter_size=filter_size, 4562 filter_sigma=filter_sigma, 4563 k1=k1, 4564 k2=k2) 4565 mcs.append(nn_ops.relu(cs)) 4566 4567 # Remove the cs score for the last scale. In the MS-SSIM calculation, 4568 # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p). 4569 mcs.pop() # Remove the cs score for the last scale. 4570 mcs_and_ssim = array_ops.stack( 4571 mcs + [nn_ops.relu(ssim_per_channel)], axis=-1) 4572 # Take weighted geometric mean across the scale axis. 4573 ms_ssim = math_ops.reduce_prod( 4574 math_ops.pow(mcs_and_ssim, power_factors), [-1]) 4575 4576 return math_ops.reduce_mean(ms_ssim, [-1]) # Avg over color channels. 4577 4578 4579@tf_export('image.image_gradients') 4580@dispatch.add_dispatch_support 4581def image_gradients(image): 4582 """Returns image gradients (dy, dx) for each color channel. 4583 4584 Both output tensors have the same shape as the input: [batch_size, h, w, 4585 d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in 4586 location (x, y). That means that dy will always have zeros in the last row, 4587 and dx will always have zeros in the last column. 4588 4589 Usage Example: 4590 ```python 4591 BATCH_SIZE = 1 4592 IMAGE_HEIGHT = 5 4593 IMAGE_WIDTH = 5 4594 CHANNELS = 1 4595 image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS, 4596 delta=1, dtype=tf.float32), 4597 shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS)) 4598 dy, dx = tf.image.image_gradients(image) 4599 print(image[0, :,:,0]) 4600 tf.Tensor( 4601 [[ 0. 1. 2. 3. 4.] 4602 [ 5. 6. 7. 8. 9.] 4603 [10. 11. 12. 13. 14.] 4604 [15. 16. 17. 18. 19.] 4605 [20. 21. 22. 23. 24.]], shape=(5, 5), dtype=float32) 4606 print(dy[0, :,:,0]) 4607 tf.Tensor( 4608 [[5. 5. 5. 5. 5.] 4609 [5. 5. 5. 5. 5.] 4610 [5. 5. 5. 5. 5.] 4611 [5. 5. 5. 5. 5.] 4612 [0. 0. 0. 0. 0.]], shape=(5, 5), dtype=float32) 4613 print(dx[0, :,:,0]) 4614 tf.Tensor( 4615 [[1. 1. 1. 1. 0.] 4616 [1. 1. 1. 1. 0.] 4617 [1. 1. 1. 1. 0.] 4618 [1. 1. 1. 1. 0.] 4619 [1. 1. 1. 1. 0.]], shape=(5, 5), dtype=float32) 4620 ``` 4621 4622 Args: 4623 image: Tensor with shape [batch_size, h, w, d]. 4624 4625 Returns: 4626 Pair of tensors (dy, dx) holding the vertical and horizontal image 4627 gradients (1-step finite difference). 4628 4629 Raises: 4630 ValueError: If `image` is not a 4D tensor. 4631 """ 4632 if image.get_shape().ndims != 4: 4633 raise ValueError('image_gradients expects a 4D tensor ' 4634 '[batch_size, h, w, d], not {}.'.format(image.get_shape())) 4635 image_shape = array_ops.shape(image) 4636 batch_size, height, width, depth = array_ops.unstack(image_shape) 4637 dy = image[:, 1:, :, :] - image[:, :-1, :, :] 4638 dx = image[:, :, 1:, :] - image[:, :, :-1, :] 4639 4640 # Return tensors with same size as original image by concatenating 4641 # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y). 4642 shape = array_ops.stack([batch_size, 1, width, depth]) 4643 dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1) 4644 dy = array_ops.reshape(dy, image_shape) 4645 4646 shape = array_ops.stack([batch_size, height, 1, depth]) 4647 dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2) 4648 dx = array_ops.reshape(dx, image_shape) 4649 4650 return dy, dx 4651 4652 4653@tf_export('image.sobel_edges') 4654@dispatch.add_dispatch_support 4655def sobel_edges(image): 4656 """Returns a tensor holding Sobel edge maps. 4657 4658 Example usage: 4659 4660 For general usage, `image` would be loaded from a file as below: 4661 4662 ```python 4663 image_bytes = tf.io.read_file(path_to_image_file) 4664 image = tf.image.decode_image(image_bytes) 4665 image = tf.cast(image, tf.float32) 4666 image = tf.expand_dims(image, 0) 4667 ``` 4668 But for demo purposes, we are using randomly generated values for `image`: 4669 4670 >>> image = tf.random.uniform( 4671 ... maxval=255, shape=[1, 28, 28, 3], dtype=tf.float32) 4672 >>> sobel = tf.image.sobel_edges(image) 4673 >>> sobel_y = np.asarray(sobel[0, :, :, :, 0]) # sobel in y-direction 4674 >>> sobel_x = np.asarray(sobel[0, :, :, :, 1]) # sobel in x-direction 4675 4676 For displaying the sobel results, PIL's [Image Module]( 4677 https://pillow.readthedocs.io/en/stable/reference/Image.html) can be used: 4678 4679 ```python 4680 # Display edge maps for the first channel (at index 0) 4681 Image.fromarray(sobel_y[..., 0] / 4 + 0.5).show() 4682 Image.fromarray(sobel_x[..., 0] / 4 + 0.5).show() 4683 ``` 4684 4685 Args: 4686 image: Image tensor with shape [batch_size, h, w, d] and type float32 or 4687 float64. The image(s) must be 2x2 or larger. 4688 4689 Returns: 4690 Tensor holding edge maps for each channel. Returns a tensor with shape 4691 [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]], 4692 [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter. 4693 """ 4694 # Define vertical and horizontal Sobel filters. 4695 static_image_shape = image.get_shape() 4696 image_shape = array_ops.shape(image) 4697 kernels = [[[-1, -2, -1], [0, 0, 0], [1, 2, 1]], 4698 [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]] 4699 num_kernels = len(kernels) 4700 kernels = np.transpose(np.asarray(kernels), (1, 2, 0)) 4701 kernels = np.expand_dims(kernels, -2) 4702 kernels_tf = constant_op.constant(kernels, dtype=image.dtype) 4703 4704 kernels_tf = array_ops.tile( 4705 kernels_tf, [1, 1, image_shape[-1], 1], name='sobel_filters') 4706 4707 # Use depth-wise convolution to calculate edge maps per channel. 4708 pad_sizes = [[0, 0], [1, 1], [1, 1], [0, 0]] 4709 padded = array_ops.pad(image, pad_sizes, mode='REFLECT') 4710 4711 # Output tensor has shape [batch_size, h, w, d * num_kernels]. 4712 strides = [1, 1, 1, 1] 4713 output = nn.depthwise_conv2d(padded, kernels_tf, strides, 'VALID') 4714 4715 # Reshape to [batch_size, h, w, d, num_kernels]. 4716 shape = array_ops.concat([image_shape, [num_kernels]], 0) 4717 output = array_ops.reshape(output, shape=shape) 4718 output.set_shape(static_image_shape.concatenate([num_kernels])) 4719 return output 4720 4721 4722def resize_bicubic(images, 4723 size, 4724 align_corners=False, 4725 name=None, 4726 half_pixel_centers=False): 4727 return gen_image_ops.resize_bicubic( 4728 images=images, 4729 size=size, 4730 align_corners=align_corners, 4731 half_pixel_centers=half_pixel_centers, 4732 name=name) 4733 4734 4735def resize_bilinear(images, 4736 size, 4737 align_corners=False, 4738 name=None, 4739 half_pixel_centers=False): 4740 return gen_image_ops.resize_bilinear( 4741 images=images, 4742 size=size, 4743 align_corners=align_corners, 4744 half_pixel_centers=half_pixel_centers, 4745 name=name) 4746 4747 4748def resize_nearest_neighbor(images, 4749 size, 4750 align_corners=False, 4751 name=None, 4752 half_pixel_centers=False): 4753 return gen_image_ops.resize_nearest_neighbor( 4754 images=images, 4755 size=size, 4756 align_corners=align_corners, 4757 half_pixel_centers=half_pixel_centers, 4758 name=name) 4759 4760 4761resize_area_deprecation = deprecation.deprecated( 4762 date=None, 4763 instructions=( 4764 'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.')) 4765tf_export(v1=['image.resize_area'])( 4766 resize_area_deprecation( 4767 dispatch.add_dispatch_support(gen_image_ops.resize_area))) 4768 4769resize_bicubic_deprecation = deprecation.deprecated( 4770 date=None, 4771 instructions=( 4772 'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.')) 4773tf_export(v1=['image.resize_bicubic'])( 4774 dispatch.add_dispatch_support(resize_bicubic_deprecation(resize_bicubic))) 4775 4776resize_bilinear_deprecation = deprecation.deprecated( 4777 date=None, 4778 instructions=( 4779 'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.')) 4780tf_export(v1=['image.resize_bilinear'])( 4781 dispatch.add_dispatch_support(resize_bilinear_deprecation(resize_bilinear))) 4782 4783resize_nearest_neighbor_deprecation = deprecation.deprecated( 4784 date=None, 4785 instructions=( 4786 'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` ' 4787 'instead.')) 4788tf_export(v1=['image.resize_nearest_neighbor'])( 4789 dispatch.add_dispatch_support( 4790 resize_nearest_neighbor_deprecation(resize_nearest_neighbor))) 4791 4792 4793@tf_export('image.crop_and_resize', v1=[]) 4794@dispatch.add_dispatch_support 4795def crop_and_resize_v2(image, 4796 boxes, 4797 box_indices, 4798 crop_size, 4799 method='bilinear', 4800 extrapolation_value=.0, 4801 name=None): 4802 """Extracts crops from the input image tensor and resizes them. 4803 4804 Extracts crops from the input image tensor and resizes them using bilinear 4805 sampling or nearest neighbor sampling (possibly with aspect ratio change) to a 4806 common output size specified by `crop_size`. This is more general than the 4807 `crop_to_bounding_box` op which extracts a fixed size slice from the input 4808 image and does not allow resizing or aspect ratio change. 4809 4810 Returns a tensor with `crops` from the input `image` at positions defined at 4811 the bounding box locations in `boxes`. The cropped boxes are all resized (with 4812 bilinear or nearest neighbor interpolation) to a fixed 4813 `size = [crop_height, crop_width]`. The result is a 4-D tensor 4814 `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned. 4815 In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical 4816 results to using `tf.compat.v1.image.resize_bilinear()` or 4817 `tf.compat.v1.image.resize_nearest_neighbor()`(depends on the `method` 4818 argument) with 4819 `align_corners=True`. 4820 4821 Args: 4822 image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. 4823 Both `image_height` and `image_width` need to be positive. 4824 boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor 4825 specifies the coordinates of a box in the `box_ind[i]` image and is 4826 specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized 4827 coordinate value of `y` is mapped to the image coordinate at `y * 4828 (image_height - 1)`, so as the `[0, 1]` interval of normalized image 4829 height is mapped to `[0, image_height - 1]` in image height coordinates. 4830 We do allow `y1` > `y2`, in which case the sampled crop is an up-down 4831 flipped version of the original image. The width dimension is treated 4832 similarly. Normalized coordinates outside the `[0, 1]` range are allowed, 4833 in which case we use `extrapolation_value` to extrapolate the input image 4834 values. 4835 box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, 4836 batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box 4837 refers to. 4838 crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. 4839 All cropped image patches are resized to this size. The aspect ratio of 4840 the image content is not preserved. Both `crop_height` and `crop_width` 4841 need to be positive. 4842 method: An optional string specifying the sampling method for resizing. It 4843 can be either `"bilinear"` or `"nearest"` and default to `"bilinear"`. 4844 Currently two sampling methods are supported: Bilinear and Nearest 4845 Neighbor. 4846 extrapolation_value: An optional `float`. Defaults to `0.0`. Value used for 4847 extrapolation, when applicable. 4848 name: A name for the operation (optional). 4849 4850 Returns: 4851 A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. 4852 4853 Example: 4854 4855 ```python 4856 import tensorflow as tf 4857 BATCH_SIZE = 1 4858 NUM_BOXES = 5 4859 IMAGE_HEIGHT = 256 4860 IMAGE_WIDTH = 256 4861 CHANNELS = 3 4862 CROP_SIZE = (24, 24) 4863 4864 image = tf.random.normal(shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, 4865 CHANNELS) ) 4866 boxes = tf.random.uniform(shape=(NUM_BOXES, 4)) 4867 box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0, 4868 maxval=BATCH_SIZE, dtype=tf.int32) 4869 output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE) 4870 output.shape #=> (5, 24, 24, 3) 4871 ``` 4872 """ 4873 return gen_image_ops.crop_and_resize(image, boxes, box_indices, crop_size, 4874 method, extrapolation_value, name) 4875 4876 4877@tf_export(v1=['image.crop_and_resize']) 4878@dispatch.add_dispatch_support 4879@deprecation.deprecated_args(None, 4880 'box_ind is deprecated, use box_indices instead', 4881 'box_ind') 4882def crop_and_resize_v1( # pylint: disable=missing-docstring 4883 image, 4884 boxes, 4885 box_ind=None, 4886 crop_size=None, 4887 method='bilinear', 4888 extrapolation_value=0, 4889 name=None, 4890 box_indices=None): 4891 box_ind = deprecation.deprecated_argument_lookup('box_indices', box_indices, 4892 'box_ind', box_ind) 4893 return gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method, 4894 extrapolation_value, name) 4895 4896 4897crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__ 4898 4899 4900@tf_export(v1=['image.extract_glimpse']) 4901@dispatch.add_dispatch_support 4902def extract_glimpse( 4903 input, # pylint: disable=redefined-builtin 4904 size, 4905 offsets, 4906 centered=True, 4907 normalized=True, 4908 uniform_noise=True, 4909 name=None): 4910 """Extracts a glimpse from the input tensor. 4911 4912 Returns a set of windows called glimpses extracted at location 4913 `offsets` from the input tensor. If the windows only partially 4914 overlaps the inputs, the non-overlapping areas will be filled with 4915 random noise. 4916 4917 The result is a 4-D tensor of shape `[batch_size, glimpse_height, 4918 glimpse_width, channels]`. The channels and batch dimensions are the 4919 same as that of the input tensor. The height and width of the output 4920 windows are specified in the `size` parameter. 4921 4922 The argument `normalized` and `centered` controls how the windows are built: 4923 4924 * If the coordinates are normalized but not centered, 0.0 and 1.0 4925 correspond to the minimum and maximum of each height and width 4926 dimension. 4927 * If the coordinates are both normalized and centered, they range from 4928 -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper 4929 left corner, the lower right corner is located at (1.0, 1.0) and the 4930 center is at (0, 0). 4931 * If the coordinates are not normalized they are interpreted as 4932 numbers of pixels. 4933 4934 Usage Example: 4935 4936 >>> x = [[[[0.0], 4937 ... [1.0], 4938 ... [2.0]], 4939 ... [[3.0], 4940 ... [4.0], 4941 ... [5.0]], 4942 ... [[6.0], 4943 ... [7.0], 4944 ... [8.0]]]] 4945 >>> tf.compat.v1.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]], 4946 ... centered=False, normalized=False) 4947 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy= 4948 array([[[[0.], 4949 [1.]], 4950 [[3.], 4951 [4.]]]], dtype=float32)> 4952 4953 Args: 4954 input: A `Tensor` of type `float32`. A 4-D float tensor of shape 4955 `[batch_size, height, width, channels]`. 4956 size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the 4957 size of the glimpses to extract. The glimpse height must be specified 4958 first, following by the glimpse width. 4959 offsets: A `Tensor` of type `float32`. A 2-D integer tensor of shape 4960 `[batch_size, 2]` containing the y, x locations of the center of each 4961 window. 4962 centered: An optional `bool`. Defaults to `True`. indicates if the offset 4963 coordinates are centered relative to the image, in which case the (0, 0) 4964 offset is relative to the center of the input images. If false, the (0,0) 4965 offset corresponds to the upper left corner of the input images. 4966 normalized: An optional `bool`. Defaults to `True`. indicates if the offset 4967 coordinates are normalized. 4968 uniform_noise: An optional `bool`. Defaults to `True`. indicates if the 4969 noise should be generated using a uniform distribution or a Gaussian 4970 distribution. 4971 name: A name for the operation (optional). 4972 4973 Returns: 4974 A `Tensor` of type `float32`. 4975 """ 4976 return gen_image_ops.extract_glimpse( 4977 input=input, 4978 size=size, 4979 offsets=offsets, 4980 centered=centered, 4981 normalized=normalized, 4982 uniform_noise=uniform_noise, 4983 name=name) 4984 4985 4986@tf_export('image.extract_glimpse', v1=[]) 4987@dispatch.add_dispatch_support 4988def extract_glimpse_v2( 4989 input, # pylint: disable=redefined-builtin 4990 size, 4991 offsets, 4992 centered=True, 4993 normalized=True, 4994 noise='uniform', 4995 name=None): 4996 """Extracts a glimpse from the input tensor. 4997 4998 Returns a set of windows called glimpses extracted at location 4999 `offsets` from the input tensor. If the windows only partially 5000 overlaps the inputs, the non-overlapping areas will be filled with 5001 random noise. 5002 5003 The result is a 4-D tensor of shape `[batch_size, glimpse_height, 5004 glimpse_width, channels]`. The channels and batch dimensions are the 5005 same as that of the input tensor. The height and width of the output 5006 windows are specified in the `size` parameter. 5007 5008 The argument `normalized` and `centered` controls how the windows are built: 5009 5010 * If the coordinates are normalized but not centered, 0.0 and 1.0 5011 correspond to the minimum and maximum of each height and width 5012 dimension. 5013 * If the coordinates are both normalized and centered, they range from 5014 -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper 5015 left corner, the lower right corner is located at (1.0, 1.0) and the 5016 center is at (0, 0). 5017 * If the coordinates are not normalized they are interpreted as 5018 numbers of pixels. 5019 5020 Usage Example: 5021 5022 >>> x = [[[[0.0], 5023 ... [1.0], 5024 ... [2.0]], 5025 ... [[3.0], 5026 ... [4.0], 5027 ... [5.0]], 5028 ... [[6.0], 5029 ... [7.0], 5030 ... [8.0]]]] 5031 >>> tf.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]], 5032 ... centered=False, normalized=False) 5033 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy= 5034 array([[[[4.], 5035 [5.]], 5036 [[7.], 5037 [8.]]]], dtype=float32)> 5038 5039 Args: 5040 input: A `Tensor` of type `float32`. A 4-D float tensor of shape 5041 `[batch_size, height, width, channels]`. 5042 size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the 5043 size of the glimpses to extract. The glimpse height must be specified 5044 first, following by the glimpse width. 5045 offsets: A `Tensor` of type `float32`. A 2-D integer tensor of shape 5046 `[batch_size, 2]` containing the y, x locations of the center of each 5047 window. 5048 centered: An optional `bool`. Defaults to `True`. indicates if the offset 5049 coordinates are centered relative to the image, in which case the (0, 0) 5050 offset is relative to the center of the input images. If false, the (0,0) 5051 offset corresponds to the upper left corner of the input images. 5052 normalized: An optional `bool`. Defaults to `True`. indicates if the offset 5053 coordinates are normalized. 5054 noise: An optional `string`. Defaults to `uniform`. indicates if the noise 5055 should be `uniform` (uniform distribution), `gaussian` (gaussian 5056 distribution), or `zero` (zero padding). 5057 name: A name for the operation (optional). 5058 5059 Returns: 5060 A `Tensor` of type `float32`. 5061 """ 5062 return gen_image_ops.extract_glimpse_v2( 5063 input=input, 5064 size=size, 5065 offsets=offsets, 5066 centered=centered, 5067 normalized=normalized, 5068 noise=noise, 5069 uniform_noise=False, 5070 name=name) 5071 5072 5073@tf_export('image.combined_non_max_suppression') 5074@dispatch.add_dispatch_support 5075def combined_non_max_suppression(boxes, 5076 scores, 5077 max_output_size_per_class, 5078 max_total_size, 5079 iou_threshold=0.5, 5080 score_threshold=float('-inf'), 5081 pad_per_class=False, 5082 clip_boxes=True, 5083 name=None): 5084 """Greedily selects a subset of bounding boxes in descending order of score. 5085 5086 This operation performs non_max_suppression on the inputs per batch, across 5087 all classes. 5088 Prunes away boxes that have high intersection-over-union (IOU) overlap 5089 with previously selected boxes. Bounding boxes are supplied as 5090 [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any 5091 diagonal pair of box corners and the coordinates can be provided as normalized 5092 (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm 5093 is agnostic to where the origin is in the coordinate system. Also note that 5094 this algorithm is invariant to orthogonal transformations and translations 5095 of the coordinate system; thus translating or reflections of the coordinate 5096 system result in the same boxes being selected by the algorithm. 5097 The output of this operation is the final boxes, scores and classes tensor 5098 returned after performing non_max_suppression. 5099 5100 Args: 5101 boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q` 5102 is 1 then same boxes are used for all classes otherwise, if `q` is equal 5103 to number of classes, class-specific boxes are used. 5104 scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]` 5105 representing a single score corresponding to each box (each row of boxes). 5106 max_output_size_per_class: A scalar integer `Tensor` representing the 5107 maximum number of boxes to be selected by non-max suppression per class 5108 max_total_size: A int32 scalar representing maximum number of boxes retained 5109 over all classes. Note that setting this value to a large number may 5110 result in OOM error depending on the system workload. 5111 iou_threshold: A float representing the threshold for deciding whether boxes 5112 overlap too much with respect to IOU. 5113 score_threshold: A float representing the threshold for deciding when to 5114 remove boxes based on score. 5115 pad_per_class: If false, the output nmsed boxes, scores and classes are 5116 padded/clipped to `max_total_size`. If true, the output nmsed boxes, 5117 scores and classes are padded to be of length 5118 `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in 5119 which case it is clipped to `max_total_size`. Defaults to false. 5120 clip_boxes: If true, the coordinates of output nmsed boxes will be clipped 5121 to [0, 1]. If false, output the box coordinates as it is. Defaults to 5122 true. 5123 name: A name for the operation (optional). 5124 5125 Returns: 5126 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor 5127 containing the non-max suppressed boxes. 5128 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing 5129 the scores for the boxes. 5130 'nmsed_classes': A [batch_size, max_detections] float32 tensor 5131 containing the class for boxes. 5132 'valid_detections': A [batch_size] int32 tensor indicating the number of 5133 valid detections per batch item. Only the top valid_detections[i] entries 5134 in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the 5135 entries are zero paddings. 5136 """ 5137 with ops.name_scope(name, 'combined_non_max_suppression'): 5138 iou_threshold = ops.convert_to_tensor( 5139 iou_threshold, dtype=dtypes.float32, name='iou_threshold') 5140 score_threshold = ops.convert_to_tensor( 5141 score_threshold, dtype=dtypes.float32, name='score_threshold') 5142 5143 # Convert `max_total_size` to tensor *without* setting the `dtype` param. 5144 # This allows us to catch `int32` overflow case with `max_total_size` 5145 # whose expected dtype is `int32` by the op registration. Any number within 5146 # `int32` will get converted to `int32` tensor. Anything larger will get 5147 # converted to `int64`. Passing in `int64` for `max_total_size` to the op 5148 # will throw dtype mismatch exception. 5149 # TODO(b/173251596): Once there is a more general solution to warn against 5150 # int overflow conversions, revisit this check. 5151 max_total_size = ops.convert_to_tensor(max_total_size) 5152 5153 return gen_image_ops.combined_non_max_suppression( 5154 boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, 5155 score_threshold, pad_per_class, clip_boxes) 5156 5157 5158def _bbox_overlap(boxes_a, boxes_b): 5159 """Calculates the overlap (iou - intersection over union) between boxes_a and boxes_b. 5160 5161 Args: 5162 boxes_a: a tensor with a shape of [batch_size, N, 4]. N is the number of 5163 boxes per image. The last dimension is the pixel coordinates in 5164 [ymin, xmin, ymax, xmax] form. 5165 boxes_b: a tensor with a shape of [batch_size, M, 4]. M is the number of 5166 boxes. The last dimension is the pixel coordinates in 5167 [ymin, xmin, ymax, xmax] form. 5168 Returns: 5169 intersection_over_union: a tensor with as a shape of [batch_size, N, M], 5170 representing the ratio of intersection area over union area (IoU) between 5171 two boxes 5172 """ 5173 with ops.name_scope('bbox_overlap'): 5174 a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split( 5175 value=boxes_a, num_or_size_splits=4, axis=2) 5176 b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split( 5177 value=boxes_b, num_or_size_splits=4, axis=2) 5178 5179 # Calculates the intersection area. 5180 i_xmin = math_ops.maximum( 5181 a_x_min, array_ops.transpose(b_x_min, [0, 2, 1])) 5182 i_xmax = math_ops.minimum( 5183 a_x_max, array_ops.transpose(b_x_max, [0, 2, 1])) 5184 i_ymin = math_ops.maximum( 5185 a_y_min, array_ops.transpose(b_y_min, [0, 2, 1])) 5186 i_ymax = math_ops.minimum( 5187 a_y_max, array_ops.transpose(b_y_max, [0, 2, 1])) 5188 i_area = math_ops.maximum( 5189 (i_xmax - i_xmin), 0) * math_ops.maximum((i_ymax - i_ymin), 0) 5190 5191 # Calculates the union area. 5192 a_area = (a_y_max - a_y_min) * (a_x_max - a_x_min) 5193 b_area = (b_y_max - b_y_min) * (b_x_max - b_x_min) 5194 EPSILON = 1e-8 5195 # Adds a small epsilon to avoid divide-by-zero. 5196 u_area = a_area + array_ops.transpose(b_area, [0, 2, 1]) - i_area + EPSILON 5197 5198 # Calculates IoU. 5199 intersection_over_union = i_area / u_area 5200 5201 return intersection_over_union 5202 5203 5204def _self_suppression(iou, _, iou_sum, iou_threshold): 5205 """Suppress boxes in the same tile. 5206 5207 Compute boxes that cannot be suppressed by others (i.e., 5208 can_suppress_others), and then use them to suppress boxes in the same tile. 5209 5210 Args: 5211 iou: a tensor of shape [batch_size, num_boxes_with_padding] representing 5212 intersection over union. 5213 iou_sum: a scalar tensor. 5214 iou_threshold: a scalar tensor. 5215 5216 Returns: 5217 iou_suppressed: a tensor of shape [batch_size, num_boxes_with_padding]. 5218 iou_diff: a scalar tensor representing whether any box is supressed in 5219 this step. 5220 iou_sum_new: a scalar tensor of shape [batch_size] that represents 5221 the iou sum after suppression. 5222 iou_threshold: a scalar tensor. 5223 """ 5224 batch_size = array_ops.shape(iou)[0] 5225 can_suppress_others = math_ops.cast( 5226 array_ops.reshape( 5227 math_ops.reduce_max(iou, 1) < iou_threshold, [batch_size, -1, 1]), 5228 iou.dtype) 5229 iou_after_suppression = array_ops.reshape( 5230 math_ops.cast( 5231 math_ops.reduce_max(can_suppress_others * iou, 1) < iou_threshold, 5232 iou.dtype), 5233 [batch_size, -1, 1]) * iou 5234 iou_sum_new = math_ops.reduce_sum(iou_after_suppression, [1, 2]) 5235 return [ 5236 iou_after_suppression, 5237 math_ops.reduce_any(iou_sum - iou_sum_new > iou_threshold), iou_sum_new, 5238 iou_threshold 5239 ] 5240 5241 5242def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, tile_size): 5243 """Suppress boxes between different tiles. 5244 5245 Args: 5246 boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4] 5247 box_slice: a tensor of shape [batch_size, tile_size, 4] 5248 iou_threshold: a scalar tensor 5249 inner_idx: a scalar tensor representing the tile index of the tile 5250 that is used to supress box_slice 5251 tile_size: an integer representing the number of boxes in a tile 5252 5253 Returns: 5254 boxes: unchanged boxes as input 5255 box_slice_after_suppression: box_slice after suppression 5256 iou_threshold: unchanged 5257 """ 5258 batch_size = array_ops.shape(boxes)[0] 5259 new_slice = array_ops.slice( 5260 boxes, [0, inner_idx * tile_size, 0], 5261 [batch_size, tile_size, 4]) 5262 iou = _bbox_overlap(new_slice, box_slice) 5263 box_slice_after_suppression = array_ops.expand_dims( 5264 math_ops.cast(math_ops.reduce_all(iou < iou_threshold, [1]), 5265 box_slice.dtype), 5266 2) * box_slice 5267 return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1 5268 5269 5270def _suppression_loop_body(boxes, iou_threshold, output_size, idx, tile_size): 5271 """Process boxes in the range [idx*tile_size, (idx+1)*tile_size). 5272 5273 Args: 5274 boxes: a tensor with a shape of [batch_size, anchors, 4]. 5275 iou_threshold: a float representing the threshold for deciding whether boxes 5276 overlap too much with respect to IOU. 5277 output_size: an int32 tensor of size [batch_size]. Representing the number 5278 of selected boxes for each batch. 5279 idx: an integer scalar representing induction variable. 5280 tile_size: an integer representing the number of boxes in a tile 5281 5282 Returns: 5283 boxes: updated boxes. 5284 iou_threshold: pass down iou_threshold to the next iteration. 5285 output_size: the updated output_size. 5286 idx: the updated induction variable. 5287 """ 5288 with ops.name_scope('suppression_loop_body'): 5289 num_tiles = array_ops.shape(boxes)[1] // tile_size 5290 batch_size = array_ops.shape(boxes)[0] 5291 5292 def cross_suppression_func(boxes, box_slice, iou_threshold, inner_idx): 5293 return _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, 5294 tile_size) 5295 5296 # Iterates over tiles that can possibly suppress the current tile. 5297 box_slice = array_ops.slice(boxes, [0, idx * tile_size, 0], 5298 [batch_size, tile_size, 4]) 5299 _, box_slice, _, _ = control_flow_ops.while_loop( 5300 lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx, 5301 cross_suppression_func, 5302 [boxes, box_slice, iou_threshold, constant_op.constant(0)]) 5303 5304 # Iterates over the current tile to compute self-suppression. 5305 iou = _bbox_overlap(box_slice, box_slice) 5306 mask = array_ops.expand_dims( 5307 array_ops.reshape( 5308 math_ops.range(tile_size), [1, -1]) > array_ops.reshape( 5309 math_ops.range(tile_size), [-1, 1]), 0) 5310 iou *= math_ops.cast( 5311 math_ops.logical_and(mask, iou >= iou_threshold), iou.dtype) 5312 suppressed_iou, _, _, _ = control_flow_ops.while_loop( 5313 lambda _iou, loop_condition, _iou_sum, _: loop_condition, 5314 _self_suppression, 5315 [iou, constant_op.constant(True), math_ops.reduce_sum(iou, [1, 2]), 5316 iou_threshold]) 5317 suppressed_box = math_ops.reduce_sum(suppressed_iou, 1) > 0 5318 box_slice *= array_ops.expand_dims( 5319 1.0 - math_ops.cast(suppressed_box, box_slice.dtype), 2) 5320 5321 # Uses box_slice to update the input boxes. 5322 mask = array_ops.reshape( 5323 math_ops.cast( 5324 math_ops.equal(math_ops.range(num_tiles), idx), boxes.dtype), 5325 [1, -1, 1, 1]) 5326 boxes = array_ops.tile(array_ops.expand_dims( 5327 box_slice, [1]), [1, num_tiles, 1, 1]) * mask + array_ops.reshape( 5328 boxes, [batch_size, num_tiles, tile_size, 4]) * (1 - mask) 5329 boxes = array_ops.reshape(boxes, [batch_size, -1, 4]) 5330 5331 # Updates output_size. 5332 output_size += math_ops.reduce_sum( 5333 math_ops.cast( 5334 math_ops.reduce_any(box_slice > 0, [2]), dtypes.int32), [1]) 5335 return boxes, iou_threshold, output_size, idx + 1 5336 5337 5338@tf_export('image.non_max_suppression_padded') 5339@dispatch.add_dispatch_support 5340def non_max_suppression_padded(boxes, 5341 scores, 5342 max_output_size, 5343 iou_threshold=0.5, 5344 score_threshold=float('-inf'), 5345 pad_to_max_output_size=False, 5346 name=None, 5347 sorted_input=False, 5348 canonicalized_coordinates=False, 5349 tile_size=512): 5350 """Greedily selects a subset of bounding boxes in descending order of score. 5351 5352 Performs algorithmically equivalent operation to tf.image.non_max_suppression, 5353 with the addition of an optional parameter which zero-pads the output to 5354 be of size `max_output_size`. 5355 The output of this operation is a tuple containing the set of integers 5356 indexing into the input collection of bounding boxes representing the selected 5357 boxes and the number of valid indices in the index set. The bounding box 5358 coordinates corresponding to the selected indices can then be obtained using 5359 the `tf.slice` and `tf.gather` operations. For example: 5360 ```python 5361 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded( 5362 boxes, scores, max_output_size, iou_threshold, 5363 score_threshold, pad_to_max_output_size=True) 5364 selected_indices = tf.slice( 5365 selected_indices_padded, tf.constant([0]), num_valid) 5366 selected_boxes = tf.gather(boxes, selected_indices) 5367 ``` 5368 5369 Args: 5370 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4]. 5371 Dimensions except the last two are batch dimensions. 5372 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes]. 5373 max_output_size: a scalar integer `Tensor` representing the maximum number 5374 of boxes to be selected by non max suppression. Note that setting this 5375 value to a large number may result in OOM error depending on the system 5376 workload. 5377 iou_threshold: a float representing the threshold for deciding whether boxes 5378 overlap too much with respect to IoU (intersection over union). 5379 score_threshold: a float representing the threshold for box scores. Boxes 5380 with a score that is not larger than this threshold will be suppressed. 5381 pad_to_max_output_size: whether to pad the output idx to max_output_size. 5382 Must be set to True when the input is a batch of images. 5383 name: name of operation. 5384 sorted_input: a boolean indicating whether the input boxes and scores 5385 are sorted in descending order by the score. 5386 canonicalized_coordinates: if box coordinates are given as 5387 `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant 5388 computation to canonicalize box coordinates. 5389 tile_size: an integer representing the number of boxes in a tile, i.e., 5390 the maximum number of boxes per image that can be used to suppress other 5391 boxes in parallel; larger tile_size means larger parallelism and 5392 potentially more redundant work. 5393 Returns: 5394 idx: a tensor with a shape of [..., num_boxes] representing the 5395 indices selected by non-max suppression. The leading dimensions 5396 are the batch dimensions of the input boxes. All numbers are within 5397 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i] 5398 indices (i.e., idx[i][:num_valid[i]]) are valid. 5399 num_valid: a tensor of rank 0 or higher with a shape of [...] 5400 representing the number of valid indices in idx. Its dimensions are the 5401 batch dimensions of the input boxes. 5402 Raises: 5403 ValueError: When set pad_to_max_output_size to False for batched input. 5404 """ 5405 with ops.name_scope(name, 'non_max_suppression_padded'): 5406 if not pad_to_max_output_size: 5407 # pad_to_max_output_size may be set to False only when the shape of 5408 # boxes is [num_boxes, 4], i.e., a single image. We make best effort to 5409 # detect violations at compile time. If `boxes` does not have a static 5410 # rank, the check allows computation to proceed. 5411 if boxes.get_shape().rank is not None and boxes.get_shape().rank > 2: 5412 raise ValueError("'pad_to_max_output_size' (value {}) must be True for " 5413 'batched input'.format(pad_to_max_output_size)) 5414 if name is None: 5415 name = '' 5416 idx, num_valid = non_max_suppression_padded_v2( 5417 boxes, scores, max_output_size, iou_threshold, score_threshold, 5418 sorted_input, canonicalized_coordinates, tile_size) 5419 # def_function.function seems to lose shape information, so set it here. 5420 if not pad_to_max_output_size: 5421 idx = idx[0, :num_valid] 5422 else: 5423 batch_dims = array_ops.concat([ 5424 array_ops.shape(boxes)[:-2], 5425 array_ops.expand_dims(max_output_size, 0) 5426 ], 0) 5427 idx = array_ops.reshape(idx, batch_dims) 5428 return idx, num_valid 5429 5430 5431# TODO(b/158709815): Improve performance regression due to 5432# def_function.function. 5433@def_function.function( 5434 experimental_implements='non_max_suppression_padded_v2') 5435def non_max_suppression_padded_v2(boxes, 5436 scores, 5437 max_output_size, 5438 iou_threshold=0.5, 5439 score_threshold=float('-inf'), 5440 sorted_input=False, 5441 canonicalized_coordinates=False, 5442 tile_size=512): 5443 """Non-maximum suppression. 5444 5445 Prunes away boxes that have high intersection-over-union (IOU) overlap 5446 with previously selected boxes. Bounding boxes are supplied as 5447 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any 5448 diagonal pair of box corners and the coordinates can be provided as normalized 5449 (i.e., lying in the interval `[0, 1]`) or absolute. The bounding box 5450 coordinates are cannonicalized to `[y_min, x_min, y_max, x_max]`, 5451 where `(y_min, x_min)` and `(y_max, x_mas)` are the coordinates of the lower 5452 left and upper right corner. User may indiciate the input box coordinates are 5453 already canonicalized to eliminate redundant work by setting 5454 canonicalized_coordinates to `True`. Note that this algorithm is agnostic to 5455 where the origin is in the coordinate system. Note that this algorithm is 5456 invariant to orthogonal transformations and translations of the coordinate 5457 system; thus translating or reflections of the coordinate system result in the 5458 same boxes being selected by the algorithm. 5459 5460 Similar to tf.image.non_max_suppression, non_max_suppression_padded 5461 implements hard NMS but can operate on a batch of images and improves 5462 performance by titling the bounding boxes. Non_max_suppression_padded should 5463 be preferred over tf.image_non_max_suppression when running on devices with 5464 abundant parallelsim for higher computation speed. For soft NMS, refer to 5465 tf.image.non_max_suppression_with_scores. 5466 5467 While a serial NMS algorithm iteratively uses the highest-scored unprocessed 5468 box to suppress boxes, this algorithm uses many boxes to suppress other boxes 5469 in parallel. The key idea is to partition boxes into tiles based on their 5470 score and suppresses boxes tile by tile, thus achieving parallelism within a 5471 tile. The tile size determines the degree of parallelism. 5472 5473 In cross suppression (using boxes of tile A to suppress boxes of tile B), 5474 all boxes in A can independently suppress boxes in B. 5475 5476 Self suppression (suppressing boxes of the same tile) needs to be iteratively 5477 applied until there's no more suppression. In each iteration, boxes that 5478 cannot be suppressed are used to suppress boxes in the same tile. 5479 5480 boxes = boxes.pad_to_multiply_of(tile_size) 5481 num_tiles = len(boxes) // tile_size 5482 output_boxes = [] 5483 for i in range(num_tiles): 5484 box_tile = boxes[i*tile_size : (i+1)*tile_size] 5485 for j in range(i - 1): 5486 # in parallel suppress boxes in box_tile using boxes from suppressing_tile 5487 suppressing_tile = boxes[j*tile_size : (j+1)*tile_size] 5488 iou = _bbox_overlap(box_tile, suppressing_tile) 5489 # if the box is suppressed in iou, clear it to a dot 5490 box_tile *= _update_boxes(iou) 5491 # Iteratively handle the diagnal tile. 5492 iou = _box_overlap(box_tile, box_tile) 5493 iou_changed = True 5494 while iou_changed: 5495 # boxes that are not suppressed by anything else 5496 suppressing_boxes = _get_suppressing_boxes(iou) 5497 # boxes that are suppressed by suppressing_boxes 5498 suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes) 5499 # clear iou to 0 for boxes that are suppressed, as they cannot be used 5500 # to suppress other boxes any more 5501 new_iou = _clear_iou(iou, suppressed_boxes) 5502 iou_changed = (new_iou != iou) 5503 iou = new_iou 5504 # remaining boxes that can still suppress others, are selected boxes. 5505 output_boxes.append(_get_suppressing_boxes(iou)) 5506 if len(output_boxes) >= max_output_size: 5507 break 5508 5509 Args: 5510 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4]. 5511 Dimensions except the last two are batch dimensions. The last dimension 5512 represents box coordinates, given as [y_1, x_1, y_2, x_2]. The coordinates 5513 on each dimension can be given in any order 5514 (see also `canonicalized_coordinates`) but must describe a box with 5515 a positive area. 5516 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes]. 5517 max_output_size: a scalar integer `Tensor` representing the maximum number 5518 of boxes to be selected by non max suppression. 5519 iou_threshold: a float representing the threshold for deciding whether boxes 5520 overlap too much with respect to IoU (intersection over union). 5521 score_threshold: a float representing the threshold for box scores. Boxes 5522 with a score that is not larger than this threshold will be suppressed. 5523 sorted_input: a boolean indicating whether the input boxes and scores 5524 are sorted in descending order by the score. 5525 canonicalized_coordinates: if box coordinates are given as 5526 `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant 5527 computation to canonicalize box coordinates. 5528 tile_size: an integer representing the number of boxes in a tile, i.e., 5529 the maximum number of boxes per image that can be used to suppress other 5530 boxes in parallel; larger tile_size means larger parallelism and 5531 potentially more redundant work. 5532 Returns: 5533 idx: a tensor with a shape of [..., num_boxes] representing the 5534 indices selected by non-max suppression. The leading dimensions 5535 are the batch dimensions of the input boxes. All numbers are within 5536 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i] 5537 indices (i.e., idx[i][:num_valid[i]]) are valid. 5538 num_valid: a tensor of rank 0 or higher with a shape of [...] 5539 representing the number of valid indices in idx. Its dimensions are the 5540 batch dimensions of the input boxes. 5541 Raises: 5542 ValueError: When set pad_to_max_output_size to False for batched input. 5543 """ 5544 def _sort_scores_and_boxes(scores, boxes): 5545 """Sort boxes based their score from highest to lowest. 5546 5547 Args: 5548 scores: a tensor with a shape of [batch_size, num_boxes] representing 5549 the scores of boxes. 5550 boxes: a tensor with a shape of [batch_size, num_boxes, 4] representing 5551 the boxes. 5552 Returns: 5553 sorted_scores: a tensor with a shape of [batch_size, num_boxes] 5554 representing the sorted scores. 5555 sorted_boxes: a tensor representing the sorted boxes. 5556 sorted_scores_indices: a tensor with a shape of [batch_size, num_boxes] 5557 representing the index of the scores in a sorted descending order. 5558 """ 5559 with ops.name_scope('sort_scores_and_boxes'): 5560 batch_size = array_ops.shape(boxes)[0] 5561 num_boxes = array_ops.shape(boxes)[1] 5562 sorted_scores_indices = sort_ops.argsort( 5563 scores, axis=1, direction='DESCENDING') 5564 index_offsets = math_ops.range(batch_size) * num_boxes 5565 indices = array_ops.reshape( 5566 sorted_scores_indices + array_ops.expand_dims(index_offsets, 1), [-1]) 5567 sorted_scores = array_ops.reshape( 5568 array_ops.gather(array_ops.reshape(scores, [-1]), indices), 5569 [batch_size, -1]) 5570 sorted_boxes = array_ops.reshape( 5571 array_ops.gather(array_ops.reshape(boxes, [-1, 4]), indices), 5572 [batch_size, -1, 4]) 5573 return sorted_scores, sorted_boxes, sorted_scores_indices 5574 5575 batch_dims = array_ops.shape(boxes)[:-2] 5576 num_boxes = array_ops.shape(boxes)[-2] 5577 boxes = array_ops.reshape(boxes, [-1, num_boxes, 4]) 5578 scores = array_ops.reshape(scores, [-1, num_boxes]) 5579 batch_size = array_ops.shape(boxes)[0] 5580 if score_threshold != float('-inf'): 5581 with ops.name_scope('filter_by_score'): 5582 score_mask = math_ops.cast(scores > score_threshold, scores.dtype) 5583 scores *= score_mask 5584 box_mask = array_ops.expand_dims( 5585 math_ops.cast(score_mask, boxes.dtype), 2) 5586 boxes *= box_mask 5587 5588 if not canonicalized_coordinates: 5589 with ops.name_scope('canonicalize_coordinates'): 5590 y_1, x_1, y_2, x_2 = array_ops.split( 5591 value=boxes, num_or_size_splits=4, axis=2) 5592 y_1_is_min = math_ops.reduce_all( 5593 math_ops.less_equal(y_1[0, 0, 0], y_2[0, 0, 0])) 5594 y_min, y_max = control_flow_ops.cond( 5595 y_1_is_min, lambda: (y_1, y_2), lambda: (y_2, y_1)) 5596 x_1_is_min = math_ops.reduce_all( 5597 math_ops.less_equal(x_1[0, 0, 0], x_2[0, 0, 0])) 5598 x_min, x_max = control_flow_ops.cond( 5599 x_1_is_min, lambda: (x_1, x_2), lambda: (x_2, x_1)) 5600 boxes = array_ops.concat([y_min, x_min, y_max, x_max], axis=2) 5601 5602 if not sorted_input: 5603 scores, boxes, sorted_indices = _sort_scores_and_boxes(scores, boxes) 5604 else: 5605 # Default value required for Autograph. 5606 sorted_indices = array_ops.zeros_like(scores, dtype=dtypes.int32) 5607 5608 pad = math_ops.cast( 5609 math_ops.ceil( 5610 math_ops.cast( 5611 math_ops.maximum(num_boxes, max_output_size), dtypes.float32) / 5612 math_ops.cast(tile_size, dtypes.float32)), 5613 dtypes.int32) * tile_size - num_boxes 5614 boxes = array_ops.pad( 5615 math_ops.cast(boxes, dtypes.float32), [[0, 0], [0, pad], [0, 0]]) 5616 scores = array_ops.pad( 5617 math_ops.cast(scores, dtypes.float32), [[0, 0], [0, pad]]) 5618 num_boxes_after_padding = num_boxes + pad 5619 num_iterations = num_boxes_after_padding // tile_size 5620 def _loop_cond(unused_boxes, unused_threshold, output_size, idx): 5621 return math_ops.logical_and( 5622 math_ops.reduce_min(output_size) < max_output_size, 5623 idx < num_iterations) 5624 5625 def suppression_loop_body(boxes, iou_threshold, output_size, idx): 5626 return _suppression_loop_body( 5627 boxes, iou_threshold, output_size, idx, tile_size) 5628 5629 selected_boxes, _, output_size, _ = control_flow_ops.while_loop( 5630 _loop_cond, 5631 suppression_loop_body, 5632 [ 5633 boxes, iou_threshold, 5634 array_ops.zeros([batch_size], dtypes.int32), 5635 constant_op.constant(0) 5636 ], 5637 shape_invariants=[ 5638 tensor_shape.TensorShape([None, None, 4]), 5639 tensor_shape.TensorShape([]), 5640 tensor_shape.TensorShape([None]), 5641 tensor_shape.TensorShape([]), 5642 ], 5643 ) 5644 num_valid = math_ops.minimum(output_size, max_output_size) 5645 idx = num_boxes_after_padding - math_ops.cast( 5646 nn_ops.top_k( 5647 math_ops.cast(math_ops.reduce_any( 5648 selected_boxes > 0, [2]), dtypes.int32) * 5649 array_ops.expand_dims( 5650 math_ops.range(num_boxes_after_padding, 0, -1), 0), 5651 max_output_size)[0], dtypes.int32) 5652 idx = math_ops.minimum(idx, num_boxes - 1) 5653 5654 if not sorted_input: 5655 index_offsets = math_ops.range(batch_size) * num_boxes 5656 gather_idx = array_ops.reshape( 5657 idx + array_ops.expand_dims(index_offsets, 1), [-1]) 5658 idx = array_ops.reshape( 5659 array_ops.gather(array_ops.reshape(sorted_indices, [-1]), 5660 gather_idx), 5661 [batch_size, -1]) 5662 invalid_index = array_ops.zeros([batch_size, max_output_size], 5663 dtype=dtypes.int32) 5664 idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0) 5665 num_valid_expanded = array_ops.expand_dims(num_valid, 1) 5666 idx = array_ops.where(idx_index < num_valid_expanded, 5667 idx, invalid_index) 5668 5669 num_valid = array_ops.reshape(num_valid, batch_dims) 5670 return idx, num_valid 5671 5672 5673def non_max_suppression_padded_v1(boxes, 5674 scores, 5675 max_output_size, 5676 iou_threshold=0.5, 5677 score_threshold=float('-inf'), 5678 pad_to_max_output_size=False, 5679 name=None): 5680 """Greedily selects a subset of bounding boxes in descending order of score. 5681 5682 Performs algorithmically equivalent operation to tf.image.non_max_suppression, 5683 with the addition of an optional parameter which zero-pads the output to 5684 be of size `max_output_size`. 5685 The output of this operation is a tuple containing the set of integers 5686 indexing into the input collection of bounding boxes representing the selected 5687 boxes and the number of valid indices in the index set. The bounding box 5688 coordinates corresponding to the selected indices can then be obtained using 5689 the `tf.slice` and `tf.gather` operations. For example: 5690 ```python 5691 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded( 5692 boxes, scores, max_output_size, iou_threshold, 5693 score_threshold, pad_to_max_output_size=True) 5694 selected_indices = tf.slice( 5695 selected_indices_padded, tf.constant([0]), num_valid) 5696 selected_boxes = tf.gather(boxes, selected_indices) 5697 ``` 5698 5699 Args: 5700 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`. 5701 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 5702 score corresponding to each box (each row of boxes). 5703 max_output_size: A scalar integer `Tensor` representing the maximum number 5704 of boxes to be selected by non-max suppression. 5705 iou_threshold: A float representing the threshold for deciding whether boxes 5706 overlap too much with respect to IOU. 5707 score_threshold: A float representing the threshold for deciding when to 5708 remove boxes based on score. 5709 pad_to_max_output_size: bool. If True, size of `selected_indices` output is 5710 padded to `max_output_size`. 5711 name: A name for the operation (optional). 5712 5713 Returns: 5714 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 5715 selected indices from the boxes tensor, where `M <= max_output_size`. 5716 valid_outputs: A scalar integer `Tensor` denoting how many elements in 5717 `selected_indices` are valid. Valid elements occur first, then padding. 5718 """ 5719 with ops.name_scope(name, 'non_max_suppression_padded'): 5720 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') 5721 score_threshold = ops.convert_to_tensor( 5722 score_threshold, name='score_threshold') 5723 return gen_image_ops.non_max_suppression_v4(boxes, scores, max_output_size, 5724 iou_threshold, score_threshold, 5725 pad_to_max_output_size) 5726 5727 5728@tf_export('image.draw_bounding_boxes', v1=[]) 5729@dispatch.add_dispatch_support 5730def draw_bounding_boxes_v2(images, boxes, colors, name=None): 5731 """Draw bounding boxes on a batch of images. 5732 5733 Outputs a copy of `images` but draws on top of the pixels zero or more 5734 bounding boxes specified by the locations in `boxes`. The coordinates of the 5735 each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. 5736 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 5737 and the height of the underlying image. 5738 5739 For example, if an image is 100 x 200 pixels (height x width) and the bounding 5740 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of 5741 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). 5742 5743 Parts of the bounding box may fall outside the image. 5744 5745 Args: 5746 images: A `Tensor`. Must be one of the following types: `float32`, `half`. 5747 4-D with shape `[batch, height, width, depth]`. A batch of images. 5748 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, 5749 num_bounding_boxes, 4]` containing bounding boxes. 5750 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle 5751 through for the boxes. 5752 name: A name for the operation (optional). 5753 5754 Returns: 5755 A `Tensor`. Has the same type as `images`. 5756 5757 Usage Example: 5758 5759 >>> # create an empty image 5760 >>> img = tf.zeros([1, 3, 3, 3]) 5761 >>> # draw a box around the image 5762 >>> box = np.array([0, 0, 1, 1]) 5763 >>> boxes = box.reshape([1, 1, 4]) 5764 >>> # alternate between red and blue 5765 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 5766 >>> tf.image.draw_bounding_boxes(img, boxes, colors) 5767 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy= 5768 array([[[[1., 0., 0.], 5769 [1., 0., 0.], 5770 [1., 0., 0.]], 5771 [[1., 0., 0.], 5772 [0., 0., 0.], 5773 [1., 0., 0.]], 5774 [[1., 0., 0.], 5775 [1., 0., 0.], 5776 [1., 0., 0.]]]], dtype=float32)> 5777 """ 5778 if colors is None: 5779 return gen_image_ops.draw_bounding_boxes(images, boxes, name) 5780 return gen_image_ops.draw_bounding_boxes_v2(images, boxes, colors, name) 5781 5782 5783@tf_export(v1=['image.draw_bounding_boxes']) 5784@dispatch.add_dispatch_support 5785def draw_bounding_boxes(images, boxes, name=None, colors=None): 5786 """Draw bounding boxes on a batch of images. 5787 5788 Outputs a copy of `images` but draws on top of the pixels zero or more 5789 bounding boxes specified by the locations in `boxes`. The coordinates of the 5790 each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. 5791 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 5792 and the height of the underlying image. 5793 5794 For example, if an image is 100 x 200 pixels (height x width) and the bounding 5795 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of 5796 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). 5797 5798 Parts of the bounding box may fall outside the image. 5799 5800 Args: 5801 images: A `Tensor`. Must be one of the following types: `float32`, `half`. 5802 4-D with shape `[batch, height, width, depth]`. A batch of images. 5803 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, 5804 num_bounding_boxes, 4]` containing bounding boxes. 5805 name: A name for the operation (optional). 5806 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle 5807 through for the boxes. 5808 5809 Returns: 5810 A `Tensor`. Has the same type as `images`. 5811 5812 Usage Example: 5813 5814 >>> # create an empty image 5815 >>> img = tf.zeros([1, 3, 3, 3]) 5816 >>> # draw a box around the image 5817 >>> box = np.array([0, 0, 1, 1]) 5818 >>> boxes = box.reshape([1, 1, 4]) 5819 >>> # alternate between red and blue 5820 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 5821 >>> tf.image.draw_bounding_boxes(img, boxes, colors) 5822 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy= 5823 array([[[[1., 0., 0.], 5824 [1., 0., 0.], 5825 [1., 0., 0.]], 5826 [[1., 0., 0.], 5827 [0., 0., 0.], 5828 [1., 0., 0.]], 5829 [[1., 0., 0.], 5830 [1., 0., 0.], 5831 [1., 0., 0.]]]], dtype=float32)> 5832 """ 5833 return draw_bounding_boxes_v2(images, boxes, colors, name) 5834 5835 5836@tf_export('image.generate_bounding_box_proposals') 5837@dispatch.add_dispatch_support 5838def generate_bounding_box_proposals(scores, 5839 bbox_deltas, 5840 image_info, 5841 anchors, 5842 nms_threshold=0.7, 5843 pre_nms_topn=6000, 5844 min_size=16, 5845 post_nms_topn=300, 5846 name=None): 5847 """Generate bounding box proposals from encoded bounding boxes. 5848 5849 Args: 5850 scores: A 4-D float `Tensor` of shape 5851 `[num_images, height, width, num_achors]` containing scores of 5852 the boxes for given anchors, can be unsorted. 5853 bbox_deltas: A 4-D float `Tensor` of shape 5854 `[num_images, height, width, 4 x num_anchors]` encoding boxes 5855 with respect to each anchor. Coordinates are given 5856 in the form `[dy, dx, dh, dw]`. 5857 image_info: A 2-D float `Tensor` of shape `[num_images, 5]` 5858 containing image information Height, Width, Scale. 5859 anchors: A 2-D float `Tensor` of shape `[num_anchors, 4]` 5860 describing the anchor boxes. 5861 Boxes are formatted in the form `[y1, x1, y2, x2]`. 5862 nms_threshold: A scalar float `Tensor` for non-maximal-suppression 5863 threshold. Defaults to 0.7. 5864 pre_nms_topn: A scalar int `Tensor` for the number of 5865 top scoring boxes to be used as input. Defaults to 6000. 5866 min_size: A scalar float `Tensor`. Any box that has a smaller size 5867 than min_size will be discarded. Defaults to 16. 5868 post_nms_topn: An integer. Maximum number of rois in the output. 5869 name: A name for this operation (optional). 5870 5871 Returns: 5872 rois: Region of interest boxes sorted by their scores. 5873 roi_probabilities: scores of the ROI boxes in the ROIs' `Tensor`. 5874 """ 5875 return gen_image_ops.generate_bounding_box_proposals( 5876 scores=scores, 5877 bbox_deltas=bbox_deltas, 5878 image_info=image_info, 5879 anchors=anchors, 5880 nms_threshold=nms_threshold, 5881 pre_nms_topn=pre_nms_topn, 5882 min_size=min_size, 5883 post_nms_topn=post_nms_topn, 5884 name=name) 5885