# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of image ops."""

import functools
import numpy as np

from tensorflow.python.eager import context
from tensorflow.python.eager import def_function
from tensorflow.python.framework import config
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import sort_ops
from tensorflow.python.ops import stateless_random_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.ops import variables
from tensorflow.python.util import deprecation
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import tf_export

ops.NotDifferentiable('RandomCrop')
# TODO(b/31222613): This op may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable('HSVToRGB')
ops.NotDifferentiable('DrawBoundingBoxes')
ops.NotDifferentiable('SampleDistortedBoundingBox')
ops.NotDifferentiable('SampleDistortedBoundingBoxV2')
# TODO(bsteiner): Implement the gradient function for extract_glimpse
# TODO(b/31222613): This op may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable('ExtractGlimpse')
ops.NotDifferentiable('NonMaxSuppression')
ops.NotDifferentiable('NonMaxSuppressionV2')
ops.NotDifferentiable('NonMaxSuppressionWithOverlaps')
ops.NotDifferentiable('GenerateBoundingBoxProposals')


# pylint: disable=invalid-name
def _assert(cond, ex_type, msg):
  """A polymorphic assert that works with tensors and boolean expressions.

  If `cond` is not a tensor, behave like an ordinary assert statement, except
  that an empty list is returned. If `cond` is a tensor, return a list
  containing a single TensorFlow assert op.

  Args:
    cond: Something that evaluates to a boolean value. May be a tensor.
    ex_type: The exception class to use.
    msg: The error message.

  Returns:
    A list containing at most one assert op.
  """
  if _is_tensor(cond):
    return [control_flow_ops.Assert(cond, [msg])]
  else:
    if not cond:
      raise ex_type(msg)
    else:
      return []

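# Editorial note (not part of the original module): a minimal sketch of how
# `_assert` above behaves for the two kinds of `cond`. With a Python bool it
# either raises immediately or returns an empty list; with a tensor it defers
# the check to execution time by returning one Assert op, assuming `x` below
# is a tensor:
#
#   _assert(2 > 1, ValueError, 'never raised')         # returns []
#   _assert(False, ValueError, 'boom')                 # raises ValueError
#   deps = _assert(math_ops.greater(x, 0), ValueError, 'x must be > 0')
#   x = control_flow_ops.with_dependencies(deps, x)    # checked at runtime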

def _is_tensor(x):
  """Returns `True` if `x` is a symbolic tensor-like object.

  Args:
    x: A python object to check.

  Returns:
    `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`.
  """
  return isinstance(x, (ops.Tensor, variables.Variable))


def _ImageDimensions(image, rank):
  """Returns the dimensions of an image tensor.

  Args:
    image: A Tensor of rank `rank`. For a rank-3 image the shape is
      `[height, width, channels]`.
    rank: The expected rank of the image.

  Returns:
    A list corresponding to the dimensions of the input image. Dimensions
    that are statically known are python integers, otherwise they are
    integer scalar tensors.
  """
  if image.get_shape().is_fully_defined():
    return image.get_shape().as_list()
  else:
    static_shape = image.get_shape().with_rank(rank).as_list()
    dynamic_shape = array_ops.unstack(array_ops.shape(image), rank)
    return [
        s if s is not None else d for s, d in zip(static_shape, dynamic_shape)
    ]

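# Editorial note (not part of the original module): a minimal sketch of what
# `_ImageDimensions` above returns, assuming `image` has the partially known
# shape [None, 28, 3] and rank=3. The result mixes Python ints and scalar
# tensors:
#
#   height, width, channels = _ImageDimensions(image, rank=3)
#   # height   -> scalar int32 Tensor (dynamic dimension)
#   # width    -> 28 (statically known Python int)
#   # channels -> 3  (statically known Python int)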

def _Check3DImage(image, require_static=True):
  """Assert that we are working with a properly shaped image.

  Args:
    image: 3-D Tensor of shape [height, width, channels]
    require_static: If `True`, requires that all dimensions of `image` are known
      and non-zero.

  Raises:
    ValueError: if `image.shape` is not a 3-vector.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    image_shape = image.get_shape().with_rank(3)
  except ValueError:
    raise ValueError("'image' (shape %s) must be three-dimensional." %
                     image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError("'image' (shape %s) must be fully defined." % image_shape)
  if any(x == 0 for x in image_shape):
    raise ValueError("all dims of 'image.shape' must be > 0: %s" % image_shape)
  if not image_shape.is_fully_defined():
    return [
        check_ops.assert_positive(
            array_ops.shape(image),
            ["all dims of 'image.shape' "
             'must be > 0.'])
    ]
  else:
    return []


def _Assert3DImage(image):
  """Assert that we are working with a properly shaped image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: 3-D Tensor of shape [height, width, channels]

  Raises:
    ValueError: if `image.shape` is not a 3-vector.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _Check3DImage(image, require_static=False), image)


def _AssertAtLeast3DImage(image):
  """Assert that we are working with a properly shaped image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]

  Raises:
    ValueError: if the rank of `image` is less than 3.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _CheckAtLeast3DImage(image, require_static=False), image)


def _CheckAtLeast3DImage(image, require_static=True):
  """Assert that we are working with a properly shaped image.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]
    require_static: If `True`, requires that all dimensions of `image` are known
      and non-zero.

  Raises:
    ValueError: if the rank of `image` is less than 3.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    if image.get_shape().ndims is None:
      image_shape = image.get_shape().with_rank(3)
    else:
      image_shape = image.get_shape().with_rank_at_least(3)
  except ValueError:
    raise ValueError("'image' (shape %s) must be at least three-dimensional." %
                     image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError('\'image\' must be fully defined.')
  if any(x == 0 for x in image_shape[-3:]):
    raise ValueError('inner 3 dims of \'image.shape\' must be > 0: %s' %
                     image_shape)
  if not image_shape[-3:].is_fully_defined():
    return [
        check_ops.assert_positive(
            array_ops.shape(image)[-3:],
            ["inner 3 dims of 'image.shape' "
             'must be > 0.']),
        check_ops.assert_greater_equal(
            array_ops.rank(image),
            3,
            message="'image' must be at least three-dimensional.")
    ]
  else:
    return []

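# Editorial note (not part of the original module): a minimal sketch of the
# static/dynamic split used by the _Check* helpers above. With a fully
# defined shape, validation happens eagerly in Python and an empty list is
# returned; with unknown dimensions, assert ops are returned and must be
# wired in as control dependencies, e.g.:
#
#   checks = _CheckAtLeast3DImage(image, require_static=False)
#   image = control_flow_ops.with_dependencies(checks, image)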

def _AssertGrayscaleImage(image):
  """Assert that we are working with a properly shaped grayscale image.

  Performs the check statically if possible (i.e. if the shape
  is statically known). Otherwise adds a control dependency
  to an assert op that checks the dynamic shape.

  Args:
    image: >= 2-D Tensor of size [*, 1]

  Raises:
    ValueError: if the rank of `image` is less than 2 or if the last
      dimension is not of size 1.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  return control_flow_ops.with_dependencies(
      _CheckGrayscaleImage(image, require_static=False), image)


def _CheckGrayscaleImage(image, require_static=True):
  """Assert that we are working with a properly shaped grayscale image.

  Args:
    image: >= 2-D Tensor of size [*, 1]
    require_static: Boolean, whether static shape is required.

  Raises:
    ValueError: if the rank of `image` is less than 2 or if the last
      dimension is not of size 1.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    if image.get_shape().ndims is None:
      image_shape = image.get_shape().with_rank(2)
    else:
      image_shape = image.get_shape().with_rank_at_least(2)
  except ValueError:
    raise ValueError('A grayscale image (shape %s) must be at least '
                     'two-dimensional.' % image.shape)
  if require_static and not image_shape.is_fully_defined():
    raise ValueError('\'image\' must be fully defined.')
  if image_shape.is_fully_defined():
    if image_shape[-1] != 1:
      raise ValueError('Last dimension of a grayscale image should be size 1.')
  if not image_shape.is_fully_defined():
    return [
        check_ops.assert_equal(
            array_ops.shape(image)[-1],
            1,
            message='Last dimension of a grayscale image should be size 1.'),
        check_ops.assert_greater_equal(
            array_ops.rank(image),
            2,
            message='A grayscale image must be at least two-dimensional.')
    ]
  else:
    return []


def fix_image_flip_shape(image, result):
  """Sets the result shape to 3-dimensional if nothing else is known.

  Args:
    image: original image.
    result: flipped or transformed image.

  Returns:
    An image whose shape is at least (None, None, None).
  """

  image_shape = image.get_shape()
  if image_shape == tensor_shape.unknown_shape():
    result.set_shape([None, None, None])
  else:
    result.set_shape(image_shape)
  return result


@tf_export('image.random_flip_up_down')
@dispatch.add_dispatch_support
def random_flip_up_down(image, seed=None):
  """Randomly flips an image vertically (upside down).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the first
  dimension, which is `height`.  Otherwise, output the image as-is.
  When passing a batch of images, each image will be randomly flipped
  independently of the other images.

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> tf.image.random_flip_up_down(image, 3).numpy().tolist()
  [[[3], [4]], [[1], [2]]]

  Randomly flip multiple images.

  >>> images = np.array(
  ... [
  ...     [[[1], [2]], [[3], [4]]],
  ...     [[[5], [6]], [[7], [8]]]
  ... ])
  >>> tf.image.random_flip_up_down(images, 4).numpy().tolist()
  [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]]

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_flip_up_down`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. tf.random.set_seed).

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  random_func = functools.partial(random_ops.random_uniform, seed=seed)
  return _random_flip(image, 0, random_func, 'random_flip_up_down')


@tf_export('image.random_flip_left_right')
@dispatch.add_dispatch_support
def random_flip_left_right(image, seed=None):
  """Randomly flip an image horizontally (left to right).

  With a 1 in 2 chance, outputs the contents of `image` flipped along the
  second dimension, which is `width`.  Otherwise output the image as-is.
  When passing a batch of images, each image will be randomly flipped
  independently of the other images.

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> tf.image.random_flip_left_right(image, 5).numpy().tolist()
  [[[2], [1]], [[4], [3]]]

  Randomly flip multiple images.

  >>> images = np.array(
  ... [
  ...     [[[1], [2]], [[3], [4]]],
  ...     [[[5], [6]], [[7], [8]]]
  ... ])
  >>> tf.image.random_flip_left_right(images, 6).numpy().tolist()
  [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]]

  For producing deterministic results given a `seed` value, use
  `tf.image.stateless_random_flip_left_right`. Unlike using the `seed` param
  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
  same results given the same seed independent of how many times the function is
  called, and independent of global seed settings (e.g. tf.random.set_seed).

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  random_func = functools.partial(random_ops.random_uniform, seed=seed)
  return _random_flip(image, 1, random_func, 'random_flip_left_right')


@tf_export('image.stateless_random_flip_left_right', v1=[])
@dispatch.add_dispatch_support
def stateless_random_flip_left_right(image, seed):
  """Randomly flip an image horizontally (left to right) deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> seed = (2, 3)
  >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist()
  [[[2], [1]], [[4], [3]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    A tensor of the same type and shape as `image`.
  """
  random_func = functools.partial(
      stateless_random_ops.stateless_random_uniform, seed=seed)
  return _random_flip(
      image, 1, random_func, 'stateless_random_flip_left_right')


@tf_export('image.stateless_random_flip_up_down', v1=[])
@dispatch.add_dispatch_support
def stateless_random_flip_up_down(image, seed):
  """Randomly flip an image vertically (upside down) deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> seed = (2, 3)
  >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist()
  [[[3], [4]], [[1], [2]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    A tensor of the same type and shape as `image`.
  """
  random_func = functools.partial(
      stateless_random_ops.stateless_random_uniform, seed=seed)
  return _random_flip(
      image, 0, random_func, 'stateless_random_flip_up_down')


def _random_flip(image, flip_index, random_func, scope_name):
  """Randomly (50% chance) flip an image along axis `flip_index`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    flip_index: Dimension along which to flip the image.
      Vertical is 0, Horizontal is 1.
    random_func: partial function for calling either stateful or stateless
      random ops with `seed` parameter specified.
    scope_name: Name of the scope in which the ops are added.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(None, scope_name, [image]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()

    def f_rank3():
      uniform_random = random_func(shape=[], minval=0, maxval=1.0)
      mirror_cond = math_ops.less(uniform_random, .5)
      result = control_flow_ops.cond(
          mirror_cond,
          lambda: array_ops.reverse(image, [flip_index]),
          lambda: image,
          name=scope)
      return fix_image_flip_shape(image, result)

    def f_rank4():
      batch_size = array_ops.shape(image)[0]
      uniform_random = random_func(shape=[batch_size], minval=0, maxval=1.0)
      flips = math_ops.round(
          array_ops.reshape(uniform_random, [batch_size, 1, 1, 1]))
      flips = math_ops.cast(flips, image.dtype)
      flipped_input = array_ops.reverse(image, [flip_index + 1])
      return flips * flipped_input + (1 - flips) * image

    if shape.ndims is None:
      rank = array_ops.rank(image)
      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    if shape.ndims == 3:
      return f_rank3()
    elif shape.ndims == 4:
      return f_rank4()
    else:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)

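# Editorial note (not part of the original module): a minimal sketch of the
# rank-4 branch of `_random_flip` above. Rather than one cond per image, it
# draws a uniform value per image, rounds it to a 0/1 mask of shape
# [batch, 1, 1, 1], and blends the flipped and original batches
# arithmetically:
#
#   flips = round(uniform([batch_size]))              # 0.0 or 1.0 per image
#   out = flips * reverse(image, [axis]) + (1 - flips) * image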

@tf_export('image.flip_left_right')
@dispatch.add_dispatch_support
def flip_left_right(image):
  """Flip an image horizontally (left to right).

  Outputs the contents of `image` flipped along the width dimension.

  See also `tf.reverse`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...     [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.flip_left_right(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 4.,  5.,  6.],
          [ 1.,  2.,  3.]],
         [[10., 11., 12.],
          [ 7.,  8.,  9.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  return _flip(image, 1, 'flip_left_right')


@tf_export('image.flip_up_down')
@dispatch.add_dispatch_support
def flip_up_down(image):
  """Flip an image vertically (upside down).

  Outputs the contents of `image` flipped along the height dimension.

  See also `tf.reverse`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...     [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.flip_up_down(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 7.,  8.,  9.],
          [10., 11., 12.]],
         [[ 1.,  2.,  3.],
          [ 4.,  5.,  6.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.

  Returns:
    A `Tensor` of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  return _flip(image, 0, 'flip_up_down')


def _flip(image, flip_index, scope_name):
  """Flip an image either horizontally or vertically.

  Outputs the contents of `image` flipped along the dimension `flip_index`.

  See also `tf.reverse`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    flip_index: 0 for vertical, 1 for horizontal.
    scope_name: string, scope name.

  Returns:
    A `Tensor` of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(None, scope_name, [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()

    def f_rank3():
      return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index]))

    def f_rank4():
      return array_ops.reverse(image, [flip_index + 1])

    if shape.ndims is None:
      rank = array_ops.rank(image)
      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return f_rank3()
    elif shape.ndims == 4:
      return f_rank4()
    else:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)


@tf_export('image.rot90')
@dispatch.add_dispatch_support
def rot90(image, k=1, name=None):
  """Rotate image(s) counter-clockwise by 90 degrees.

  For example:

  >>> a=tf.constant([[[1],[2]],
  ...                [[3],[4]]])
  >>> # rotating `a` counter-clockwise by 90 degrees
  >>> a_rot=tf.image.rot90(a)
  >>> print(a_rot[...,0].numpy())
  [[2 4]
   [1 3]]
  >>> # rotating `a` counter-clockwise by 270 degrees
  >>> a_rot=tf.image.rot90(a, k=3)
  >>> print(a_rot[...,0].numpy())
  [[3 1]
   [4 2]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    k: A scalar integer tensor. The number of times the image(s) are
      rotated by 90 degrees.
    name: A name for this operation (optional).

  Returns:
    A rotated tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` is not supported.
  """
  with ops.name_scope(name, 'rot90', [image, k]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
    k.get_shape().assert_has_rank(0)
    k = math_ops.mod(k, 4)

    shape = image.get_shape()
    if shape.ndims is None:
      rank = array_ops.rank(image)

      def f_rank3():
        return _rot90_3D(image, k, scope)

      def f_rank4():
        return _rot90_4D(image, k, scope)

      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return _rot90_3D(image, k, scope)
    elif shape.ndims == 4:
      return _rot90_4D(image, k, scope)
    else:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)


def _rot90_3D(image, k, name_scope):
  """Rotate image counter-clockwise by 90 degrees `k` times.

  Args:
    image: 3-D Tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90 degrees.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 3-D tensor of the same type and shape as `image`.
  """

  def _rot90():
    return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2])

  def _rot180():
    return array_ops.reverse_v2(image, [0, 1])

  def _rot270():
    return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1])

  cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180),
           (math_ops.equal(k, 3), _rot270)]

  result = control_flow_ops.case(
      cases, default=lambda: image, exclusive=True, name=name_scope)
  result.set_shape([None, None, image.get_shape()[2]])
  return result

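# Editorial note (not part of the original module): the three cases above
# compose k quarter-turns from two primitives on an HWC image, where axis 0
# is height and axis 1 is width. One counter-clockwise quarter-turn is
# "reverse the width axis, then swap the spatial axes"; the rest follow by
# composition:
#
#   rot90(x)  = transpose(reverse(x, axis=1), perm=[1, 0, 2])
#   rot180(x) = reverse(x, axis=[0, 1])
#   rot270(x) = reverse(transpose(x, perm=[1, 0, 2]), axis=1)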

def _rot90_4D(images, k, name_scope):
  """Rotate batch of images counter-clockwise by 90 degrees `k` times.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]`.
    k: A scalar integer. The number of times the images are rotated by 90
      degrees.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 4-D `Tensor` of the same type and shape as `images`.
  """

  def _rot90():
    return array_ops.transpose(array_ops.reverse_v2(images, [2]), [0, 2, 1, 3])

  def _rot180():
    return array_ops.reverse_v2(images, [1, 2])

  def _rot270():
    return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2])

  cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180),
           (math_ops.equal(k, 3), _rot270)]

  result = control_flow_ops.case(
      cases, default=lambda: images, exclusive=True, name=name_scope)
  shape = result.get_shape()
  result.set_shape([shape[0], None, None, shape[3]])
  return result


@tf_export('image.transpose', v1=['image.transpose', 'image.transpose_image'])
@dispatch.add_dispatch_support
def transpose(image, name=None):
  """Transpose image(s) by swapping the height and width dimension.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...     [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.transpose(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.,  2.,  3.],
          [ 7.,  8.,  9.]],
         [[ 4.,  5.,  6.],
          [10., 11., 12.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    name: A name for this operation (optional).

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, width, height, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[width, height, channels]`.

  Raises:
    ValueError: if the shape of `image` is not supported.

  Usage Example:

  >>> image = [[[1, 2], [3, 4]],
  ...         [[5, 6], [7, 8]],
  ...         [[9, 10], [11, 12]]]
  >>> image = tf.constant(image)
  >>> tf.image.transpose(image)
  <tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy=
  array([[[ 1,  2],
          [ 5,  6],
          [ 9, 10]],
         [[ 3,  4],
          [ 7,  8],
          [11, 12]]], dtype=int32)>
  """
  with ops.name_scope(name, 'transpose', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()
    if shape.ndims is None:
      rank = array_ops.rank(image)

      def f_rank3():
        return array_ops.transpose(image, [1, 0, 2], name=name)

      def f_rank4():
        return array_ops.transpose(image, [0, 2, 1, 3], name=name)

      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return array_ops.transpose(image, [1, 0, 2], name=name)
    elif shape.ndims == 4:
      return array_ops.transpose(image, [0, 2, 1, 3], name=name)
    else:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)


@tf_export('image.central_crop')
@dispatch.add_dispatch_support
def central_crop(image, central_fraction):
  """Crop the central region of the image(s).

  Remove the outer parts of an image but retain the central region of the image
  along each dimension. If we specify `central_fraction = 0.5`, this function
  returns the region marked with "X" in the below diagram. The larger the value
  of `central_fraction`, the larger the dimension of the region to be cropped
  and retained.

       --------
      |        |
      |  XXXX  |
      |  XXXX  |
      |        |   where "X" is the central 50% of the image.
       --------

  This function works on either a single image (`image` is a 3-D Tensor), or a
  batch of images (`image` is a 4-D Tensor).

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0],
  ...       [7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]],
  ...     [[13.0, 14.0, 15.0],
  ...       [16.0, 17.0, 18.0],
  ...       [19.0, 20.0, 21.0],
  ...       [22.0, 23.0, 24.0]],
  ...     [[25.0, 26.0, 27.0],
  ...       [28.0, 29.0, 30.0],
  ...       [31.0, 32.0, 33.0],
  ...       [34.0, 35.0, 36.0]],
  ...     [[37.0, 38.0, 39.0],
  ...       [40.0, 41.0, 42.0],
  ...       [43.0, 44.0, 45.0],
  ...       [46.0, 47.0, 48.0]]]
  >>> tf.image.central_crop(x, 0.5)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[16., 17., 18.],
          [19., 20., 21.]],
         [[28., 29., 30.],
          [31., 32., 33.]]], dtype=float32)>

  Args:
    image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D
      Tensor of shape [batch_size, height, width, depth].
    central_fraction: float in (0, 1], fraction of the size to crop.

  Raises:
    ValueError: if central_fraction is not within (0, 1].

  Returns:
    3-D / 4-D float Tensor, as per the input.
  """
  with ops.name_scope(None, 'central_crop', [image]):
    image = ops.convert_to_tensor(image, name='image')
    central_fraction_static = tensor_util.constant_value(central_fraction)
    if central_fraction_static is not None:
      if central_fraction_static <= 0.0 or central_fraction_static > 1.0:
        raise ValueError('central_fraction must be within (0, 1]')
      if central_fraction_static == 1.0:
        return image
    else:
      assert_ops = _assert(
          math_ops.logical_and(central_fraction > 0.0,
                               central_fraction <= 1.0), ValueError,
          'central_fraction must be within (0, 1]')
      image = control_flow_ops.with_dependencies(assert_ops, image)

    _AssertAtLeast3DImage(image)
    rank = image.get_shape().ndims
    if rank != 3 and rank != 4:
      raise ValueError('`image` should either be a Tensor with rank = 3 or '
                       'rank = 4. Had rank = {}.'.format(rank))

    # Helper method to return the `idx`-th dimension of `tensor`, along with
    # a boolean signifying if the dimension is dynamic.
    def _get_dim(tensor, idx):
      static_shape = tensor.get_shape().dims[idx].value
      if static_shape is not None:
        return static_shape, False
      return array_ops.shape(tensor)[idx], True

    # Get the height, width, depth (and batch size, if the image is a 4-D
    # tensor).
    if rank == 3:
      img_h, dynamic_h = _get_dim(image, 0)
      img_w, dynamic_w = _get_dim(image, 1)
      img_d = image.get_shape()[2]
    else:
      img_bs = image.get_shape()[0]
      img_h, dynamic_h = _get_dim(image, 1)
      img_w, dynamic_w = _get_dim(image, 2)
      img_d = image.get_shape()[3]

    dynamic_h = dynamic_h or (central_fraction_static is None)
    dynamic_w = dynamic_w or (central_fraction_static is None)

    # Compute the bounding boxes for the crop. The type and value of the
    # bounding boxes depend on the `image` tensor's rank and whether or not the
    # dimensions are statically defined.
    if dynamic_h:
      img_hd = math_ops.cast(img_h, dtypes.float64)
      bbox_h_start = math_ops.cast(
          (img_hd - img_hd * math_ops.cast(central_fraction, dtypes.float64)) /
          2, dtypes.int32)
    else:
      img_hd = float(img_h)
      bbox_h_start = int((img_hd - img_hd * central_fraction_static) / 2)

    if dynamic_w:
      img_wd = math_ops.cast(img_w, dtypes.float64)
      bbox_w_start = math_ops.cast(
          (img_wd - img_wd * math_ops.cast(central_fraction, dtypes.float64)) /
          2, dtypes.int32)
    else:
      img_wd = float(img_w)
      bbox_w_start = int((img_wd - img_wd * central_fraction_static) / 2)

    bbox_h_size = img_h - bbox_h_start * 2
    bbox_w_size = img_w - bbox_w_start * 2

    if rank == 3:
      bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1])
    else:
      bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops.stack([-1, bbox_h_size, bbox_w_size, -1])

    image = array_ops.slice(image, bbox_begin, bbox_size)

    # Reshape the `image` tensor to the desired size.
    if rank == 3:
      image.set_shape([
          None if dynamic_h else bbox_h_size,
          None if dynamic_w else bbox_w_size, img_d
      ])
    else:
      image.set_shape([
          img_bs, None if dynamic_h else bbox_h_size,
          None if dynamic_w else bbox_w_size, img_d
      ])
    return image

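# Editorial note (not part of the original module): a minimal sketch of the
# crop arithmetic in `central_crop` above for a statically known dimension.
# With a height of 4 and central_fraction = 0.5:
#
#   bbox_h_start = int((4 - 4 * 0.5) / 2)  # = 1
#   bbox_h_size = 4 - bbox_h_start * 2     # = 2, so rows 1..2 are kept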

@tf_export('image.pad_to_bounding_box')
@dispatch.add_dispatch_support
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width):
  """Pad `image` with zeros to the specified `height` and `width`.

  Adds `offset_height` rows of zeros on top, `offset_width` columns of
  zeros on the left, and then pads the image on the bottom and right
  with zeros until it has dimensions `target_height`, `target_width`.

  This op does nothing if `offset_*` is zero and the image already has size
  `target_height` by `target_width`.

  Usage Example:

  >>> x = [[[1., 2., 3.],
  ...       [4., 5., 6.]],
  ...      [[7., 8., 9.],
  ...       [10., 11., 12.]]]
  >>> padded_image = tf.image.pad_to_bounding_box(x, 1, 1, 4, 4)
  >>> padded_image
  <tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy=
  array([[[ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 1.,  2.,  3.],
          [ 4.,  5.,  6.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 7.,  8.,  9.],
          [10., 11., 12.],
          [ 0.,  0.,  0.]],
         [[ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.],
          [ 0.,  0.,  0.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`
    If `image` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative.
  """
  return pad_to_bounding_box_internal(
      image,
      offset_height,
      offset_width,
      target_height,
      target_width,
      check_dims=True)


# TODO(b/190099338) Remove this internal method and remap call sites to call
# image_ops.pad_to_bounding_box when asserts are no longer serialized. See also
# b/204377079#comment6 for more context.
def pad_to_bounding_box_internal(image, offset_height, offset_width,
                                 target_height, target_width, check_dims):
  """Pad `image` with zeros to the specified `height` and `width`.

  Adds `offset_height` rows of zeros on top, `offset_width` columns of
  zeros on the left, and then pads the image on the bottom and right
  with zeros until it has dimensions `target_height`, `target_width`.

  This op does nothing if `offset_*` is zero and the image already has size
  `target_height` by `target_width`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.
    check_dims: If True, assert that dimensions are non-negative and in range.
      In multi-GPU distributed settings, assertions can cause program slowdown.
      Setting this parameter to `False` avoids this, resulting in faster speed
      in some situations, with the tradeoff being that some error checking is
      not performed.

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`
    If `image` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative. Not raised if `check_dims` is `False`.
  """
  with ops.name_scope(None, 'pad_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    is_batch = True
    image_shape = image.get_shape()
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          image_shape)

    batch, height, width, depth = _ImageDimensions(image, rank=4)

    after_padding_width = target_width - offset_width - width

    after_padding_height = target_height - offset_height - height

    if check_dims:
      assert_ops = _CheckAtLeast3DImage(image, require_static=False)
      assert_ops += _assert(offset_height >= 0, ValueError,
                            'offset_height must be >= 0')
      assert_ops += _assert(offset_width >= 0, ValueError,
                            'offset_width must be >= 0')
      assert_ops += _assert(after_padding_width >= 0, ValueError,
                            'width must be <= target - offset')
      assert_ops += _assert(after_padding_height >= 0, ValueError,
                            'height must be <= target - offset')
      image = control_flow_ops.with_dependencies(assert_ops, image)

    # Do not pad on the depth dimension.
    paddings = array_ops.reshape(
        array_ops.stack([
            0, 0, offset_height, after_padding_height, offset_width,
            after_padding_width, 0, 0
        ]), [4, 2])
    padded = array_ops.pad(image, paddings)

    padded_shape = [
        None if _is_tensor(i) else i
        for i in [batch, target_height, target_width, depth]
    ]
    padded.set_shape(padded_shape)

    if not is_batch:
      padded = array_ops.squeeze(padded, axis=[0])

    return padded

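# Editorial note (not part of the original module): the `paddings` tensor
# built above has shape [4, 2]; row i holds the (before, after) pad counts
# for dimension i of the NHWC batch:
#
#   [[0,             0                    ],  # batch: never padded
#    [offset_height, after_padding_height],  # height
#    [offset_width,  after_padding_width ],  # width
#    [0,             0                    ]] # depth: never padded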

@tf_export('image.crop_to_bounding_box')
@dispatch.add_dispatch_support
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width):
  """Crops an `image` to a specified bounding box.

  This op cuts a rectangular bounding box out of `image`. The top-left corner
  of the bounding box is at `offset_height, offset_width` in `image`, and the
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Example Usage:

  >>> image = tf.constant(np.arange(1, 28, dtype=np.float32), shape=[3, 3, 3])
  >>> image[:,:,0] # print the first channel of the 3-D tensor
  <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
  array([[ 1.,  4.,  7.],
         [10., 13., 16.],
         [19., 22., 25.]], dtype=float32)>
  >>> cropped_image = tf.image.crop_to_bounding_box(image, 0, 0, 2, 2)
  >>> cropped_image[:,:,0] # print the first channel of the cropped 3-D tensor
  <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
  array([[ 1.,  4.],
         [10., 13.]], dtype=float32)>

  Args:
    image: 4-D `Tensor` of shape `[batch, height, width, channels]` or 3-D
      `Tensor` of shape `[height, width, channels]`.
    offset_height: Vertical coordinate of the top-left corner of the bounding
      box in `image`.
    offset_width: Horizontal coordinate of the top-left corner of the bounding
      box in `image`.
    target_height: Height of the bounding box.
    target_width: Width of the bounding box.

  Returns:
    If `image` was 4-D, a 4-D `Tensor` of shape
    `[batch, target_height, target_width, channels]`.
    If `image` was 3-D, a 3-D `Tensor` of shape
    `[target_height, target_width, channels]`.
    It has the same dtype as `image`.

  Raises:
    ValueError: `image` is not a 3-D or 4-D `Tensor`.
    ValueError: `offset_width < 0` or `offset_height < 0`.
    ValueError: `target_width <= 0` or `target_height <= 0`.
    ValueError: `width < offset_width + target_width` or
      `height < offset_height + target_height`.
  """
  with ops.name_scope(None, 'crop_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    is_batch = True
    image_shape = image.get_shape()
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          image_shape)

    assert_ops = _CheckAtLeast3DImage(image, require_static=False)

    batch, height, width, depth = _ImageDimensions(image, rank=4)

    assert_ops += _assert(offset_width >= 0, ValueError,
                          'offset_width must be >= 0.')
    assert_ops += _assert(offset_height >= 0, ValueError,
                          'offset_height must be >= 0.')
    assert_ops += _assert(target_width > 0, ValueError,
                          'target_width must be > 0.')
    assert_ops += _assert(target_height > 0, ValueError,
                          'target_height must be > 0.')
    assert_ops += _assert(width >= (target_width + offset_width), ValueError,
                          'width must be >= target + offset.')
    assert_ops += _assert(height >= (target_height + offset_height), ValueError,
                          'height must be >= target + offset.')
    image = control_flow_ops.with_dependencies(assert_ops, image)

    cropped = array_ops.slice(
        image, array_ops.stack([0, offset_height, offset_width, 0]),
        array_ops.stack([array_ops.shape(image)[0], target_height, target_width,
                         array_ops.shape(image)[3]]))

    cropped_shape = [
        None if _is_tensor(i) else i
        for i in [batch, target_height, target_width, depth]
    ]
    cropped.set_shape(cropped_shape)

    if not is_batch:
      cropped = array_ops.squeeze(cropped, axis=[0])

    return cropped


@tf_export(
    'image.resize_with_crop_or_pad',
    v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad'])
@dispatch.add_dispatch_support
def resize_image_with_crop_or_pad(image, target_height, target_width):
  """Crops and/or pads an image to a target width and height.

  Resizes an image to a target width and height by either centrally
  cropping the image or padding it evenly with zeros.

  If `width` or `height` is greater than the specified `target_width` or
  `target_height` respectively, this op centrally crops along that dimension.

  For example:

  >>> image = np.arange(75).reshape(5, 5, 3)  # create 3-D image input
  >>> image[:,:,0]  # print first channel just for demo purposes
  array([[ 0,  3,  6,  9, 12],
         [15, 18, 21, 24, 27],
         [30, 33, 36, 39, 42],
         [45, 48, 51, 54, 57],
         [60, 63, 66, 69, 72]])
  >>> image = tf.image.resize_with_crop_or_pad(image, 3, 3)  # crop
  >>> # print first channel for demo purposes; centrally cropped output
  >>> image[:,:,0]
  <tf.Tensor: shape=(3, 3), dtype=int64, numpy=
  array([[18, 21, 24],
         [33, 36, 39],
         [48, 51, 54]])>

  If `width` or `height` is smaller than the specified `target_width` or
  `target_height` respectively, this op centrally pads with 0 along that
  dimension.

  For example:

  >>> image = np.arange(1, 28).reshape(3, 3, 3)  # create 3-D image input
  >>> image[:,:,0]  # print first channel just for demo purposes
  array([[ 1,  4,  7],
         [10, 13, 16],
         [19, 22, 25]])
  >>> image = tf.image.resize_with_crop_or_pad(image, 5, 5)  # pad
  >>> # print first channel for demo purposes; we should see 0 paddings
  >>> image[:,:,0]
  <tf.Tensor: shape=(5, 5), dtype=int64, numpy=
  array([[ 0,  0,  0,  0,  0],
         [ 0,  1,  4,  7,  0],
         [ 0, 10, 13, 16,  0],
         [ 0, 19, 22, 25,  0],
         [ 0,  0,  0,  0,  0]])>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Cropped and/or padded image.
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image_shape = image.get_shape()
    is_batch = True
    if image_shape.ndims == 3:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
    elif image_shape.ndims is None:
      is_batch = False
      image = array_ops.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image_shape.ndims != 4:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          image_shape)

    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
    assert_ops += _assert(target_width > 0, ValueError,
                          'target_width must be > 0.')
    assert_ops += _assert(target_height > 0, ValueError,
                          'target_height must be > 0.')

    image = control_flow_ops.with_dependencies(assert_ops, image)
    # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
    # Make sure our checks come first, so that error messages are clearer.
    if _is_tensor(target_height):
      target_height = control_flow_ops.with_dependencies(
          assert_ops, target_height)
    if _is_tensor(target_width):
      target_width = control_flow_ops.with_dependencies(assert_ops,
                                                        target_width)

    def max_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.maximum(x, y)
      else:
        return max(x, y)

    def min_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.minimum(x, y)
      else:
        return min(x, y)

    def equal_(x, y):
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.equal(x, y)
      else:
        return x == y

    _, height, width, _ = _ImageDimensions(image, rank=4)
    width_diff = target_width - width
    offset_crop_width = max_(-width_diff // 2, 0)
    offset_pad_width = max_(width_diff // 2, 0)

    height_diff = target_height - height
    offset_crop_height = max_(-height_diff // 2, 0)
    offset_pad_height = max_(height_diff // 2, 0)

    # Maybe crop if needed.
    cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width,
                                   min_(target_height, height),
                                   min_(target_width, width))

    # Maybe pad if needed.
    resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width,
                                  target_height, target_width)

    # In theory all the checks below are redundant.
    if resized.get_shape().ndims is None:
      raise ValueError('resized contains no shape.')

    _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4)

    assert_ops = []
    assert_ops += _assert(
        equal_(resized_height, target_height), ValueError,
        'resized height is not correct.')
    assert_ops += _assert(
        equal_(resized_width, target_width), ValueError,
        'resized width is not correct.')

    resized = control_flow_ops.with_dependencies(assert_ops, resized)

    if not is_batch:
      resized = array_ops.squeeze(resized, axis=[0])

    return resized

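# Editorial note (not part of the original module): a minimal sketch of the
# crop-or-pad offset arithmetic above, for one dimension. With width 5 and
# target_width 3, width_diff = -2 and the image is cropped:
#
#   offset_crop_width = max(-width_diff // 2, 0)  # = 1 (drop 1 column/side)
#   offset_pad_width = max(width_diff // 2, 0)    # = 0
#
# With width 3 and target_width 5 the signs flip and the image is instead
# padded by 1 column on each side.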

@tf_export(v1=['image.ResizeMethod'])
class ResizeMethodV1:
  """See `v1.image.resize` for details."""
  BILINEAR = 0
  NEAREST_NEIGHBOR = 1
  BICUBIC = 2
  AREA = 3


@tf_export('image.ResizeMethod', v1=[])
class ResizeMethod:
  """See `tf.image.resize` for details."""
  BILINEAR = 'bilinear'
  NEAREST_NEIGHBOR = 'nearest'
  BICUBIC = 'bicubic'
  AREA = 'area'
  LANCZOS3 = 'lanczos3'
  LANCZOS5 = 'lanczos5'
  GAUSSIAN = 'gaussian'
  MITCHELLCUBIC = 'mitchellcubic'


def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name,
                          skip_resize_if_same):
  """Core functionality for v1 and v2 resize functions."""
  with ops.name_scope(name, 'resize', [images, size]):
    images = ops.convert_to_tensor(images, name='images')
    if images.get_shape().ndims is None:
      raise ValueError('\'images\' contains no shape.')
    # TODO(shlens): Migrate this functionality to the underlying Ops.
    is_batch = True
    if images.get_shape().ndims == 3:
      is_batch = False
      images = array_ops.expand_dims(images, 0)
    elif images.get_shape().ndims != 4:
      raise ValueError('\'images\' must have either 3 or 4 dimensions.')

    _, height, width, _ = images.get_shape().as_list()

    try:
      size = ops.convert_to_tensor(size, dtypes.int32, name='size')
    except (TypeError, ValueError):
      raise ValueError('\'size\' must be a 1-D int32 Tensor')
    if not size.get_shape().is_compatible_with([2]):
      raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: '
                       'new_height, new_width')

    if preserve_aspect_ratio:
      # Get the current shapes of the image, even if dynamic.
      _, current_height, current_width, _ = _ImageDimensions(images, rank=4)

      # Do the computation to find the right scale and height/width.
      scale_factor_height = (
          math_ops.cast(size[0], dtypes.float32) /
          math_ops.cast(current_height, dtypes.float32))
      scale_factor_width = (
          math_ops.cast(size[1], dtypes.float32) /
          math_ops.cast(current_width, dtypes.float32))
      scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width)
      scaled_height_const = math_ops.cast(
          math_ops.round(scale_factor *
                         math_ops.cast(current_height, dtypes.float32)),
          dtypes.int32)
      scaled_width_const = math_ops.cast(
          math_ops.round(scale_factor *
                         math_ops.cast(current_width, dtypes.float32)),
          dtypes.int32)

      # NOTE: Reset the size and other constants used later.
      size = ops.convert_to_tensor([scaled_height_const, scaled_width_const],
                                   dtypes.int32,
                                   name='size')

    size_const_as_shape = tensor_util.constant_value_as_shape(size)
    new_height_const = tensor_shape.dimension_at_index(size_const_as_shape,
                                                       0).value
    new_width_const = tensor_shape.dimension_at_index(size_const_as_shape,
                                                      1).value

    # If we can determine that the height and width will be unmodified by this
    # transformation, we avoid performing the resize.
    if skip_resize_if_same and all(
        x is not None
        for x in [new_width_const, width, new_height_const, height]) and (
            width == new_width_const and height == new_height_const):
      if not is_batch:
        images = array_ops.squeeze(images, axis=[0])
      return images

    images = resizer_fn(images, size)

    # NOTE(mrry): The shape functions for the resize ops cannot unpack
    # the packed values in `new_size`, so set the shape here.
    images.set_shape([None, new_height_const, new_width_const, None])

    if not is_batch:
      images = array_ops.squeeze(images, axis=[0])
    return images

1509
1510@tf_export(v1=['image.resize_images', 'image.resize'])
1511@dispatch.add_dispatch_support
1512def resize_images(images,
1513                  size,
1514                  method=ResizeMethodV1.BILINEAR,
1515                  align_corners=False,
1516                  preserve_aspect_ratio=False,
1517                  name=None):
1518  """Resize `images` to `size` using the specified `method`.
1519
1520  Resized images will be distorted if their original aspect ratio is not
1521  the same as `size`.  To avoid distortions see
1522  `tf.image.resize_with_pad` or `tf.image.resize_with_crop_or_pad`.
1523
1524  The `method` can be one of:
1525
1526  *   <b>`tf.image.ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.](
1527    https://en.wikipedia.org/wiki/Bilinear_interpolation)
1528  *   <b>`tf.image.ResizeMethod.NEAREST_NEIGHBOR`</b>: [
1529    Nearest neighbor interpolation.](
1530    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
1531  *   <b>`tf.image.ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.](
1532    https://en.wikipedia.org/wiki/Bicubic_interpolation)
1533  *   <b>`tf.image.ResizeMethod.AREA`</b>: Area interpolation.
1534
1535  The return value has the same type as `images` if `method` is
1536  `tf.image.ResizeMethod.NEAREST_NEIGHBOR`. It will also have the same type
1537  as `images` if the size of `images` can be statically determined to be the
1538  same as `size`, because `images` is returned in this case. Otherwise, the
1539  return value has type `float32`.
1540
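  Usage Example:

  >>> image = tf.zeros([2, 4, 4, 3])  # A batch of two 4x4 RGB images.
  >>> resized = tf.compat.v1.image.resize_images(image, [2, 2])
  >>> resized.shape.as_list()
  [2, 2, 2, 3]
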
1541  Args:
1542    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1543      of shape `[height, width, channels]`.
1544    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The new
1545      size for the images.
1546    method: ResizeMethod.  Defaults to `tf.image.ResizeMethod.BILINEAR`.
1547    align_corners: bool.  If True, the centers of the 4 corner pixels of the
1548      input and output tensors are aligned, preserving the values at the corner
1549      pixels. Defaults to `False`.
1550    preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
1551      then `images` will be resized to a size that fits in `size` while
1552      preserving the aspect ratio of the original image. Scales up the image if
1553      `size` is bigger than the current size of the `image`. Defaults to False.
1554    name: A name for this operation (optional).
1555
1556  Raises:
1557    ValueError: if the shape of `images` is incompatible with the
1558      shape arguments to this function
1559    ValueError: if `size` has invalid shape or type.
1560    ValueError: if an unsupported resize method is specified.
1561
1562  Returns:
1563    If `images` was 4-D, a 4-D float Tensor of shape
1564    `[batch, new_height, new_width, channels]`.
1565    If `images` was 3-D, a 3-D float Tensor of shape
1566    `[new_height, new_width, channels]`.
1567  """
1568
1569  def resize_fn(images_t, new_size):
1570    """Legacy resize core function, passed to _resize_images_common."""
1571    if method == ResizeMethodV1.BILINEAR or method == ResizeMethod.BILINEAR:
1572      return gen_image_ops.resize_bilinear(
1573          images_t, new_size, align_corners=align_corners)
1574    elif (method == ResizeMethodV1.NEAREST_NEIGHBOR or
1575          method == ResizeMethod.NEAREST_NEIGHBOR):
1576      return gen_image_ops.resize_nearest_neighbor(
1577          images_t, new_size, align_corners=align_corners)
1578    elif method == ResizeMethodV1.BICUBIC or method == ResizeMethod.BICUBIC:
1579      return gen_image_ops.resize_bicubic(
1580          images_t, new_size, align_corners=align_corners)
1581    elif method == ResizeMethodV1.AREA or method == ResizeMethod.AREA:
1582      return gen_image_ops.resize_area(
1583          images_t, new_size, align_corners=align_corners)
1584    else:
1585      raise ValueError('Resize method is not implemented: {}'.format(method))
1586
1587  return _resize_images_common(
1588      images,
1589      resize_fn,
1590      size,
1591      preserve_aspect_ratio=preserve_aspect_ratio,
1592      name=name,
1593      skip_resize_if_same=True)
1594
1595
1596@tf_export('image.resize', v1=[])
1597@dispatch.add_dispatch_support
1598def resize_images_v2(images,
1599                     size,
1600                     method=ResizeMethod.BILINEAR,
1601                     preserve_aspect_ratio=False,
1602                     antialias=False,
1603                     name=None):
1604  """Resize `images` to `size` using the specified `method`.
1605
1606  Resized images will be distorted if their original aspect ratio is not
1607  the same as `size`.  To avoid distortions see
1608  `tf.image.resize_with_pad`.
1609
1610  >>> image = tf.constant([
1611  ...  [1,0,0,0,0],
1612  ...  [0,1,0,0,0],
1613  ...  [0,0,1,0,0],
1614  ...  [0,0,0,1,0],
1615  ...  [0,0,0,0,1],
1616  ... ])
1617  >>> # Add "batch" and "channels" dimensions
1618  >>> image = image[tf.newaxis, ..., tf.newaxis]
1619  >>> image.shape.as_list()  # [batch, height, width, channels]
1620  [1, 5, 5, 1]
1621  >>> tf.image.resize(image, [3,5])[0,...,0].numpy()
1622  array([[0.6666667, 0.3333333, 0.       , 0.       , 0.       ],
1623         [0.       , 0.       , 1.       , 0.       , 0.       ],
1624         [0.       , 0.       , 0.       , 0.3333335, 0.6666665]],
1625        dtype=float32)
1626
1627  It works equally well with a single image instead of a batch of images:
1628
1629  >>> tf.image.resize(image[0], [3,5]).shape.as_list()
1630  [3, 5, 1]
1631
1632  When `antialias` is true, the sampling filter will anti-alias the input image
1633  as well as interpolate.  When downsampling an image with [anti-aliasing](
1634  https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter
1635  kernel is scaled in order to properly anti-alias the input image signal.
1636  `antialias` has no effect when upsampling an image:
1637
1638  >>> a = tf.image.resize(image, [5,10])
1639  >>> b = tf.image.resize(image, [5,10], antialias=True)
1640  >>> tf.reduce_max(abs(a - b)).numpy()
1641  0.0
1642
1643  The `method` argument expects an item from the `image.ResizeMethod` enum, or
1644  the string equivalent. The options are:
1645
1646  *   <b>`bilinear`</b>: [Bilinear interpolation.](
1647    https://en.wikipedia.org/wiki/Bilinear_interpolation) If `antialias` is
1648    true, becomes a hat/tent filter function with radius 1 when downsampling.
1649  *   <b>`lanczos3`</b>:  [Lanczos kernel](
1650    https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3.
1651    High-quality practical filter but may have some ringing, especially on
1652    synthetic images.
1653  *   <b>`lanczos5`</b>: [Lanczos kernel](
1654    https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5.
1655    Very-high-quality filter but may have stronger ringing.
1656  *   <b>`bicubic`</b>: [Cubic interpolant](
1657    https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to
1658    Catmull-Rom kernel. Reasonably good quality and faster than Lanczos3Kernel,
1659    particularly when upsampling.
1660  *   <b>`gaussian`</b>: [Gaussian kernel](
1661    https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3,
1662    sigma = 1.5 / 3.0.
1663  *   <b>`nearest`</b>: [Nearest neighbor interpolation.](
1664    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
1665    `antialias` has no effect when used with nearest neighbor interpolation.
1666  *   <b>`area`</b>: Anti-aliased resampling with area interpolation.
1667    `antialias` has no effect when used with area interpolation; it
1668    always anti-aliases.
1669  *   <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter.
1670    For synthetic images (especially those lacking proper prefiltering), less
1671    ringing than Keys cubic kernel but less sharp.
1672
1673  Note: Near image edges the filtering kernel may be partially outside the
1674  image boundaries. For these pixels, only input pixels inside the image will be
1675  included in the filter sum, and the output value will be appropriately
1676  normalized.
1677
1678  The return value has type `float32`, unless the `method` is
1679  `ResizeMethod.NEAREST_NEIGHBOR`, then the return dtype is the dtype
1680  of `images`:
1681
1682  >>> nn = tf.image.resize(image, [5,7], method='nearest')
1683  >>> nn[0,...,0].numpy()
1684  array([[1, 0, 0, 0, 0, 0, 0],
1685         [0, 1, 1, 0, 0, 0, 0],
1686         [0, 0, 0, 1, 0, 0, 0],
1687         [0, 0, 0, 0, 1, 1, 0],
1688         [0, 0, 0, 0, 0, 0, 1]], dtype=int32)
1689
1690  With `preserve_aspect_ratio=True`, the aspect ratio is preserved, so `size`
1691  is the maximum for each dimension:
1692
1693  >>> max_10_20 = tf.image.resize(image, [10,20], preserve_aspect_ratio=True)
1694  >>> max_10_20.shape.as_list()
1695  [1, 10, 10, 1]
1696
1697  Args:
1698    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1699      of shape `[height, width, channels]`.
1700    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The new
1701      size for the images.
1702    method: An `image.ResizeMethod`, or string equivalent.  Defaults to
1703      `bilinear`.
1704    preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
1705      then `images` will be resized to a size that fits in `size` while
1706      preserving the aspect ratio of the original image. Scales up the image if
1707      `size` is bigger than the current size of the `image`. Defaults to False.
1708    antialias: Whether to use an anti-aliasing filter when downsampling an
1709      image.
1710    name: A name for this operation (optional).
1711
1712  Raises:
1713    ValueError: if the shape of `images` is incompatible with the
1714      shape arguments to this function
1715    ValueError: if `size` has an invalid shape or type.
1716    ValueError: if an unsupported resize method is specified.
1717
1718  Returns:
1719    If `images` was 4-D, a 4-D float Tensor of shape
1720    `[batch, new_height, new_width, channels]`.
1721    If `images` was 3-D, a 3-D float Tensor of shape
1722    `[new_height, new_width, channels]`.
1723  """
1724
1725  def resize_fn(images_t, new_size):
1726    """Resize core function, passed to _resize_images_common."""
1727    scale_and_translate_methods = [
1728        ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN,
1729        ResizeMethod.MITCHELLCUBIC
1730    ]
1731
1732    def resize_with_scale_and_translate(method):
1733      scale = (
1734          math_ops.cast(new_size, dtype=dtypes.float32) /
1735          math_ops.cast(array_ops.shape(images_t)[1:3], dtype=dtypes.float32))
1736      return gen_image_ops.scale_and_translate(
1737          images_t,
1738          new_size,
1739          scale,
1740          array_ops.zeros([2]),
1741          kernel_type=method,
1742          antialias=antialias)
1743
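    # With `antialias=True`, bilinear and bicubic resizes are routed through
    # `scale_and_translate` using the equivalent 'triangle' and 'keyscubic'
    # kernels; the zero translation keeps the output aligned with the input.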
1744    if method == ResizeMethod.BILINEAR:
1745      if antialias:
1746        return resize_with_scale_and_translate('triangle')
1747      else:
1748        return gen_image_ops.resize_bilinear(
1749            images_t, new_size, half_pixel_centers=True)
1750    elif method == ResizeMethod.NEAREST_NEIGHBOR:
1751      return gen_image_ops.resize_nearest_neighbor(
1752          images_t, new_size, half_pixel_centers=True)
1753    elif method == ResizeMethod.BICUBIC:
1754      if antialias:
1755        return resize_with_scale_and_translate('keyscubic')
1756      else:
1757        return gen_image_ops.resize_bicubic(
1758            images_t, new_size, half_pixel_centers=True)
1759    elif method == ResizeMethod.AREA:
1760      return gen_image_ops.resize_area(images_t, new_size)
1761    elif method in scale_and_translate_methods:
1762      return resize_with_scale_and_translate(method)
1763    else:
1764      raise ValueError('Resize method is not implemented: {}'.format(method))
1765
1766  return _resize_images_common(
1767      images,
1768      resize_fn,
1769      size,
1770      preserve_aspect_ratio=preserve_aspect_ratio,
1771      name=name,
1772      skip_resize_if_same=False)
1773
1774
1775def _resize_image_with_pad_common(image, target_height, target_width,
1776                                  resize_fn):
1777  """Core functionality for v1 and v2 resize_image_with_pad functions."""
1778  with ops.name_scope(None, 'resize_image_with_pad', [image]):
1779    image = ops.convert_to_tensor(image, name='image')
1780    image_shape = image.get_shape()
1781    is_batch = True
1782    if image_shape.ndims == 3:
1783      is_batch = False
1784      image = array_ops.expand_dims(image, 0)
1785    elif image_shape.ndims is None:
1786      is_batch = False
1787      image = array_ops.expand_dims(image, 0)
1788      image.set_shape([None] * 4)
1789    elif image_shape.ndims != 4:
1790      raise ValueError(
1791          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
1792          image_shape)
1793
1794    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
1795    assert_ops += _assert(target_width > 0, ValueError,
1796                          'target_width must be > 0.')
1797    assert_ops += _assert(target_height > 0, ValueError,
1798                          'target_height must be > 0.')
1799
1800    image = control_flow_ops.with_dependencies(assert_ops, image)
1801
1802    def max_(x, y):
1803      if _is_tensor(x) or _is_tensor(y):
1804        return math_ops.maximum(x, y)
1805      else:
1806        return max(x, y)
1807
1808    _, height, width, _ = _ImageDimensions(image, rank=4)
1809
1810    # Convert values to float to ease the divisions below.
1811    f_height = math_ops.cast(height, dtype=dtypes.float32)
1812    f_width = math_ops.cast(width, dtype=dtypes.float32)
1813    f_target_height = math_ops.cast(target_height, dtype=dtypes.float32)
1814    f_target_width = math_ops.cast(target_width, dtype=dtypes.float32)
1815
1816    # Find the ratio by which the image must be adjusted
1817    # to fit within the target
1818    ratio = max_(f_width / f_target_width, f_height / f_target_height)
1819    resized_height_float = f_height / ratio
1820    resized_width_float = f_width / ratio
1821    resized_height = math_ops.cast(
1822        math_ops.floor(resized_height_float), dtype=dtypes.int32)
1823    resized_width = math_ops.cast(
1824        math_ops.floor(resized_width_float), dtype=dtypes.int32)
1825
1826    padding_height = (f_target_height - resized_height_float) / 2
1827    padding_width = (f_target_width - resized_width_float) / 2
1828    f_padding_height = math_ops.floor(padding_height)
1829    f_padding_width = math_ops.floor(padding_width)
1830    p_height = max_(0, math_ops.cast(f_padding_height, dtype=dtypes.int32))
1831    p_width = max_(0, math_ops.cast(f_padding_width, dtype=dtypes.int32))
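    # Worked example: a 5x5 image with target 2x4 gives
    # ratio = max(5/4, 5/2) = 2.5, so the image is resized to 2x2 and then
    # padded with one zero column on each side to reach the 2x4 target.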
1832
1833    # Resize first, then pad to meet requested dimensions
1834    resized = resize_fn(image, [resized_height, resized_width])
1835
1836    padded = pad_to_bounding_box(resized, p_height, p_width, target_height,
1837                                 target_width)
1838
1839    if padded.get_shape().ndims is None:
1840      raise ValueError('padded contains no shape.')
1841
1842    _ImageDimensions(padded, rank=4)
1843
1844    if not is_batch:
1845      padded = array_ops.squeeze(padded, axis=[0])
1846
1847    return padded
1848
1849
1850@tf_export(v1=['image.resize_image_with_pad'])
1851@dispatch.add_dispatch_support
1852def resize_image_with_pad_v1(image,
1853                             target_height,
1854                             target_width,
1855                             method=ResizeMethodV1.BILINEAR,
1856                             align_corners=False):
1857  """Resizes and pads an image to a target width and height.
1858
1859  Resizes an image to a target width and height by keeping
1860  the aspect ratio the same without distortion. If the target
1861  dimensions don't match the image dimensions, the image
1862  is resized and then padded with zeroes to match requested
1863  dimensions.
1864
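  Usage Example:

  >>> image = tf.ones([5, 5, 1])
  >>> tf.compat.v1.image.resize_image_with_pad(image, 2, 4).shape.as_list()
  [2, 4, 1]
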
1865  Args:
1866    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1867      of shape `[height, width, channels]`.
1868    target_height: Target height.
1869    target_width: Target width.
1870    method: Method to use for resizing image. See `resize_images()`.
1871    align_corners: bool.  If True, the centers of the 4 corner pixels of the
1872      input and output tensors are aligned, preserving the values at the corner
1873      pixels. Defaults to `False`.
1874
1875  Raises:
1876    ValueError: if `target_height` or `target_width` are zero or negative.
1877
1878  Returns:
1879    Resized and padded image.
1880    If `images` was 4-D, a 4-D float Tensor of shape
1881    `[batch, new_height, new_width, channels]`.
1882    If `images` was 3-D, a 3-D float Tensor of shape
1883    `[new_height, new_width, channels]`.
1884  """
1885
1886  def _resize_fn(im, new_size):
1887    return resize_images(im, new_size, method, align_corners=align_corners)
1888
1889  return _resize_image_with_pad_common(image, target_height, target_width,
1890                                       _resize_fn)
1891
1892
1893@tf_export('image.resize_with_pad', v1=[])
1894@dispatch.add_dispatch_support
1895def resize_image_with_pad_v2(image,
1896                             target_height,
1897                             target_width,
1898                             method=ResizeMethod.BILINEAR,
1899                             antialias=False):
1900  """Resizes and pads an image to a target width and height.
1901
1902  Resizes an image to a target width and height by keeping
1903  the aspect ratio the same without distortion. If the target
1904  dimensions don't match the image dimensions, the image
1905  is resized and then padded with zeroes to match requested
1906  dimensions.
1907
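  Usage Example:

  >>> image = tf.ones([5, 5, 1])
  >>> tf.image.resize_with_pad(image, 4, 6).shape.as_list()
  [4, 6, 1]
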
1908  Args:
1909    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1910      of shape `[height, width, channels]`.
1911    target_height: Target height.
1912    target_width: Target width.
1913    method: Method to use for resizing image. See `image.resize()`.
1914    antialias: Whether to use anti-aliasing when resizing. See `image.resize()`.
1915
1916  Raises:
1917    ValueError: if `target_height` or `target_width` are zero or negative.
1918
1919  Returns:
1920    Resized and padded image.
1921    If `images` was 4-D, a 4-D float Tensor of shape
1922    `[batch, new_height, new_width, channels]`.
1923    If `images` was 3-D, a 3-D float Tensor of shape
1924    `[new_height, new_width, channels]`.
1925  """
1926
1927  def _resize_fn(im, new_size):
1928    return resize_images_v2(im, new_size, method, antialias=antialias)
1929
1930  return _resize_image_with_pad_common(image, target_height, target_width,
1931                                       _resize_fn)
1932
1933
1934@tf_export('image.per_image_standardization')
1935@dispatch.add_dispatch_support
1936def per_image_standardization(image):
1937  """Linearly scales each image in `image` to have mean 0 and variance 1.
1938
1939  For each 3-D image `x` in `image`, computes `(x - mean) / adjusted_stddev`,
1940  where
1941
1942  - `mean` is the average of all values in `x`
1943  - `adjusted_stddev = max(stddev, 1.0/sqrt(N))` is capped away from 0 to
1944    protect against division by 0 when handling uniform images
1945    - `N` is the number of elements in `x`
1946    - `stddev` is the standard deviation of all values in `x`
1947
1948  Example Usage:
1949
1950  >>> image = tf.constant(np.arange(1, 13, dtype=np.int32), shape=[2, 2, 3])
1951  >>> image # 3-D tensor
1952  <tf.Tensor: shape=(2, 2, 3), dtype=int32, numpy=
1953  array([[[ 1,  2,  3],
1954          [ 4,  5,  6]],
1955         [[ 7,  8,  9],
1956          [10, 11, 12]]], dtype=int32)>
1957  >>> new_image = tf.image.per_image_standardization(image)
1958  >>> new_image # 3-D tensor with mean ~= 0 and variance ~= 1
1959  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
1960  array([[[-1.593255  , -1.3035723 , -1.0138896 ],
1961          [-0.7242068 , -0.4345241 , -0.14484136]],
1962         [[ 0.14484136,  0.4345241 ,  0.7242068 ],
1963          [ 1.0138896 ,  1.3035723 ,  1.593255  ]]], dtype=float32)>
1964
1965  Args:
1966    image: An n-D `Tensor` with at least 3 dimensions, the last 3 of which are
1967      the dimensions of each image.
1968
1969  Returns:
1970    A `Tensor` with the same shape as `image` and its dtype is `float32`.
1971
1972  Raises:
1973    ValueError: The shape of `image` has fewer than 3 dimensions.
1974  """
1975  with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
1976    image = ops.convert_to_tensor(image, name='image')
1977    image = _AssertAtLeast3DImage(image)
1978
1979    image = math_ops.cast(image, dtype=dtypes.float32)
1980    num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:])
1981    image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True)
1982
1983    # Apply a minimum normalization that protects us against uniform images.
1984    stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True)
1985    min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
1986    adjusted_stddev = math_ops.maximum(stddev, min_stddev)
1987
1988    image -= image_mean
1989    image = math_ops.divide(image, adjusted_stddev, name=scope)
1990    return image
1991
1992
1993@tf_export('image.random_brightness')
1994@dispatch.register_unary_elementwise_api
1995@dispatch.add_dispatch_support
1996def random_brightness(image, max_delta, seed=None):
1997  """Adjust the brightness of images by a random factor.
1998
1999  Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
2000  interval `[-max_delta, max_delta)`.
2001
2002  For producing deterministic results given a `seed` value, use
2003  `tf.image.stateless_random_brightness`. Unlike using the `seed` param
2004  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2005  same results given the same seed independent of how many times the function is
2006  called, and independent of global seed settings (e.g. `tf.random.set_seed`).
2007
2008  Args:
2009    image: An image or images to adjust.
2010    max_delta: float, must be non-negative.
2011    seed: A Python integer. Used to create a random seed. See
2012      `tf.compat.v1.set_random_seed` for behavior.
2013
2014  Usage Example:
2015
2016  >>> x = [[[1.0, 2.0, 3.0],
2017  ...       [4.0, 5.0, 6.0]],
2018  ...      [[7.0, 8.0, 9.0],
2019  ...       [10.0, 11.0, 12.0]]]
2020  >>> tf.image.random_brightness(x, 0.2)
2021  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2022
2023  Returns:
2024    The brightness-adjusted image(s).
2025
2026  Raises:
2027    ValueError: if `max_delta` is negative.
2028  """
2029  if max_delta < 0:
2030    raise ValueError('max_delta must be non-negative.')
2031
2032  delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
2033  return adjust_brightness(image, delta)
2034
2035
2036@tf_export('image.stateless_random_brightness', v1=[])
2037@dispatch.register_unary_elementwise_api
2038@dispatch.add_dispatch_support
2039def stateless_random_brightness(image, max_delta, seed):
2040  """Adjust the brightness of images by a random factor deterministically.
2041
2042  Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
2043  interval `[-max_delta, max_delta)`.
2044
2045  Guarantees the same results given the same `seed` independent of how many
2046  times the function is called, and independent of global seed settings (e.g.
2047  `tf.random.set_seed`).
2048
2049  Usage Example:
2050
2051  >>> x = [[[1.0, 2.0, 3.0],
2052  ...       [4.0, 5.0, 6.0]],
2053  ...      [[7.0, 8.0, 9.0],
2054  ...       [10.0, 11.0, 12.0]]]
2055  >>> seed = (1, 2)
2056  >>> tf.image.stateless_random_brightness(x, 0.2, seed)
2057  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2058  array([[[ 1.1376241,  2.1376243,  3.1376243],
2059          [ 4.1376243,  5.1376243,  6.1376243]],
2060         [[ 7.1376243,  8.137624 ,  9.137624 ],
2061          [10.137624 , 11.137624 , 12.137624 ]]], dtype=float32)>
2062
2063  Args:
2064    image: An image or images to adjust.
2065    max_delta: float, must be non-negative.
2066    seed: A shape [2] Tensor, the seed to the random number generator. Must have
2067      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2068
2069  Returns:
2070    The brightness-adjusted image(s).
2071
2072  Raises:
2073    ValueError: if `max_delta` is negative.
2074  """
2075  if max_delta < 0:
2076    raise ValueError('max_delta must be non-negative.')
2077
2078  delta = stateless_random_ops.stateless_random_uniform(
2079      shape=[], minval=-max_delta, maxval=max_delta, seed=seed)
2080  return adjust_brightness(image, delta)
2081
2082
2083@tf_export('image.random_contrast')
2084@dispatch.add_dispatch_support
2085def random_contrast(image, lower, upper, seed=None):
2086  """Adjust the contrast of an image or images by a random factor.
2087
2088  Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly
2089  picked in the interval `[lower, upper)`.
2090
2091  For producing deterministic results given a `seed` value, use
2092  `tf.image.stateless_random_contrast`. Unlike using the `seed` param
2093  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2094  same results given the same seed independent of how many times the function is
2095  called, and independent of global seed settings (e.g. `tf.random.set_seed`).
2096
2097  Args:
2098    image: An image tensor with 3 or more dimensions.
2099    lower: float.  Lower bound for the random contrast factor.
2100    upper: float.  Upper bound for the random contrast factor.
2101    seed: A Python integer. Used to create a random seed. See
2102      `tf.compat.v1.set_random_seed` for behavior.
2103
2104  Usage Example:
2105
2106  >>> x = [[[1.0, 2.0, 3.0],
2107  ...       [4.0, 5.0, 6.0]],
2108  ...      [[7.0, 8.0, 9.0],
2109  ...       [10.0, 11.0, 12.0]]]
2110  >>> tf.image.random_contrast(x, 0.2, 0.5)
2111  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2112
2113  Returns:
2114    The contrast-adjusted image(s).
2115
2116  Raises:
2117    ValueError: if `upper <= lower` or if `lower < 0`.
2118  """
2119  if upper <= lower:
2120    raise ValueError('upper must be > lower.')
2121
2122  if lower < 0:
2123    raise ValueError('lower must be non-negative.')
2124
2125  contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed)
2126  return adjust_contrast(image, contrast_factor)
2127
2128
2129@tf_export('image.stateless_random_contrast', v1=[])
2130@dispatch.add_dispatch_support
2131def stateless_random_contrast(image, lower, upper, seed):
2132  """Adjust the contrast of images by a random factor deterministically.
2133
2134  Guarantees the same results given the same `seed` independent of how many
2135  times the function is called, and independent of global seed settings (e.g.
2136  `tf.random.set_seed`).
2137
2138  Args:
2139    image: An image tensor with 3 or more dimensions.
2140    lower: float.  Lower bound for the random contrast factor.
2141    upper: float.  Upper bound for the random contrast factor.
2142    seed: A shape [2] Tensor, the seed to the random number generator. Must have
2143      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2144
2145  Usage Example:
2146
2147  >>> x = [[[1.0, 2.0, 3.0],
2148  ...       [4.0, 5.0, 6.0]],
2149  ...      [[7.0, 8.0, 9.0],
2150  ...       [10.0, 11.0, 12.0]]]
2151  >>> seed = (1, 2)
2152  >>> tf.image.stateless_random_contrast(x, 0.2, 0.5, seed)
2153  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2154  array([[[3.4605184, 4.4605184, 5.4605184],
2155          [4.820173 , 5.820173 , 6.820173 ]],
2156         [[6.179827 , 7.179827 , 8.179828 ],
2157          [7.5394816, 8.539482 , 9.539482 ]]], dtype=float32)>
2158
2159  Returns:
2160    The contrast-adjusted image(s).
2161
2162  Raises:
2163    ValueError: if `upper <= lower` or if `lower < 0`.
2164  """
2165  if upper <= lower:
2166    raise ValueError('upper must be > lower.')
2167
2168  if lower < 0:
2169    raise ValueError('lower must be non-negative.')
2170
2171  contrast_factor = stateless_random_ops.stateless_random_uniform(
2172      shape=[], minval=lower, maxval=upper, seed=seed)
2173  return adjust_contrast(image, contrast_factor)
2174
2175
2176@tf_export('image.adjust_brightness')
2177@dispatch.register_unary_elementwise_api
2178@dispatch.add_dispatch_support
2179def adjust_brightness(image, delta):
2180  """Adjust the brightness of RGB or Grayscale images.
2181
2182  This is a convenience method that converts RGB images to float
2183  representation, adjusts their brightness, and then converts them back to the
2184  original data type. If several adjustments are chained, it is advisable to
2185  minimize the number of redundant conversions.
2186
2187  The value `delta` is added to all components of the tensor `image`. `image` is
2188  converted to `float` and scaled appropriately if it is in fixed-point
2189  representation, and `delta` is converted to the same data type. For regular
2190  images, `delta` should be in the range `(-1,1)`, as it is added to the image
2191  in floating point representation, where pixel values are in the `[0,1)` range.
2192
2193  Usage Example:
2194
2195  >>> x = [[[1.0, 2.0, 3.0],
2196  ...       [4.0, 5.0, 6.0]],
2197  ...      [[7.0, 8.0, 9.0],
2198  ...       [10.0, 11.0, 12.0]]]
2199  >>> tf.image.adjust_brightness(x, delta=0.1)
2200  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2201  array([[[ 1.1,  2.1,  3.1],
2202          [ 4.1,  5.1,  6.1]],
2203         [[ 7.1,  8.1,  9.1],
2204          [10.1, 11.1, 12.1]]], dtype=float32)>
2205
2206  Args:
2207    image: RGB image or images to adjust.
2208    delta: A scalar. Amount to add to the pixel values.
2209
2210  Returns:
2211    A brightness-adjusted tensor of the same shape and type as `image`.
2212  """
2213  with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name:
2214    image = ops.convert_to_tensor(image, name='image')
2215    # Remember the original dtype so we can convert back if needed.
2216    orig_dtype = image.dtype
2217
2218    if orig_dtype in [dtypes.float16, dtypes.float32]:
2219      flt_image = image
2220    else:
2221      flt_image = convert_image_dtype(image, dtypes.float32)
2222
2223    adjusted = math_ops.add(
2224        flt_image, math_ops.cast(delta, flt_image.dtype), name=name)
2225
2226    return convert_image_dtype(adjusted, orig_dtype, saturate=True)
2227
2228
2229@tf_export('image.adjust_contrast')
2230@dispatch.add_dispatch_support
2231def adjust_contrast(images, contrast_factor):
2232  """Adjust contrast of RGB or grayscale images.
2233
2234  This is a convenience method that converts RGB images to float
2235  representation, adjusts their contrast, and then converts them back to the
2236  original data type. If several adjustments are chained, it is advisable to
2237  minimize the number of redundant conversions.
2238
2239  `images` is a tensor of at least 3 dimensions.  The last 3 dimensions are
2240  interpreted as `[height, width, channels]`.  The other dimensions only
2241  represent a collection of images, such as `[batch, height, width, channels]`.
2242
2243  Contrast is adjusted independently for each channel of each image.
2244
2245  For each channel, this Op computes the mean of the image pixels in the
2246  channel and then adjusts each component `x` of each pixel to
2247  `(x - mean) * contrast_factor + mean`.
2248
2249  `contrast_factor` must be in the interval `(-inf, inf)`.
2250
2251  Usage Example:
2252
2253  >>> x = [[[1.0, 2.0, 3.0],
2254  ...       [4.0, 5.0, 6.0]],
2255  ...      [[7.0, 8.0, 9.0],
2256  ...       [10.0, 11.0, 12.0]]]
2257  >>> tf.image.adjust_contrast(x, 2.)
2258  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2259  array([[[-3.5, -2.5, -1.5],
2260          [ 2.5,  3.5,  4.5]],
2261         [[ 8.5,  9.5, 10.5],
2262          [14.5, 15.5, 16.5]]], dtype=float32)>
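
  In this example, contrast is adjusted independently per channel: channel 0
  has mean (1 + 4 + 7 + 10) / 4 = 5.5, so its first value becomes
  (1 - 5.5) * 2.0 + 5.5 = -3.5.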
2263
2264  Args:
2265    images: Images to adjust.  At least 3-D.
2266    contrast_factor: A float multiplier for adjusting contrast.
2267
2268  Returns:
2269    The contrast-adjusted image or images.
2270  """
2271  with ops.name_scope(None, 'adjust_contrast',
2272                      [images, contrast_factor]) as name:
2273    images = ops.convert_to_tensor(images, name='images')
2274    # Remember the original dtype so we can convert back if needed.
2275    orig_dtype = images.dtype
2276
2277    if orig_dtype in (dtypes.float16, dtypes.float32):
2278      flt_images = images
2279    else:
2280      flt_images = convert_image_dtype(images, dtypes.float32)
2281
2282    adjusted = gen_image_ops.adjust_contrastv2(
2283        flt_images, contrast_factor=contrast_factor, name=name)
2284
2285    return convert_image_dtype(adjusted, orig_dtype, saturate=True)
2286
2287
2288@tf_export('image.adjust_gamma')
2289@dispatch.register_unary_elementwise_api
2290@dispatch.add_dispatch_support
2291def adjust_gamma(image, gamma=1, gain=1):
2292  """Performs [Gamma Correction](http://en.wikipedia.org/wiki/Gamma_correction).
2293
2294  on the input image.
2295
2296  Also known as Power Law Transform. This function converts the
2297  input images at first to float representation, then transforms them
2298  pixelwise according to the equation `Out = gain * In**gamma`,
2299  and then converts the back to the original data type.
2300
2301  Usage Example:
2302
2303  >>> x = [[[1.0, 2.0, 3.0],
2304  ...       [4.0, 5.0, 6.0]],
2305  ...      [[7.0, 8.0, 9.0],
2306  ...       [10.0, 11.0, 12.0]]]
2307  >>> tf.image.adjust_gamma(x, 0.2)
2308  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2309  array([[[1.       , 1.1486983, 1.2457309],
2310          [1.319508 , 1.3797297, 1.4309691]],
2311         [[1.4757731, 1.5157166, 1.5518456],
2312          [1.5848932, 1.6153942, 1.6437519]]], dtype=float32)>
2313
2314  Args:
2315    image : RGB image or images to adjust.
2316    gamma : A scalar or tensor. Non-negative real number.
2317    gain  : A scalar or tensor. The constant multiplier.
2318
2319  Returns:
2320    A Tensor. A Gamma-adjusted tensor of the same shape and type as `image`.
2321
2322  Raises:
2323    ValueError: If gamma is negative.
2324  Notes:
2325    For gamma greater than 1, the histogram will shift towards left and
2326    the output image will be darker than the input image.
2327    For gamma less than 1, the histogram will shift towards right and
2328    the output image will be brighter than the input image.
2329  References:
2330    [Wikipedia](http://en.wikipedia.org/wiki/Gamma_correction)
2331  """
2332
2333  with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name:
2334    image = ops.convert_to_tensor(image, name='image')
2335    # Remember the original dtype so we can convert back if needed.
2336    orig_dtype = image.dtype
2337
2338    if orig_dtype in [dtypes.float16, dtypes.float32]:
2339      flt_image = image
2340    else:
2341      flt_image = convert_image_dtype(image, dtypes.float32)
2342
2343    assert_op = _assert(gamma >= 0, ValueError,
2344                        'Gamma should be a non-negative real number.')
2345    if assert_op:
2346      gamma = control_flow_ops.with_dependencies(assert_op, gamma)
2347
2348    # According to the definition of gamma correction.
2349    adjusted_img = gain * flt_image**gamma
2350
2351    return convert_image_dtype(adjusted_img, orig_dtype, saturate=True)
2352
2353
2354@tf_export('image.convert_image_dtype')
2355@dispatch.register_unary_elementwise_api
2356@dispatch.add_dispatch_support
2357def convert_image_dtype(image, dtype, saturate=False, name=None):
2358  """Convert `image` to `dtype`, scaling its values if needed.
2359
2360  The operation supports data types (for `image` and `dtype`) of
2361  `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`,
2362  `float16`, `float32`, `float64`, `bfloat16`.
2363
2364  Images that are represented using floating point values are expected to have
2365  values in the range `[0,1)`. Image data stored in integer data types is
2366  expected to have values in the range `[0,MAX]`, where `MAX` is the largest
2367  positive representable number for the data type.
2368
2369  This op converts between data types, scaling the values appropriately before
2370  casting.
2371
2372  Usage Example:
2373
2374  >>> x = [[[1, 2, 3], [4, 5, 6]],
2375  ...      [[7, 8, 9], [10, 11, 12]]]
2376  >>> x_int8 = tf.convert_to_tensor(x, dtype=tf.int8)
2377  >>> tf.image.convert_image_dtype(x_int8, dtype=tf.float16, saturate=False)
2378  <tf.Tensor: shape=(2, 2, 3), dtype=float16, numpy=
2379  array([[[0.00787, 0.01575, 0.02362],
2380          [0.0315 , 0.03937, 0.04724]],
2381         [[0.0551 , 0.063  , 0.07086],
2382          [0.07874, 0.0866 , 0.0945 ]]], dtype=float16)>
2383
2384  Converting integer types to floating point types returns normalized floating
2385  point values in the range [0, 1); the values are normalized by the `MAX` value
2386  of the input dtype. Consider the following two examples:
2387
2388  >>> a = [[[1], [2]], [[3], [4]]]
2389  >>> a_int8 = tf.convert_to_tensor(a, dtype=tf.int8)
2390  >>> tf.image.convert_image_dtype(a_int8, dtype=tf.float32)
2391  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2392  array([[[0.00787402],
2393          [0.01574803]],
2394         [[0.02362205],
2395          [0.03149606]]], dtype=float32)>
2396
2397  >>> a_int32 = tf.convert_to_tensor(a, dtype=tf.int32)
2398  >>> tf.image.convert_image_dtype(a_int32, dtype=tf.float32)
2399  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2400  array([[[4.6566129e-10],
2401          [9.3132257e-10]],
2402         [[1.3969839e-09],
2403          [1.8626451e-09]]], dtype=float32)>
2404
2405  Despite having identical values of `a` and output dtype of `float32`, the
2406  outputs differ due to the different input dtypes (`int8` vs. `int32`). This
2407  is, again, because the values are normalized by the `MAX` value of the input
2408  dtype.
2409
2410  Note that converting floating point values to integer type may lose precision.
2411  In the example below, an image tensor `b` of dtype `float32` is converted to
2412  `int8` and back to `float32`. The final output, however, is different from
2413  the original input `b` due to precision loss.
2414
2415  >>> b = [[[0.12], [0.34]], [[0.56], [0.78]]]
2416  >>> b_float32 = tf.convert_to_tensor(b, dtype=tf.float32)
2417  >>> b_int8 = tf.image.convert_image_dtype(b_float32, dtype=tf.int8)
2418  >>> tf.image.convert_image_dtype(b_int8, dtype=tf.float32)
2419  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2420  array([[[0.11811024],
2421          [0.33858266]],
2422         [[0.5590551 ],
2423          [0.77952754]]], dtype=float32)>
2424
2425  Scaling up from an integer type (input dtype) to another integer type (output
2426  dtype) will not map input dtype's `MAX` to output dtype's `MAX` but converting
2427  back and forth should result in no change. For example, as shown below, the
2428  `MAX` value of int8 (=127) is not mapped to the `MAX` value of int16 (=32,767)
2429  but, when scaled back, we get the same, original values of `c`.
2430
2431  >>> c = [[[1], [2]], [[127], [127]]]
2432  >>> c_int8 = tf.convert_to_tensor(c, dtype=tf.int8)
2433  >>> c_int16 = tf.image.convert_image_dtype(c_int8, dtype=tf.int16)
2434  >>> print(c_int16)
2435  tf.Tensor(
2436  [[[  256]
2437    [  512]]
2438   [[32512]
2439    [32512]]], shape=(2, 2, 1), dtype=int16)
2440  >>> c_int8_back = tf.image.convert_image_dtype(c_int16, dtype=tf.int8)
2441  >>> print(c_int8_back)
2442  tf.Tensor(
2443  [[[  1]
2444    [  2]]
2445   [[127]
2446    [127]]], shape=(2, 2, 1), dtype=int8)
2447
2448  Scaling down from an integer type to another integer type can be a lossy
2449  conversion. Notice in the example below that converting `int16` to `uint8` and
2450  back to `int16` has lost precision.
2451
2452  >>> d = [[[1000], [2000]], [[3000], [4000]]]
2453  >>> d_int16 = tf.convert_to_tensor(d, dtype=tf.int16)
2454  >>> d_uint8 = tf.image.convert_image_dtype(d_int16, dtype=tf.uint8)
2455  >>> d_int16_back = tf.image.convert_image_dtype(d_uint8, dtype=tf.int16)
2456  >>> print(d_int16_back)
2457  tf.Tensor(
2458  [[[ 896]
2459    [1920]]
2460   [[2944]
2461    [3968]]], shape=(2, 2, 1), dtype=int16)
2462
2463  Note that converting from floating point inputs to integer types may lead to
2464  over/underflow problems. Set `saturate` to `True` to avoid such problems in
2465  problematic conversions. If enabled, saturation will clip the output into the
2466  allowed range before performing a potentially dangerous cast (and only before
2467  performing such a cast, i.e., when casting from a floating point to an integer
2468  type, and when casting from a signed to an unsigned type; `saturate` has no
2469  effect on casts between floats, or on casts that increase the type's range).
2470
2471  Args:
2472    image: An image.
2473    dtype: A `DType` to convert `image` to.
2474    saturate: If `True`, clip the input before casting (if necessary).
2475    name: A name for this operation (optional).
2476
2477  Returns:
2478    `image`, converted to `dtype`.
2479
2480  Raises:
2481    AttributeError: Raised when `dtype` is neither a floating point nor an
2482      integer type.
2483  """
2484  image = ops.convert_to_tensor(image, name='image')
2485  dtype = dtypes.as_dtype(dtype)
2486  if not dtype.is_floating and not dtype.is_integer:
2487    raise AttributeError('dtype must be either floating point or integer')
2488  if dtype == image.dtype:
2489    return array_ops.identity(image, name=name)
2490
2491  with ops.name_scope(name, 'convert_image', [image]) as name:
2492    # Both integer: use integer multiplication in the larger range
2493    if image.dtype.is_integer and dtype.is_integer:
2494      scale_in = image.dtype.max
2495      scale_out = dtype.max
2496      if scale_in > scale_out:
2497        # Scaling down, scale first, then cast. The scaling factor will
2498        # cause in.max to be mapped to above out.max but below out.max+1,
2499        # so that the output is safely in the supported range.
2500        scale = (scale_in + 1) // (scale_out + 1)
2501        scaled = math_ops.floordiv(image, scale)
2502
2503        if saturate:
2504          return math_ops.saturate_cast(scaled, dtype, name=name)
2505        else:
2506          return math_ops.cast(scaled, dtype, name=name)
2507      else:
2508        # Scaling up, cast first, then scale. The scale will not map in.max to
2509        # out.max, but converting back and forth should result in no change.
2510        if saturate:
2511          cast = math_ops.saturate_cast(image, dtype)
2512        else:
2513          cast = math_ops.cast(image, dtype)
2514        scale = (scale_out + 1) // (scale_in + 1)
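        # e.g. int8 -> int16: scale = 32768 // 128 = 256, so 1 maps to 256
        # and 127 to 32512 (not 32767), matching the docstring example above.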
2515        return math_ops.multiply(cast, scale, name=name)
2516    elif image.dtype.is_floating and dtype.is_floating:
2517      # Both float: Just cast, no possible overflows in the allowed ranges.
2518      # Note: We're ignoring float overflows. If your image dynamic range
2519      # exceeds float range, you're on your own.
2520      return math_ops.cast(image, dtype, name=name)
2521    else:
2522      if image.dtype.is_integer:
2523        # Converting to float: first cast, then scale. No saturation possible.
2524        cast = math_ops.cast(image, dtype)
2525        scale = 1. / image.dtype.max
2526        return math_ops.multiply(cast, scale, name=name)
2527      else:
2528        # Converting from float: first scale, then cast
2529        scale = dtype.max + 0.5  # avoid rounding problems in the cast
2530        scaled = math_ops.multiply(image, scale)
2531        if saturate:
2532          return math_ops.saturate_cast(scaled, dtype, name=name)
2533        else:
2534          return math_ops.cast(scaled, dtype, name=name)
2535
2536
2537@tf_export('image.rgb_to_grayscale')
2538@dispatch.add_dispatch_support
2539def rgb_to_grayscale(images, name=None):
2540  """Converts one or more images from RGB to Grayscale.
2541
2542  Outputs a tensor of the same `DType` and rank as `images`.  The size of the
2543  last dimension of the output is 1, containing the Grayscale value of the
2544  pixels.
2545
2546  >>> original = tf.constant([[[1.0, 2.0, 3.0]]])
2547  >>> converted = tf.image.rgb_to_grayscale(original)
2548  >>> print(converted.numpy())
2549  [[[1.81...]]]
2550
2551  Args:
2552    images: The RGB tensor to convert. The last dimension must have size 3 and
2553      should contain RGB values.
2554    name: A name for the operation (optional).
2555
2556  Returns:
2557    The converted grayscale image(s).
2558  """
2559  with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name:
2560    images = ops.convert_to_tensor(images, name='images')
2561    # Remember the original dtype so we can convert back if needed.
2562    orig_dtype = images.dtype
2563    flt_image = convert_image_dtype(images, dtypes.float32)
2564
2565    # Reference for converting between RGB and grayscale.
2566    # https://en.wikipedia.org/wiki/Luma_%28video%29
2567    rgb_weights = [0.2989, 0.5870, 0.1140]
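    # e.g. an RGB pixel [1.0, 2.0, 3.0] maps to
    # 0.2989*1.0 + 0.5870*2.0 + 0.1140*3.0 = 1.8149, as in the doctest above.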
2568    gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1])
2569    gray_float = array_ops.expand_dims(gray_float, -1)
2570    return convert_image_dtype(gray_float, orig_dtype, name=name)
2571
2572
2573@tf_export('image.grayscale_to_rgb')
2574@dispatch.add_dispatch_support
2575def grayscale_to_rgb(images, name=None):
2576  """Converts one or more images from Grayscale to RGB.
2577
2578  Outputs a tensor of the same `DType` and rank as `images`.  The size of the
2579  last dimension of the output is 3, containing the RGB value of the pixels.
2580  The input images' last dimension must be size 1.
2581
2582  >>> original = tf.constant([[[1.0], [2.0], [3.0]]])
2583  >>> converted = tf.image.grayscale_to_rgb(original)
2584  >>> print(converted.numpy())
2585  [[[1. 1. 1.]
2586    [2. 2. 2.]
2587    [3. 3. 3.]]]
2588
2589  Args:
2590    images: The Grayscale tensor to convert. The last dimension must be size 1.
2591    name: A name for the operation (optional).
2592
2593  Returns:
2594    The converted RGB image(s).
2595  """
2596  with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name:
2597    images = _AssertGrayscaleImage(images)
2598
2599    images = ops.convert_to_tensor(images, name='images')
2600    rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
2601    shape_list = ([array_ops.ones(rank_1, dtype=dtypes.int32)] +
2602                  [array_ops.expand_dims(3, 0)])
2603    multiples = array_ops.concat(shape_list, 0)
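    # `multiples` is all ones except for a 3 in the last position, so tiling
    # repeats the size-1 channel dimension three times: [h, w, 1] -> [h, w, 3].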
2604    rgb = array_ops.tile(images, multiples, name=name)
2605    rgb.set_shape(images.get_shape()[:-1].concatenate([3]))
2606    return rgb
2607
2608
2609# pylint: disable=invalid-name
2610@tf_export('image.random_hue')
2611@dispatch.add_dispatch_support
2612def random_hue(image, max_delta, seed=None):
2613  """Adjust the hue of RGB images by a random factor.
2614
2615  Equivalent to `adjust_hue()` but uses a `delta` randomly
2616  picked in the interval `[-max_delta, max_delta)`.
2617
2618  `max_delta` must be in the interval `[0, 0.5]`.
2619
2620  Usage Example:
2621
2622  >>> x = [[[1.0, 2.0, 3.0],
2623  ...       [4.0, 5.0, 6.0]],
2624  ...     [[7.0, 8.0, 9.0],
2625  ...       [10.0, 11.0, 12.0]]]
2626  >>> tf.image.random_hue(x, 0.2)
2627  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2628
2629  For producing deterministic results given a `seed` value, use
2630  `tf.image.stateless_random_hue`. Unlike using the `seed` param with
2631  `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the same
2632  results given the same seed independent of how many times the function is
2633  called, and independent of global seed settings (e.g. `tf.random.set_seed`).
2634
2635  Args:
2636    image: RGB image or images. The size of the last dimension must be 3.
2637    max_delta: float. The maximum value for the random delta.
2638    seed: An operation-specific seed. It will be used in conjunction with the
2639      graph-level seed to determine the real seeds that will be used in this
2640      operation. Please see the documentation of set_random_seed for its
2641      interaction with the graph-level random seed.
2642
2643  Returns:
2644    Adjusted image(s), same shape and DType as `image`.
2645
2646  Raises:
2647    ValueError: if `max_delta` is invalid.
2648  """
2649  if max_delta > 0.5:
2650    raise ValueError('max_delta must be <= 0.5.')
2651
2652  if max_delta < 0:
2653    raise ValueError('max_delta must be non-negative.')
2654
2655  delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
2656  return adjust_hue(image, delta)
2657
2658
2659@tf_export('image.stateless_random_hue', v1=[])
2660@dispatch.add_dispatch_support
2661def stateless_random_hue(image, max_delta, seed):
2662  """Adjust the hue of RGB images by a random factor deterministically.
2663
2664  Equivalent to `adjust_hue()` but uses a `delta` randomly picked in the
2665  interval `[-max_delta, max_delta)`.
2666
2667  Guarantees the same results given the same `seed` independent of how many
2668  times the function is called, and independent of global seed settings (e.g.
2669  `tf.random.set_seed`).
2670
2671  `max_delta` must be in the interval `[0, 0.5]`.
2672
2673  Usage Example:
2674
2675  >>> x = [[[1.0, 2.0, 3.0],
2676  ...       [4.0, 5.0, 6.0]],
2677  ...      [[7.0, 8.0, 9.0],
2678  ...       [10.0, 11.0, 12.0]]]
2679  >>> seed = (1, 2)
2680  >>> tf.image.stateless_random_hue(x, 0.2, seed)
2681  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2682  array([[[ 1.6514902,  1.       ,  3.       ],
2683          [ 4.65149  ,  4.       ,  6.       ]],
2684         [[ 7.65149  ,  7.       ,  9.       ],
2685          [10.65149  , 10.       , 12.       ]]], dtype=float32)>
2686
2687  Args:
2688    image: RGB image or images. The size of the last dimension must be 3.
2689    max_delta: float. The maximum value for the random delta.
2690    seed: A shape [2] Tensor, the seed to the random number generator. Must have
2691      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2692
2693  Returns:
2694    Adjusted image(s), same shape and DType as `image`.
2695
2696  Raises:
2697    ValueError: if `max_delta` is invalid.
2698  """
2699  if max_delta > 0.5:
2700    raise ValueError('max_delta must be <= 0.5.')
2701
2702  if max_delta < 0:
2703    raise ValueError('max_delta must be non-negative.')
2704
2705  delta = stateless_random_ops.stateless_random_uniform(
2706      shape=[], minval=-max_delta, maxval=max_delta, seed=seed)
2707  return adjust_hue(image, delta)
2708
2709
2710@tf_export('image.adjust_hue')
2711@dispatch.add_dispatch_support
2712def adjust_hue(image, delta, name=None):
2713  """Adjust hue of RGB images.
2714
2715  This is a convenience method that converts an RGB image to float
2716  representation, converts it to HSV, adds an offset to the
2717  hue channel, converts back to RGB and then back to the original
2718  data type. If several adjustments are chained it is advisable to minimize
2719  the number of redundant conversions.
2720
2721  `image` is an RGB image.  The image hue is adjusted by converting the
2722  image(s) to HSV and rotating the hue channel (H) by
2723  `delta`.  The image is then converted back to RGB.
2724
2725  `delta` must be in the interval `[-1, 1]`.
2726
2727  Usage Example:
2728
2729  >>> x = [[[1.0, 2.0, 3.0],
2730  ...       [4.0, 5.0, 6.0]],
2731  ...      [[7.0, 8.0, 9.0],
2732  ...       [10.0, 11.0, 12.0]]]
2733  >>> tf.image.adjust_hue(x, 0.2)
2734  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2735  array([[[ 2.3999996,  1.       ,  3.       ],
2736          [ 5.3999996,  4.       ,  6.       ]],
2737         [[ 8.4      ,  7.       ,  9.       ],
2738          [11.4      , 10.       , 12.       ]]], dtype=float32)>
2739
2740  Args:
2741    image: RGB image or images. The size of the last dimension must be 3.
2742    delta: float.  How much to add to the hue channel.
2743    name: A name for this operation (optional).
2744
2745  Returns:
2746    Adjusted image(s), same shape and DType as `image`.
2747
2748  Raises:
2749    InvalidArgumentError: image must have at least 3 dimensions.
2750    InvalidArgumentError: The size of the last dimension must be 3.
2751    ValueError: if `delta` is not in the interval of `[-1, 1]`.
2752
2753  Usage Example:
2754
2755  >>> image = [[[1, 2, 3], [4, 5, 6]],
2756  ...          [[7, 8, 9], [10, 11, 12]],
2757  ...          [[13, 14, 15], [16, 17, 18]]]
2758  >>> image = tf.constant(image)
2759  >>> tf.image.adjust_hue(image, 0.2)
2760  <tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy=
2761  array([[[ 2,  1,  3],
2762          [ 5,  4,  6]],
2763         [[ 8,  7,  9],
2764          [11, 10, 12]],
2765         [[14, 13, 15],
2766          [17, 16, 18]]], dtype=int32)>
2767  """
2768  with ops.name_scope(name, 'adjust_hue', [image]) as name:
2769    if context.executing_eagerly():
2770      if delta < -1 or delta > 1:
2771        raise ValueError('delta must be in the interval [-1, 1]')
2772    image = ops.convert_to_tensor(image, name='image')
2773    # Remember original dtype to so we can convert back if needed
2774    orig_dtype = image.dtype
2775    if orig_dtype in (dtypes.float16, dtypes.float32):
2776      flt_image = image
2777    else:
2778      flt_image = convert_image_dtype(image, dtypes.float32)
2779
2780    rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
2781
2782    return convert_image_dtype(rgb_altered, orig_dtype)
2783
2784
2785# pylint: disable=invalid-name
2786@tf_export('image.random_jpeg_quality')
2787@dispatch.add_dispatch_support
2788def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None):
2789  """Randomly changes jpeg encoding quality for inducing jpeg noise.
2790
2791  `min_jpeg_quality` must be in the interval `[0, 100]` and less than
2792  `max_jpeg_quality`.
2793  `max_jpeg_quality` must be in the interval `[0, 100]`.
2794
2795  Usage Example:
2796
2797  >>> x = tf.constant([[[1, 2, 3],
2798  ...                   [4, 5, 6]],
2799  ...                  [[7, 8, 9],
2800  ...                   [10, 11, 12]]], dtype=tf.uint8)
2801  >>> tf.image.random_jpeg_quality(x, 75, 95)
2802  <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=...>
2803
2804  For producing deterministic results given a `seed` value, use
2805  `tf.image.stateless_random_jpeg_quality`. Unlike using the `seed` param
2806  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2807  same results given the same seed independent of how many times the function is
2808  called, and independent of global seed settings (e.g. `tf.random.set_seed`).
2809
2810  Args:
2811    image: 3D image. Size of the last dimension must be 1 or 3.
2812    min_jpeg_quality: Minimum jpeg encoding quality to use.
2813    max_jpeg_quality: Maximum jpeg encoding quality to use.
2814    seed: An operation-specific seed. It will be used in conjunction with the
2815      graph-level seed to determine the real seeds that will be used in this
2816      operation. Please see the documentation of set_random_seed for its
2817      interaction with the graph-level random seed.
2818
2819  Returns:
2820    Adjusted image(s), same shape and DType as `image`.
2821
2822  Raises:
2823    ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
2824  """
2825  if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
2826      max_jpeg_quality > 100):
2827    raise ValueError('jpeg encoding range must be between 0 and 100.')
2828
2829  if min_jpeg_quality >= max_jpeg_quality:
2830    raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')
2831
2832  jpeg_quality = random_ops.random_uniform([],
2833                                           min_jpeg_quality,
2834                                           max_jpeg_quality,
2835                                           seed=seed,
2836                                           dtype=dtypes.int32)
2837  return adjust_jpeg_quality(image, jpeg_quality)
2838
2839
2840@tf_export('image.stateless_random_jpeg_quality', v1=[])
2841@dispatch.add_dispatch_support
2842def stateless_random_jpeg_quality(image,
2843                                  min_jpeg_quality,
2844                                  max_jpeg_quality,
2845                                  seed):
2846  """Deterministically radomize jpeg encoding quality for inducing jpeg noise.
2847
2848  Guarantees the same results given the same `seed` independent of how many
2849  times the function is called, and independent of global seed settings (e.g.
2850  `tf.random.set_seed`).
2851
2852  `min_jpeg_quality` must be in the interval `[0, 100]` and less than
2853  `max_jpeg_quality`.
2854  `max_jpeg_quality` must be in the interval `[0, 100]`.
2855
2856  Usage Example:
2857
2858  >>> x = tf.constant([[[1, 2, 3],
2859  ...                   [4, 5, 6]],
2860  ...                  [[7, 8, 9],
2861  ...                   [10, 11, 12]]], dtype=tf.uint8)
2862  >>> seed = (1, 2)
2863  >>> tf.image.stateless_random_jpeg_quality(x, 75, 95, seed)
2864  <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=
2865  array([[[ 0,  4,  5],
2866          [ 1,  5,  6]],
2867         [[ 5,  9, 10],
2868          [ 5,  9, 10]]], dtype=uint8)>
2869
2870  Args:
2871    image: 3D image. Size of the last dimension must be 1 or 3.
2872    min_jpeg_quality: Minimum jpeg encoding quality to use.
2873    max_jpeg_quality: Maximum jpeg encoding quality to use.
2874    seed: A shape [2] Tensor, the seed to the random number generator. Must have
2875      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2876
2877  Returns:
2878    Adjusted image(s), same shape and DType as `image`.
2879
2880  Raises:
2881    ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
2882  """
2883  if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
2884      max_jpeg_quality > 100):
2885    raise ValueError('jpeg encoding range must be between 0 and 100.')
2886
2887  if min_jpeg_quality >= max_jpeg_quality:
2888    raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')
2889
2890  jpeg_quality = stateless_random_ops.stateless_random_uniform(
2891      shape=[], minval=min_jpeg_quality, maxval=max_jpeg_quality, seed=seed,
2892      dtype=dtypes.int32)
2893  return adjust_jpeg_quality(image, jpeg_quality)
2894
2895
2896@tf_export('image.adjust_jpeg_quality')
2897@dispatch.add_dispatch_support
2898def adjust_jpeg_quality(image, jpeg_quality, name=None):
2899  """Adjust jpeg encoding quality of an image.
2900
2901  This is a convenience method that converts an image to uint8 representation,
2902  encodes it to jpeg with `jpeg_quality`, decodes it, and then converts back
2903  to the original data type.
2904
2905  `jpeg_quality` must be in the interval `[0, 100]`.
2906
2907  Usage Examples:
2908
2909  >>> x = [[[0.01, 0.02, 0.03],
2910  ...       [0.04, 0.05, 0.06]],
2911  ...      [[0.07, 0.08, 0.09],
2912  ...       [0.10, 0.11, 0.12]]]
2913  >>> x_jpeg = tf.image.adjust_jpeg_quality(x, 75)
2914  >>> x_jpeg.numpy()
2915  array([[[0.00392157, 0.01960784, 0.03137255],
2916          [0.02745098, 0.04313726, 0.05490196]],
2917         [[0.05882353, 0.07450981, 0.08627451],
2918          [0.08235294, 0.09803922, 0.10980393]]], dtype=float32)
2919
2920  Note that floating point values are expected to be in the range `[0, 1)`
2921  and values outside this range are clipped.
2922
2923  >>> x = [[[1.0, 2.0, 3.0],
2924  ...       [4.0, 5.0, 6.0]],
2925  ...     [[7.0, 8.0, 9.0],
2926  ...       [10.0, 11.0, 12.0]]]
2927  >>> tf.image.adjust_jpeg_quality(x, 75)
2928  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2929  array([[[1., 1., 1.],
2930          [1., 1., 1.]],
2931         [[1., 1., 1.],
2932          [1., 1., 1.]]], dtype=float32)>
2933
2934  Note that `jpeg_quality` 100 is still lossy compression.
2935
2936  >>> x = tf.constant([[[1, 2, 3],
2937  ...                   [4, 5, 6]],
2938  ...                  [[7, 8, 9],
2939  ...                   [10, 11, 12]]], dtype=tf.uint8)
2940  >>> tf.image.adjust_jpeg_quality(x, 100)
2941  <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=
2942  array([[[ 0,  1,  3],
2943          [ 3,  4,  6]],
2944         [[ 6,  7,  9],
2945          [ 9, 10, 12]]], dtype=uint8)>
2946
2947  Args:
2948    image: 3D image. The size of the last dimension must be None, 1 or 3.
2949    jpeg_quality: Python int or Tensor of type int32. jpeg encoding quality.
2950    name: A name for this operation (optional).
2951
2952  Returns:
2953    Adjusted image, same shape and DType as `image`.
2954
2955  Raises:
2956    InvalidArgumentError: quality must be in [0,100]
2957    InvalidArgumentError: image must have 1 or 3 channels
2958  """
2959  with ops.name_scope(name, 'adjust_jpeg_quality', [image]):
2960    image = ops.convert_to_tensor(image, name='image')
2961    channels = image.shape.as_list()[-1]
2962    # Remember the original dtype so we can convert back if needed.
2963    orig_dtype = image.dtype
2964    image = convert_image_dtype(image, dtypes.uint8, saturate=True)
2965    if not _is_tensor(jpeg_quality):
2966      # If jpeg_quality is an int (not a tensor).
2967      jpeg_quality = ops.convert_to_tensor(jpeg_quality, dtype=dtypes.int32)
2968    image = gen_image_ops.encode_jpeg_variable_quality(image, jpeg_quality)
2969
2970    image = gen_image_ops.decode_jpeg(image, channels=channels)
2971    return convert_image_dtype(image, orig_dtype, saturate=True)
2972
2973
2974@tf_export('image.random_saturation')
2975@dispatch.add_dispatch_support
2976def random_saturation(image, lower, upper, seed=None):
2977  """Adjust the saturation of RGB images by a random factor.
2978
2979  Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
2980  picked in the interval `[lower, upper)`.
2981
2982  Usage Example:
2983
2984  >>> x = [[[1.0, 2.0, 3.0],
2985  ...       [4.0, 5.0, 6.0]],
2986  ...     [[7.0, 8.0, 9.0],
2987  ...       [10.0, 11.0, 12.0]]]
2988  >>> tf.image.random_saturation(x, 5, 10)
2989  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2990  array([[[ 0. ,  1.5,  3. ],
2991          [ 0. ,  3. ,  6. ]],
2992         [[ 0. ,  4.5,  9. ],
2993          [ 0. ,  6. , 12. ]]], dtype=float32)>
2994
2995  For producing deterministic results given a `seed` value, use
2996  `tf.image.stateless_random_saturation`. Unlike using the `seed` param
2997  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2998  same results given the same seed independent of how many times the function is
2999  called, and independent of global seed settings (e.g. tf.random.set_seed).
3000
3001  Args:
3002    image: RGB image or images. The size of the last dimension must be 3.
3003    lower: float.  Lower bound for the random saturation factor.
3004    upper: float.  Upper bound for the random saturation factor.
3005    seed: An operation-specific seed. It will be used in conjunction with the
3006      graph-level seed to determine the real seeds that will be used in this
3007      operation. Please see the documentation of set_random_seed for its
3008      interaction with the graph-level random seed.
3009
3010  Returns:
3011    Adjusted image(s), same shape and DType as `image`.
3012
3013  Raises:
3014    ValueError: if `upper <= lower` or if `lower < 0`.
3015  """
3016  if upper <= lower:
3017    raise ValueError('upper must be > lower.')
3018
3019  if lower < 0:
3020    raise ValueError('lower must be non-negative.')
3021
3022  saturation_factor = random_ops.random_uniform([], lower, upper, seed=seed)
3023  return adjust_saturation(image, saturation_factor)
3024
3025
3026@tf_export('image.stateless_random_saturation', v1=[])
3027@dispatch.add_dispatch_support
3028def stateless_random_saturation(image, lower, upper, seed=None):
3029  """Adjust the saturation of RGB images by a random factor deterministically.
3030
3031  Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
3032  picked in the interval `[lower, upper)`.
3033
3034  Guarantees the same results given the same `seed` independent of how many
3035  times the function is called, and independent of global seed settings (e.g.
3036  `tf.random.set_seed`).
3037
3038  Usage Example:
3039
3040  >>> x = [[[1.0, 2.0, 3.0],
3041  ...       [4.0, 5.0, 6.0]],
3042  ...      [[7.0, 8.0, 9.0],
3043  ...       [10.0, 11.0, 12.0]]]
3044  >>> seed = (1, 2)
3045  >>> tf.image.stateless_random_saturation(x, 0.5, 1.0, seed)
3046  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
3047  array([[[ 1.1559395,  2.0779698,  3.       ],
3048          [ 4.1559396,  5.07797  ,  6.       ]],
3049         [[ 7.1559396,  8.07797  ,  9.       ],
3050          [10.155939 , 11.07797  , 12.       ]]], dtype=float32)>
3051
3052  Args:
3053    image: RGB image or images. The size of the last dimension must be 3.
3054    lower: float.  Lower bound for the random saturation factor.
3055    upper: float.  Upper bound for the random saturation factor.
3056    seed: A shape [2] Tensor, the seed to the random number generator. Must have
3057      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
3058
3059  Returns:
3060    Adjusted image(s), same shape and DType as `image`.
3061
3062  Raises:
3063    ValueError: if `upper <= lower` or if `lower < 0`.
3064  """
3065  if upper <= lower:
3066    raise ValueError('upper must be > lower.')
3067
3068  if lower < 0:
3069    raise ValueError('lower must be non-negative.')
3070
3071  saturation_factor = stateless_random_ops.stateless_random_uniform(
3072      shape=[], minval=lower, maxval=upper, seed=seed)
3073  return adjust_saturation(image, saturation_factor)
3074
3075
3076@tf_export('image.adjust_saturation')
3077@dispatch.add_dispatch_support
3078def adjust_saturation(image, saturation_factor, name=None):
3079  """Adjust saturation of RGB images.
3080
3081  This is a convenience method that converts RGB images to float
3082  representation, converts them to HSV, multiplies the saturation (S) channel
3083  by `saturation_factor`, converts back to RGB, and then back to the original
3084  data type. If several adjustments are chained it is advisable to minimize
3085  the number of redundant conversions.
3086
3087  `image` is an RGB image or images.  The image saturation is adjusted by
3088  converting the images to HSV and multiplying the saturation (S) channel by
3089  `saturation_factor` and clipping. The images are then converted back to RGB.
3090
3091  `saturation_factor` must be in the interval `[0, inf)`.
3092
3093  Usage Example:
3094
3095  >>> x = [[[1.0, 2.0, 3.0],
3096  ...       [4.0, 5.0, 6.0]],
3097  ...     [[7.0, 8.0, 9.0],
3098  ...       [10.0, 11.0, 12.0]]]
3099  >>> tf.image.adjust_saturation(x, 0.5)
3100  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
3101  array([[[ 2. ,  2.5,  3. ],
3102          [ 5. ,  5.5,  6. ]],
3103         [[ 8. ,  8.5,  9. ],
3104          [11. , 11.5, 12. ]]], dtype=float32)>
3105
3106  Args:
3107    image: RGB image or images. The size of the last dimension must be 3.
3108    saturation_factor: float. Factor to multiply the saturation by.
3109    name: A name for this operation (optional).
3110
3111  Returns:
3112    Adjusted image(s), same shape and DType as `image`.
3113
3114  Raises:
3115    InvalidArgumentError: input must have 3 channels
3116  """
3117  with ops.name_scope(name, 'adjust_saturation', [image]) as name:
3118    image = ops.convert_to_tensor(image, name='image')
3119    # Remember the original dtype so we can convert back if needed.
3120    orig_dtype = image.dtype
3121    if orig_dtype in (dtypes.float16, dtypes.float32):
3122      flt_image = image
3123    else:
3124      flt_image = convert_image_dtype(image, dtypes.float32)
3125
3126    adjusted = gen_image_ops.adjust_saturation(flt_image, saturation_factor)
3127
3128    return convert_image_dtype(adjusted, orig_dtype)
3129
3130
3131@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg'])
3132def is_jpeg(contents, name=None):
3133  r"""Convenience function to check if the 'contents' encodes a JPEG image.
3134
3135  Args:
3136    contents: 0-D `string`. The encoded image bytes.
3137    name: A name for the operation (optional)
3138
3139  Returns:
3140     A scalar boolean tensor indicating if 'contents' may be a JPEG image.
3141     is_jpeg is susceptible to false positives.
3142  """
3143  # Normal JPEGs start with \xff\xd8\xff\xe0
3144  # JPEG with EXIF starts with \xff\xd8\xff\xe1
3145  # Use \xff\xd8\xff to cover both.
3146  with ops.name_scope(name, 'is_jpeg'):
3147    substr = string_ops.substr(contents, 0, 3)
3148    return math_ops.equal(substr, b'\xff\xd8\xff', name=name)
3149
3150
3151def _is_png(contents, name=None):
3152  r"""Convenience function to check if the 'contents' encodes a PNG image.
3153
3154  Args:
3155    contents: 0-D `string`. The encoded image bytes.
3156    name: A name for the operation (optional)
3157
3158  Returns:
3159     A scalar boolean tensor indicating if 'contents' may be a PNG image.
3160     is_png is susceptible to false positives.
3161  """
3162  with ops.name_scope(name, 'is_png'):
3163    substr = string_ops.substr(contents, 0, 3)
3164    return math_ops.equal(substr, b'\211PN', name=name)
3165
3166
3167tf_export(
3168    'io.decode_and_crop_jpeg',
3169    'image.decode_and_crop_jpeg',
3170    v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])(
3171        dispatch.add_dispatch_support(gen_image_ops.decode_and_crop_jpeg))
3172
3173tf_export(
3174    'io.decode_bmp',
3175    'image.decode_bmp',
3176    v1=['io.decode_bmp', 'image.decode_bmp'])(
3177        dispatch.add_dispatch_support(gen_image_ops.decode_bmp))
3178tf_export(
3179    'io.decode_gif',
3180    'image.decode_gif',
3181    v1=['io.decode_gif', 'image.decode_gif'])(
3182        dispatch.add_dispatch_support(gen_image_ops.decode_gif))
3183tf_export(
3184    'io.decode_jpeg',
3185    'image.decode_jpeg',
3186    v1=['io.decode_jpeg', 'image.decode_jpeg'])(
3187        dispatch.add_dispatch_support(gen_image_ops.decode_jpeg))
3188tf_export(
3189    'io.decode_png',
3190    'image.decode_png',
3191    v1=['io.decode_png', 'image.decode_png'])(
3192        dispatch.add_dispatch_support(gen_image_ops.decode_png))
3193
3194tf_export(
3195    'io.encode_jpeg',
3196    'image.encode_jpeg',
3197    v1=['io.encode_jpeg', 'image.encode_jpeg'])(
3198        dispatch.add_dispatch_support(gen_image_ops.encode_jpeg))
3199tf_export(
3200    'io.extract_jpeg_shape',
3201    'image.extract_jpeg_shape',
3202    v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])(
3203        dispatch.add_dispatch_support(gen_image_ops.extract_jpeg_shape))
3204
3205
3206@tf_export('io.encode_png', 'image.encode_png')
3207@dispatch.add_dispatch_support
3208def encode_png(image, compression=-1, name=None):
3209  r"""PNG-encode an image.
3210
3211  `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
3212  where `channels` is:
3213
3214  *   1: for grayscale.
3215  *   2: for grayscale + alpha.
3216  *   3: for RGB.
3217  *   4: for RGBA.
3218
3219  The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
3220  default or a value from 0 to 9.  9 is the highest compression level,
3221  generating the smallest output, but is slower.
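
  For example (a minimal sketch with random pixel data; the values are
  illustrative):

  ```python
  image = tf.cast(
      tf.random.uniform([32, 32, 3], maxval=256, dtype=tf.int32), tf.uint8)
  png_bytes = tf.image.encode_png(image, compression=9)  # smallest but slowest
  ```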
3222
3223  Args:
3224    image: A `Tensor`. Must be one of the following types: `uint8`, `uint16`.
3225      3-D with shape `[height, width, channels]`.
3226    compression: An optional `int`. Defaults to `-1`. Compression level.
3227    name: A name for the operation (optional).
3228
3229  Returns:
3230    A `Tensor` of type `string`.
3231  """
3232  return gen_image_ops.encode_png(
3233      ops.convert_to_tensor(image), compression, name)
3234
3235
3236@tf_export(
3237    'io.decode_image',
3238    'image.decode_image',
3239    v1=['io.decode_image', 'image.decode_image'])
3240@dispatch.add_dispatch_support
3241def decode_image(contents,
3242                 channels=None,
3243                 dtype=dtypes.uint8,
3244                 name=None,
3245                 expand_animations=True):
3246  """Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`.
3247
3248  Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the
3249  appropriate operation to convert the input bytes `string` into a `Tensor`
3250  of type `dtype`.
3251
3252  Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as
3253  opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D
3254  arrays `[height, width, num_channels]`. Make sure to take this into account
3255  when constructing your graph if you are intermixing GIF files with BMP, JPEG,
3256  and/or PNG files. Alternatively, set the `expand_animations` argument of this
3257  function to `False`, in which case the op will return 3-dimensional tensors
3258  and will truncate animated GIF files to the first frame.
3259
3260  NOTE: If the first frame of an animated GIF does not occupy the entire
3261  canvas (maximum frame width x maximum frame height), then it fills the
3262  unoccupied areas (in the first frame) with zeros (black). For frames after
3263  the first that do not occupy the entire canvas, the previous frame is used
3264  to fill the unoccupied areas.
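
  For example (a minimal sketch; the file path is illustrative), forcing a 3-D
  result regardless of the file type:

  ```python
  contents = tf.io.read_file('/tmp/animation.gif')
  image = tf.io.decode_image(contents, dtype=tf.float32,
                             expand_animations=False)  # 3-D, first frame only
  ```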
3265
3266  Args:
3267    contents: A `Tensor` of type `string`. 0-D. The encoded image bytes.
3268    channels: An optional `int`. Defaults to `0`. Number of color channels for
3269      the decoded image.
3270    dtype: The desired DType of the returned `Tensor`.
3271    name: A name for the operation (optional)
3272    expand_animations: An optional `bool`. Defaults to `True`. Controls the
3273      shape of the returned op's output. If `True`, the returned op will produce
3274      a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for all GIFs,
3275      whether animated or not. If `False`, the returned op will produce a 3-D
3276      tensor for all file types and will truncate animated GIFs to the first
3277      frame.
3278
3279  Returns:
3280    `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on
3281    the file type and the value of the `expand_animations` parameter.
3282
3283  Raises:
3284    ValueError: On incorrect number of channels.
3285  """
3286  with ops.name_scope(name, 'decode_image'):
3287    channels = 0 if channels is None else channels
3288    if dtype not in [dtypes.float32, dtypes.uint8, dtypes.uint16]:
3289      dest_dtype = dtype
3290      dtype = dtypes.uint16
3291      return convert_image_dtype(
3292          gen_image_ops.decode_image(
3293              contents=contents,
3294              channels=channels,
3295              expand_animations=expand_animations,
3296              dtype=dtype), dest_dtype)
3297    else:
3298      return gen_image_ops.decode_image(
3299          contents=contents,
3300          channels=channels,
3301          expand_animations=expand_animations,
3302          dtype=dtype)
3303
3304
3305@tf_export('image.total_variation')
3306@dispatch.add_dispatch_support
3307def total_variation(images, name=None):
3308  """Calculate and return the total variation for one or more images.
3309
3310  The total variation is the sum of the absolute differences for neighboring
3311  pixel-values in the input images. This measures how much noise is in the
3312  images.
3313
3314  This can be used as a loss-function during optimization so as to suppress
3315  noise in images. If you have a batch of images, then you should calculate
3316  the scalar loss-value as the sum:
3317  `loss = tf.reduce_sum(tf.image.total_variation(images))`
3318
3319  This implements the anisotropic 2-D version of the formula described here:
3320
3321  https://en.wikipedia.org/wiki/Total_variation_denoising
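
  For example (a minimal sketch with random data; the shapes are illustrative):

  ```python
  images = tf.random.uniform([8, 64, 64, 3])  # [batch, height, width, channels]
  tv = tf.image.total_variation(images)       # shape [8], one value per image
  loss = tf.reduce_sum(tv)                    # scalar regularization term
  ```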
3322
3323  Args:
3324    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
3325      of shape `[height, width, channels]`.
3326    name: A name for the operation (optional).
3327
3328  Raises:
3329    ValueError: if the shape of `images` is not 3-D or 4-D.
3330
3331  Returns:
3332    The total variation of `images`.
3333
3334    If `images` was 4-D, return a 1-D float Tensor of shape `[batch]` with the
3335    total variation for each image in the batch.
3336    If `images` was 3-D, return a scalar float with the total variation for
3337    that image.
3338  """
3339
3340  with ops.name_scope(name, 'total_variation'):
3341    ndims = images.get_shape().ndims
3342
3343    if ndims == 3:
3344      # The input is a single image with shape [height, width, channels].
3345
3346      # Calculate the difference of neighboring pixel-values.
3347      # The images are shifted one pixel along the height and width by slicing.
3348      pixel_dif1 = images[1:, :, :] - images[:-1, :, :]
3349      pixel_dif2 = images[:, 1:, :] - images[:, :-1, :]
3350
3351      # Sum over all axes. (None is an alias for all axes.)
3352      sum_axis = None
3353    elif ndims == 4:
3354      # The input is a batch of images with shape:
3355      # [batch, height, width, channels].
3356
3357      # Calculate the difference of neighboring pixel-values.
3358      # The images are shifted one pixel along the height and width by slicing.
3359      pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :]
3360      pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :]
3361
3362      # Only sum over the last 3 axes.
3363      # This results in a 1-D tensor with the total variation for each image.
3364      sum_axis = [1, 2, 3]
3365    else:
3366      raise ValueError('\'images\' must be either 3 or 4-dimensional.')
3367
3368    # Calculate the total variation by taking the absolute value of the
3369    # pixel-differences and summing over the appropriate axis.
3370    tot_var = (
3371        math_ops.reduce_sum(math_ops.abs(pixel_dif1), axis=sum_axis) +
3372        math_ops.reduce_sum(math_ops.abs(pixel_dif2), axis=sum_axis))
3373
3374  return tot_var
3375
3376
3377@tf_export('image.sample_distorted_bounding_box', v1=[])
3378@dispatch.add_dispatch_support
3379def sample_distorted_bounding_box_v2(image_size,
3380                                     bounding_boxes,
3381                                     seed=0,
3382                                     min_object_covered=0.1,
3383                                     aspect_ratio_range=None,
3384                                     area_range=None,
3385                                     max_attempts=None,
3386                                     use_image_if_no_bounding_boxes=None,
3387                                     name=None):
3388  """Generate a single randomly distorted bounding box for an image.
3389
3390  Bounding box annotations are often supplied in addition to ground-truth labels
3391  in image recognition or object localization tasks. A common technique for
3392  training such a system is to randomly distort an image while preserving
3393  its content, i.e. *data augmentation*. This Op outputs a randomly distorted
3394  localization of an object, i.e. bounding box, given an `image_size`,
3395  `bounding_boxes` and a series of constraints.
3396
3397  The output of this Op is a single bounding box that may be used to crop the
3398  original image. The output is returned as 3 tensors: `begin`, `size` and
3399  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
3400  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
3401  visualize what the bounding box looks like.
3402
3403  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
3404  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
3405  and the height of the underlying image.
3406
3407  For example,
3408
3409  ```python
3410      # Generate a single distorted bounding box.
3411      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
3412          tf.shape(image),
3413          bounding_boxes=bounding_boxes,
3414          min_object_covered=0.1)
3415
3416      # Draw the bounding box in an image summary.
3417      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
3418                                                    bbox_for_draw)
3419      tf.compat.v1.summary.image('images_with_box', image_with_box)
3420
3421      # Employ the bounding box to distort the image.
3422      distorted_image = tf.slice(image, begin, size)
3423  ```
3424
3425  Note that if no bounding box information is available, setting
3426  `use_image_if_no_bounding_boxes = True` will assume there is a single implicit
3427  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
3428  `False` and no bounding boxes are supplied, an error is raised.
3429
3430  For producing deterministic results given a `seed` value, use
3431  `tf.image.stateless_sample_distorted_bounding_box`. Unlike using the `seed`
3432  param with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops
3433  guarantee the same results given the same seed independent of how many times
3434  the function is called, and independent of global seed settings
3435  (e.g. tf.random.set_seed).
3436
3437  Args:
3438    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
3439      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
3440    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
3441      describing the N bounding boxes associated with the image.
3442    seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero, the
3443      random number generator is seeded by the given `seed`.  Otherwise, it is
3444      seeded by a random seed.
3445    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
3446      cropped area of the image must contain at least this fraction of any
3447      bounding box supplied. The value of this parameter should be non-negative.
3448      In the case of 0, the cropped area does not need to overlap any of the
3449      bounding boxes supplied.
3450    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
3451      1.33]`. The cropped area of the image must have an aspect `ratio = width /
3452      height` within this range.
3453    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
3454      cropped area of the image must contain a fraction of the supplied image
3455      within this range.
3456    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
3457      generating a cropped region of the image of the specified constraints.
3458      After `max_attempts` failures, return the entire image.
3459    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
3460      Controls behavior if no bounding boxes are supplied. If true, assume an
3461      implicit bounding box covering the whole input. If false, raise an error.
3462    name: A name for the operation (optional).
3463
3464  Returns:
3465    A tuple of `Tensor` objects (begin, size, bboxes).
3466
3467    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3468    `[offset_height, offset_width, 0]`. Provide as input to
3469      `tf.slice`.
3470    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3471    `[target_height, target_width, -1]`. Provide as input to
3472      `tf.slice`.
3473    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
3474    the distorted bounding box.
3475    Provide as input to `tf.image.draw_bounding_boxes`.
3476
3477  Raises:
3478    ValueError: If no seed is specified and op determinism is enabled.
3479  """
3480  if seed:
3481    seed1, seed2 = random_seed.get_seed(seed)
3482  else:
3483    if config.is_op_determinism_enabled():
3484      raise ValueError(
3485          f'tf.image.sample_distorted_bounding_box requires a non-zero seed to '
3486          f'be passed in when determinism is enabled, but got seed={seed}. '
3487          f'Please pass in a non-zero seed, e.g. by passing "seed=1".')
3488    seed1, seed2 = (0, 0)
3489  with ops.name_scope(name, 'sample_distorted_bounding_box'):
3490    return gen_image_ops.sample_distorted_bounding_box_v2(
3491        image_size,
3492        bounding_boxes,
3493        seed=seed1,
3494        seed2=seed2,
3495        min_object_covered=min_object_covered,
3496        aspect_ratio_range=aspect_ratio_range,
3497        area_range=area_range,
3498        max_attempts=max_attempts,
3499        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
3500        name=name)
3501
3502
3503@tf_export('image.stateless_sample_distorted_bounding_box', v1=[])
3504@dispatch.add_dispatch_support
3505def stateless_sample_distorted_bounding_box(image_size,
3506                                            bounding_boxes,
3507                                            seed,
3508                                            min_object_covered=0.1,
3509                                            aspect_ratio_range=None,
3510                                            area_range=None,
3511                                            max_attempts=None,
3512                                            use_image_if_no_bounding_boxes=None,
3513                                            name=None):
3514  """Generate a randomly distorted bounding box for an image deterministically.
3515
3516  Bounding box annotations are often supplied in addition to ground-truth labels
3517  in image recognition or object localization tasks. A common technique for
3518  training such a system is to randomly distort an image while preserving
3519  its content, i.e. *data augmentation*. This Op, given the same `seed`,
3520  deterministically outputs a randomly distorted localization of an object, i.e.
3521  bounding box, given an `image_size`, `bounding_boxes` and a series of
3522  constraints.
3523
3524  The output of this Op is a single bounding box that may be used to crop the
3525  original image. The output is returned as 3 tensors: `begin`, `size` and
3526  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
3527  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
3528  visualize what the bounding box looks like.
3529
3530  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
3531  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
3532  and the height of the underlying image.
3533
3534  The output of this Op is guaranteed to be the same given the same `seed` and
3535  is independent of how many times the function is called, and independent of
3536  global seed settings (e.g. `tf.random.set_seed`).
3537
3538  Example usage:
3539
3540  >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]])
3541  >>> bbox = tf.constant(
3542  ...   [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
3543  >>> seed = (1, 2)
3544  >>> # Generate a single distorted bounding box.
3545  >>> bbox_begin, bbox_size, bbox_draw = (
3546  ...   tf.image.stateless_sample_distorted_bounding_box(
3547  ...     tf.shape(image), bounding_boxes=bbox, seed=seed))
3548  >>> # Employ the bounding box to distort the image.
3549  >>> tf.slice(image, bbox_begin, bbox_size)
3550  <tf.Tensor: shape=(2, 2, 1), dtype=int64, numpy=
3551  array([[[1],
3552          [2]],
3553         [[4],
3554          [5]]])>
3555  >>> # Draw the bounding box in an image summary.
3556  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
3557  >>> tf.image.draw_bounding_boxes(
3558  ...   tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors)
3559  <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
3560  array([[[[1.],
3561           [1.],
3562           [3.]],
3563          [[1.],
3564           [1.],
3565           [6.]],
3566          [[7.],
3567           [8.],
3568           [9.]]]], dtype=float32)>
3569
3570  Note that if no bounding box information is available, setting
3571  `use_image_if_no_bounding_boxes = True` will assume there is a single implicit
3572  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
3573  `False` and no bounding boxes are supplied, an error is raised.
3574
3575  Args:
3576    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
3577      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
3578    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
3579      describing the N bounding boxes associated with the image.
3580    seed: A shape [2] Tensor, the seed to the random number generator. Must have
3581      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
3582    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
3583      cropped area of the image must contain at least this fraction of any
3584      bounding box supplied. The value of this parameter should be non-negative.
3585      In the case of 0, the cropped area does not need to overlap any of the
3586      bounding boxes supplied.
3587    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
3588      1.33]`. The cropped area of the image must have an aspect `ratio = width /
3589      height` within this range.
3590    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
3591      cropped area of the image must contain a fraction of the supplied image
3592      within this range.
3593    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
3594      generating a cropped region of the image of the specified constraints.
3595      After `max_attempts` failures, return the entire image.
3596    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
3597      Controls behavior if no bounding boxes are supplied. If true, assume an
3598      implicit bounding box covering the whole input. If false, raise an error.
3599    name: A name for the operation (optional).
3600
3601  Returns:
3602    A tuple of `Tensor` objects (begin, size, bboxes).
3603
3604    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3605    `[offset_height, offset_width, 0]`. Provide as input to
3606      `tf.slice`.
3607    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3608    `[target_height, target_width, -1]`. Provide as input to
3609      `tf.slice`.
3610    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
3611    the distorted bounding box.
3612    Provide as input to `tf.image.draw_bounding_boxes`.
3613  """
3614  with ops.name_scope(name, 'stateless_sample_distorted_bounding_box'):
3615    return gen_image_ops.stateless_sample_distorted_bounding_box(
3616        image_size=image_size,
3617        bounding_boxes=bounding_boxes,
3618        seed=seed,
3619        min_object_covered=min_object_covered,
3620        aspect_ratio_range=aspect_ratio_range,
3621        area_range=area_range,
3622        max_attempts=max_attempts,
3623        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
3624        name=name)
3625
3626
3627@tf_export(v1=['image.sample_distorted_bounding_box'])
3628@dispatch.add_dispatch_support
3629@deprecation.deprecated(
3630    date=None,
3631    instructions='`seed2` arg is deprecated. '
3632    'Use sample_distorted_bounding_box_v2 instead.')
3633def sample_distorted_bounding_box(image_size,
3634                                  bounding_boxes,
3635                                  seed=None,
3636                                  seed2=None,
3637                                  min_object_covered=0.1,
3638                                  aspect_ratio_range=None,
3639                                  area_range=None,
3640                                  max_attempts=None,
3641                                  use_image_if_no_bounding_boxes=None,
3642                                  name=None):
3643  """Generate a single randomly distorted bounding box for an image.
3644
3645  Bounding box annotations are often supplied in addition to ground-truth labels
3646  in image recognition or object localization tasks. A common technique for
3647  training such a system is to randomly distort an image while preserving
3648  its content, i.e. *data augmentation*. This Op outputs a randomly distorted
3649  localization of an object, i.e. bounding box, given an `image_size`,
3650  `bounding_boxes` and a series of constraints.
3651
3652  The output of this Op is a single bounding box that may be used to crop the
3653  original image. The output is returned as 3 tensors: `begin`, `size` and
3654  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
3655  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
3656  visualize what the bounding box looks like.
3657
3658  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
3659  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the
3660  width and height of the underlying image.
3662
3663  For example,
3664
3665  ```python
3666      # Generate a single distorted bounding box.
3667      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
3668          tf.shape(image),
3669          bounding_boxes=bounding_boxes,
3670          min_object_covered=0.1)
3671
3672      # Draw the bounding box in an image summary.
3673      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
3674                                                    bbox_for_draw)
3675      tf.compat.v1.summary.image('images_with_box', image_with_box)
3676
3677      # Employ the bounding box to distort the image.
3678      distorted_image = tf.slice(image, begin, size)
3679  ```
3680
3681  Note that if no bounding box information is available, setting
3682  `use_image_if_no_bounding_boxes = True` will assume there is a single implicit
3683  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
3684  `False` and no bounding boxes are supplied, an error is raised.
3685
3686  Args:
3687    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
3688      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
3689    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
3690      describing the N bounding boxes associated with the image.
3691    seed: An optional `int`. Defaults to `0`. If either `seed` or `seed2` are
3692      set to non-zero, the random number generator is seeded by the given
3693      `seed`.  Otherwise, it is seeded by a random seed.
3694    seed2: An optional `int`. Defaults to `0`. A second seed to avoid seed
3695      collision.
3696    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
3697      cropped area of the image must contain at least this fraction of any
3698      bounding box supplied. The value of this parameter should be non-negative.
3699      In the case of 0, the cropped area does not need to overlap any of the
3700      bounding boxes supplied.
3701    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
3702      1.33]`. The cropped area of the image must have an aspect ratio = width /
3703      height within this range.
3704    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
3705      cropped area of the image must contain a fraction of the supplied image
3706      within this range.
3707    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
3708      generating a cropped region of the image of the specified constraints.
3709      After `max_attempts` failures, return the entire image.
3710    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
3711      Controls behavior if no bounding boxes are supplied. If true, assume an
3712      implicit bounding box covering the whole input. If false, raise an error.
3713    name: A name for the operation (optional).
3714
3715  Returns:
3716    A tuple of `Tensor` objects (begin, size, bboxes).
3717
3718    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3719    `[offset_height, offset_width, 0]`. Provide as input to
3720      `tf.slice`.
3721    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3722    `[target_height, target_width, -1]`. Provide as input to
3723      `tf.slice`.
3724    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
3725    the distorted bounding box.
3726      Provide as input to `tf.image.draw_bounding_boxes`.
3727
3728  Raises:
3729    ValueError: If no seed is specified and op determinism is enabled.
3730  """
3731  if not seed and not seed2 and config.is_op_determinism_enabled():
3732    raise ValueError(
3733        f'tf.compat.v1.image.sample_distorted_bounding_box requires "seed" or '
3734        f'"seed2" to be non-zero when determinism is enabled. Please pass in '
3735        f'a non-zero seed, e.g. by passing "seed=1". Got seed={seed} and '
3736        f"seed2={seed2}")
3737  with ops.name_scope(name, 'sample_distorted_bounding_box'):
3738    return gen_image_ops.sample_distorted_bounding_box_v2(
3739        image_size,
3740        bounding_boxes,
3741        seed=seed,
3742        seed2=seed2,
3743        min_object_covered=min_object_covered,
3744        aspect_ratio_range=aspect_ratio_range,
3745        area_range=area_range,
3746        max_attempts=max_attempts,
3747        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
3748        name=name)
3749
3750
3751@tf_export('image.non_max_suppression')
3752@dispatch.add_dispatch_support
3753def non_max_suppression(boxes,
3754                        scores,
3755                        max_output_size,
3756                        iou_threshold=0.5,
3757                        score_threshold=float('-inf'),
3758                        name=None):
3759  """Greedily selects a subset of bounding boxes in descending order of score.
3760
3761  Prunes away boxes that have high intersection-over-union (IOU) overlap
3762  with previously selected boxes.  Bounding boxes are supplied as
3763  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
3764  diagonal pair of box corners and the coordinates can be provided as normalized
3765  (i.e., lying in the interval `[0, 1]`) or absolute.  Note that this algorithm
3766  is agnostic to where the origin is in the coordinate system.  Note that this
3767  algorithm is invariant to orthogonal transformations and translations
3768  of the coordinate system; thus translating or reflecting the coordinate
3769  system results in the same boxes being selected by the algorithm.
3770  The output of this operation is a set of integers indexing into the input
3771  collection of bounding boxes representing the selected boxes.  The bounding
3772  box coordinates corresponding to the selected indices can then be obtained
3773  using the `tf.gather` operation.  For example:
3774    ```python
3775    selected_indices = tf.image.non_max_suppression(
3776        boxes, scores, max_output_size, iou_threshold)
3777    selected_boxes = tf.gather(boxes, selected_indices)
3778    ```
3779
3780  Args:
3781    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
3782    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
3783      score corresponding to each box (each row of boxes).
3784    max_output_size: A scalar integer `Tensor` representing the maximum number
3785      of boxes to be selected by non-max suppression.
3786    iou_threshold: A 0-D float tensor representing the threshold for deciding
3787      whether boxes overlap too much with respect to IOU.
3788    score_threshold: A 0-D float tensor representing the threshold for deciding
3789      when to remove boxes based on score.
3790    name: A name for the operation (optional).
3791
3792  Returns:
3793    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
3794      selected indices from the boxes tensor, where `M <= max_output_size`.
3795  """
3796  with ops.name_scope(name, 'non_max_suppression'):
3797    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
3798    score_threshold = ops.convert_to_tensor(
3799        score_threshold, name='score_threshold')
3800    return gen_image_ops.non_max_suppression_v3(boxes, scores, max_output_size,
3801                                                iou_threshold, score_threshold)
3802
3803
3804@tf_export('image.non_max_suppression_with_scores')
3805@dispatch.add_dispatch_support
3806def non_max_suppression_with_scores(boxes,
3807                                    scores,
3808                                    max_output_size,
3809                                    iou_threshold=0.5,
3810                                    score_threshold=float('-inf'),
3811                                    soft_nms_sigma=0.0,
3812                                    name=None):
3813  """Greedily selects a subset of bounding boxes in descending order of score.
3814
3815  Prunes away boxes that have high intersection-over-union (IOU) overlap
3816  with previously selected boxes.  Bounding boxes are supplied as
3817  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
3818  diagonal pair of box corners and the coordinates can be provided as normalized
3819  (i.e., lying in the interval `[0, 1]`) or absolute.  Note that this algorithm
3820  is agnostic to where the origin is in the coordinate system.  Note that this
3821  algorithm is invariant to orthogonal transformations and translations
3822  of the coordinate system; thus translating or reflecting the coordinate
3823  system results in the same boxes being selected by the algorithm.
3824  The output of this operation is a set of integers indexing into the input
3825  collection of bounding boxes representing the selected boxes.  The bounding
3826  box coordinates corresponding to the selected indices can then be obtained
3827  using the `tf.gather` operation.  For example:
3828    ```python
3829    selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(
3830        boxes, scores, max_output_size, iou_threshold=1.0, score_threshold=0.1,
3831        soft_nms_sigma=0.5)
3832    selected_boxes = tf.gather(boxes, selected_indices)
3833    ```
3834
3835  This function generalizes the `tf.image.non_max_suppression` op by also
3836  supporting a Soft-NMS (with Gaussian weighting) mode (c.f.
3837  Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score
3838  of other overlapping boxes instead of directly causing them to be pruned.
3839  Consequently, in contrast to `tf.image.non_max_suppression`,
3840  `tf.image.non_max_suppression_with_scores` returns the new scores of each
3841  input box in the second output, `selected_scores`.
3842
3843  To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be
3844  larger than 0.  When `soft_nms_sigma` equals 0, the behavior of
3845  `tf.image.non_max_suppression_with_scores` is identical to that of
3846  `tf.image.non_max_suppression` (except for the extra output) both in function
3847  and in running time.
3848
3849  Note that when `soft_nms_sigma` > 0, Soft-NMS is performed and `iou_threshold`
3850  is ignored. `iou_threshold` is only used for standard NMS.
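
  For instance (an illustrative sketch with made-up boxes and scores), Soft-NMS
  keeps both of two heavily overlapping boxes but returns a decayed score for
  the lower-scoring one:

  ```python
  boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                       [0.0, 0.1, 1.0, 1.1]])
  scores = tf.constant([0.9, 0.8])
  indices, new_scores = tf.image.non_max_suppression_with_scores(
      boxes, scores, max_output_size=2, soft_nms_sigma=0.5)
  ```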
3851
3852  Args:
3853    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
3854    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
3855      score corresponding to each box (each row of boxes).
3856    max_output_size: A scalar integer `Tensor` representing the maximum number
3857      of boxes to be selected by non-max suppression.
3858    iou_threshold: A 0-D float tensor representing the threshold for deciding
3859      whether boxes overlap too much with respect to IOU.
3860    score_threshold: A 0-D float tensor representing the threshold for deciding
3861      when to remove boxes based on score.
3862    soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft
3863      NMS; see Bodla et al (c.f. https://arxiv.org/abs/1704.04503).  When
3864      `soft_nms_sigma=0.0` (which is the default), we fall back to standard (hard)
3865      NMS.
3866    name: A name for the operation (optional).
3867
3868  Returns:
3869    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
3870      selected indices from the boxes tensor, where `M <= max_output_size`.
3871    selected_scores: A 1-D float tensor of shape `[M]` representing the
3872      corresponding scores for each selected box, where `M <= max_output_size`.
3873      Scores only differ from corresponding input scores when using Soft NMS
3874      (i.e. when `soft_nms_sigma > 0`).
3875  """
3876  with ops.name_scope(name, 'non_max_suppression_with_scores'):
3877    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
3878    score_threshold = ops.convert_to_tensor(
3879        score_threshold, name='score_threshold')
3880    soft_nms_sigma = ops.convert_to_tensor(
3881        soft_nms_sigma, name='soft_nms_sigma')
3882    (selected_indices, selected_scores,
3883     _) = gen_image_ops.non_max_suppression_v5(
3884         boxes,
3885         scores,
3886         max_output_size,
3887         iou_threshold,
3888         score_threshold,
3889         soft_nms_sigma,
3890         pad_to_max_output_size=False)
3891    return selected_indices, selected_scores
3892
3893
3894@tf_export('image.non_max_suppression_overlaps')
3895@dispatch.add_dispatch_support
3896def non_max_suppression_with_overlaps(overlaps,
3897                                      scores,
3898                                      max_output_size,
3899                                      overlap_threshold=0.5,
3900                                      score_threshold=float('-inf'),
3901                                      name=None):
3902  """Greedily selects a subset of bounding boxes in descending order of score.
3903
3904  Prunes away boxes that have high overlap with previously selected boxes.
3905  N-by-N overlap values are supplied as a square matrix.
3906  The output of this operation is a set of integers indexing into the input
3907  collection of bounding boxes representing the selected boxes.  The bounding
3908  box coordinates corresponding to the selected indices can then be obtained
3909  using the `tf.gather` operation.  For example:
3910    ```python
3911    selected_indices = tf.image.non_max_suppression_overlaps(
3912        overlaps, scores, max_output_size, overlap_threshold)
3913    selected_boxes = tf.gather(boxes, selected_indices)
3914    ```
3915
3916  Args:
3917    overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]`
3918      representing the N-by-N box overlap values.
3919    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
3920      score corresponding to each box (each row of boxes).
3921    max_output_size: A scalar integer `Tensor` representing the maximum number
3922      of boxes to be selected by non-max suppression.
3923    overlap_threshold: A 0-D float tensor representing the threshold for
3924      deciding whether boxes overlap too much with respect to the provided
3925      overlap values.
3926    score_threshold: A 0-D float tensor representing the threshold for deciding
3927      when to remove boxes based on score.
3928    name: A name for the operation (optional).
3929
3930  Returns:
3931    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
3932      selected indices from the overlaps tensor, where `M <= max_output_size`.
3933  """
3934  with ops.name_scope(name, 'non_max_suppression_overlaps'):
3935    overlap_threshold = ops.convert_to_tensor(
3936        overlap_threshold, name='overlap_threshold')
3937    # pylint: disable=protected-access
3938    return gen_image_ops.non_max_suppression_with_overlaps(
3939        overlaps, scores, max_output_size, overlap_threshold, score_threshold)
3940    # pylint: enable=protected-access
3941
3942
3943_rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115],
3944                      [0.587, -0.27455667, -0.52273617],
3945                      [0.114, -0.32134392, 0.31119955]]
3946
3947
3948@tf_export('image.rgb_to_yiq')
3949@dispatch.add_dispatch_support
3950def rgb_to_yiq(images):
3951  """Converts one or more images from RGB to YIQ.
3952
3953  Outputs a tensor of the same shape as the `images` tensor, containing the YIQ
3954  value of the pixels.
3955  The output is only well defined if the values in `images` are in `[0, 1]`.
3956
3957  Usage Example:
3958
3959  >>> x = tf.constant([[[1.0, 2.0, 3.0]]])
3960  >>> tf.image.rgb_to_yiq(x)
3961  <tf.Tensor: shape=(1, 1, 3), dtype=float32,
3962  numpy=array([[[ 1.815     , -0.91724455,  0.09962624]]], dtype=float32)>
3963
3964  Args:
3965    images: 2-D or higher rank. Image data to convert. Last dimension must be
3966      size 3.
3967
3968  Returns:
3969    images: tensor with the same shape as `images`.
3970  """
3971  images = ops.convert_to_tensor(images, name='images')
3972  kernel = ops.convert_to_tensor(
3973      _rgb_to_yiq_kernel, dtype=images.dtype, name='kernel')
3974  ndims = images.get_shape().ndims
3975  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
3976
3977
3978_yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021],
3979                      [0.6208248, -0.64720424, 1.70423049]]
3980
3981
3982@tf_export('image.yiq_to_rgb')
3983@dispatch.add_dispatch_support
3984def yiq_to_rgb(images):
3985  """Converts one or more images from YIQ to RGB.
3986
3987  Outputs a tensor of the same shape as the `images` tensor, containing the RGB
3988  value of the pixels.
3989  The output is only well defined if the Y values in `images` are in `[0, 1]`, the
3990  I values are in `[-0.5957, 0.5957]`, and the Q values are in `[-0.5226, 0.5226]`.
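
  For example (an illustrative sketch that rescales random channel data into
  the required ranges before conversion):

  ```python
  yiq = tf.random.uniform([64, 64, 3])  # values in [0, 1)
  y, i, q = tf.split(yiq, 3, axis=-1)
  i = i * (2 * 0.5957) - 0.5957         # map to [-0.5957, 0.5957]
  q = q * (2 * 0.5226) - 0.5226         # map to [-0.5226, 0.5226]
  rgb = tf.image.yiq_to_rgb(tf.concat([y, i, q], axis=-1))
  ```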
3991
3992  Args:
3993    images: 2-D or higher rank. Image data to convert. Last dimension must be
3994      size 3.
3995
3996  Returns:
3997    images: tensor with the same shape as `images`.
3998  """
3999  images = ops.convert_to_tensor(images, name='images')
4000  kernel = ops.convert_to_tensor(
4001      _yiq_to_rgb_kernel, dtype=images.dtype, name='kernel')
4002  ndims = images.get_shape().ndims
4003  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
4004
4005
4006_rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538],
4007                      [0.587, -0.28886916, -0.51496512],
4008                      [0.114, 0.43601035, -0.10001026]]
4009
4010
4011@tf_export('image.rgb_to_yuv')
4012@dispatch.add_dispatch_support
4013def rgb_to_yuv(images):
4014  """Converts one or more images from RGB to YUV.
4015
4016  Outputs a tensor of the same shape as the `images` tensor, containing the YUV
4017  value of the pixels.
4018  The output is only well defined if the values in `images` are in `[0, 1]`.
4019  Images are commonly represented with pixel values either in the `[0, 255]`
4020  range or in the float `[0, 1]` range; convert the input image to the float
4021  `[0, 1]` range before calling this function.
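
  For example (a minimal sketch; `tf.image.convert_image_dtype` rescales an
  integer image into the float `[0, 1]` range):

  ```python
  image = tf.cast(
      tf.random.uniform([64, 64, 3], maxval=256, dtype=tf.int32), tf.uint8)
  image = tf.image.convert_image_dtype(image, tf.float32)  # now in [0, 1]
  yuv = tf.image.rgb_to_yuv(image)
  ```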
4022
4023  Args:
4024    images: 2-D or higher rank. Image data to convert. Last dimension must be
4025      size 3.
4026
4027  Returns:
4028    images: tensor with the same shape as `images`.
4029  """
4030  images = ops.convert_to_tensor(images, name='images')
4031  kernel = ops.convert_to_tensor(
4032      _rgb_to_yuv_kernel, dtype=images.dtype, name='kernel')
4033  ndims = images.get_shape().ndims
4034  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
4035
4036
4037_yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185],
4038                      [1.13988303, -0.58062185, 0]]
4039
4040
4041@tf_export('image.yuv_to_rgb')
4042@dispatch.add_dispatch_support
4043def yuv_to_rgb(images):
4044  """Converts one or more images from YUV to RGB.
4045
4046  Outputs a tensor of the same shape as the `images` tensor, containing the RGB
4047  value of the pixels.
4048  The output is only well defined if the Y values in `images` are in `[0, 1]`
4049  and the U and V values are in `[-0.5, 0.5]`.
4050
4051  As described above, you need to scale your YUV images if their pixel values
4052  are not in the required range. The example below illustrates preprocessing
4053  each channel of the images before feeding them to `yuv_to_rgb`.
4054
4055  ```python
4056  yuv_images = tf.random.uniform(shape=[100, 64, 64, 3], maxval=255)
4057  last_dimension_axis = len(yuv_images.shape) - 1
4058  yuv_tensor_images = tf.truediv(
4059      tf.subtract(
4060          yuv_images,
4061          tf.reduce_min(yuv_images)
4062      ),
4063      tf.subtract(
4064          tf.reduce_max(yuv_images),
4065          tf.reduce_min(yuv_images)
4066       )
4067  )
4068  y, u, v = tf.split(yuv_tensor_images, 3, axis=last_dimension_axis)
4069  target_uv_min, target_uv_max = -0.5, 0.5
4070  u = u * (target_uv_max - target_uv_min) + target_uv_min
4071  v = v * (target_uv_max - target_uv_min) + target_uv_min
4072  preprocessed_yuv_images = tf.concat([y, u, v], axis=last_dimension_axis)
4073  rgb_tensor_images = tf.image.yuv_to_rgb(preprocessed_yuv_images)
4074  ```
4075
4076  Args:
4077    images: 2-D or higher rank. Image data to convert. Last dimension must be
4078      size 3.
4079
4080  Returns:
4081    images: tensor with the same shape as `images`.
4082  """
4083  images = ops.convert_to_tensor(images, name='images')
4084  kernel = ops.convert_to_tensor(
4085      _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel')
4086  ndims = images.get_shape().ndims
4087  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
4088
4089
4090def _verify_compatible_image_shapes(img1, img2):
4091  """Checks if two image tensors are compatible for applying SSIM or PSNR.
4092
4093  This function checks if two sets of images have ranks at least 3, and if the
4094  last three dimensions match.
4095
4096  Args:
4097    img1: Tensor containing the first image batch.
4098    img2: Tensor containing the second image batch.
4099
4100  Returns:
4101    A tuple containing: the first tensor shape, the second tensor shape, and a
4102    list of control_flow_ops.Assert() ops implementing the checks.
4103
4104  Raises:
4105    ValueError: When static shape check fails.
4106  """
4107  shape1 = img1.get_shape().with_rank_at_least(3)
4108  shape2 = img2.get_shape().with_rank_at_least(3)
4109  shape1[-3:].assert_is_compatible_with(shape2[-3:])
4110
4111  if shape1.ndims is not None and shape2.ndims is not None:
4112    for dim1, dim2 in zip(
4113        reversed(shape1.dims[:-3]), reversed(shape2.dims[:-3])):
4114      if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)):
4115        raise ValueError('Two images are not compatible: %s and %s' %
4116                         (shape1, shape2))
4117
4118  # Now assign shape tensors.
4119  shape1, shape2 = array_ops.shape_n([img1, img2])
4120
4121  # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
4122  checks = []
4123  checks.append(
4124      control_flow_ops.Assert(
4125          math_ops.greater_equal(array_ops.size(shape1), 3), [shape1, shape2],
4126          summarize=10))
4127  checks.append(
4128      control_flow_ops.Assert(
4129          math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])),
4130          [shape1, shape2],
4131          summarize=10))
4132  return shape1, shape2, checks
4133
4134
4135@tf_export('image.psnr')
4136@dispatch.add_dispatch_support
4137def psnr(a, b, max_val, name=None):
4138  """Returns the Peak Signal-to-Noise Ratio between a and b.
4139
4140  This is intended to be used on signals (or images). Produces a PSNR value for
4141  each image in batch.
4142
4143  The last three dimensions of input are expected to be [height, width, depth].
4144
4145  Example:
4146
4147  ```python
4148      # Read images from file.
4149      im1 = tf.image.decode_png(tf.io.read_file('path/to/im1.png'))
4150      im2 = tf.image.decode_png(tf.io.read_file('path/to/im2.png'))
4151      # Compute PSNR over tf.uint8 Tensors.
4152      psnr1 = tf.image.psnr(im1, im2, max_val=255)
4153
4154      # Compute PSNR over tf.float32 Tensors.
4155      im1 = tf.image.convert_image_dtype(im1, tf.float32)
4156      im2 = tf.image.convert_image_dtype(im2, tf.float32)
4157      psnr2 = tf.image.psnr(im1, im2, max_val=1.0)
4158      # psnr1 and psnr2 both have type tf.float32 and are almost equal.
4159  ```
4160
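  For reference, the value computed above is equivalent to the following NumPy
  sketch (illustrative only; it assumes `a` and `b` are float arrays whose
  values already lie in [0, max_val]):

  ```python
  import numpy as np

  def psnr_reference(a, b, max_val):
    # PSNR = 20 * log10(max_val) - 10 * log10(mean squared error).
    mse = np.mean((a - b) ** 2)
    return 20 * np.log10(max_val) - 10 * np.log10(mse)
  ```
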
4161  Args:
4162    a: First set of images.
4163    b: Second set of images.
4164    max_val: The dynamic range of the images (i.e., the difference between the
4165      maximum and the minimum allowed values).
4166    name: Namespace to embed the computation in.
4167
4168  Returns:
4169    The PSNR values between a and b. The returned tensor has type `tf.float32`
4170    and shape [batch_size, 1].
4171  """
4172  with ops.name_scope(name, 'PSNR', [a, b]):
4173    # Need to convert the images to float32.  Scale max_val accordingly so that
4174    # PSNR is computed correctly.
4175    max_val = math_ops.cast(max_val, a.dtype)
4176    max_val = convert_image_dtype(max_val, dtypes.float32)
4177    a = convert_image_dtype(a, dtypes.float32)
4178    b = convert_image_dtype(b, dtypes.float32)
4179    mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1])
4180    psnr_val = math_ops.subtract(
4181        20 * math_ops.log(max_val) / math_ops.log(10.0),
4182        np.float32(10 / np.log(10)) * math_ops.log(mse),
4183        name='psnr')
4184
4185    _, _, checks = _verify_compatible_image_shapes(a, b)
4186    with ops.control_dependencies(checks):
4187      return array_ops.identity(psnr_val)
4188
4189
4190def _ssim_helper(x, y, reducer, max_val, compensation=1.0, k1=0.01, k2=0.03):
4191  r"""Helper function for computing SSIM.
4192
4193  SSIM estimates covariances with weighted sums.  The default parameters
4194  use a biased estimate of the covariance:
4195  Suppose `reducer` is a weighted sum, then the mean estimators are
4196    \mu_x = \sum_i w_i x_i,
4197    \mu_y = \sum_i w_i y_i,
4198  where w_i's are the weighted-sum weights, and covariance estimator is
4199    cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
4200  with assumption \sum_i w_i = 1. This covariance estimator is biased, since
4201    E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y).
4202  For SSIM measure with unbiased covariance estimators, pass as `compensation`
4203  argument (1 - \sum_i w_i ^ 2).
4204
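  For example, for a normalized weighting kernel this unbiased compensation
  factor could be computed as follows (a sketch; `kernel` is assumed to hold
  the weights w_i):

  ```python
  compensation = 1.0 - tf.reduce_sum(tf.square(kernel))
  ```
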
4205  Args:
4206    x: First set of images.
4207    y: Second set of images.
4208    reducer: Function that computes 'local' averages from the set of images. For
4209      the non-convolutional version, this is usually tf.reduce_mean(x, [1, 2]),
4210      and for the convolutional version, this is usually tf.nn.avg_pool2d or
4211      tf.nn.conv2d with a weighted-sum kernel.
4212    max_val: The dynamic range (i.e., the difference between the maximum
4213      possible allowed value and the minimum allowed value).
4214    compensation: Compensation factor. See above.
4215    k1: Default value 0.01.
4216    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
4217      it is better to take values in the range 0 < K2 < 0.4).
4218
4219  Returns:
4220    A pair containing the luminance measure, and the contrast-structure measure.
4221  """
4222
4223  c1 = (k1 * max_val)**2
4224  c2 = (k2 * max_val)**2
4225
4226  # SSIM luminance measure is
4227  # (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1).
4228  mean0 = reducer(x)
4229  mean1 = reducer(y)
4230  num0 = mean0 * mean1 * 2.0
4231  den0 = math_ops.square(mean0) + math_ops.square(mean1)
4232  luminance = (num0 + c1) / (den0 + c1)
4233
4234  # SSIM contrast-structure measure is
4235  #   (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2).
4236  # Note that `reducer` is a weighted sum with weight w_k, \sum_i w_i = 1, then
4237  #   cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
4238  #          = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j).
4239  num1 = reducer(x * y) * 2.0
4240  den1 = reducer(math_ops.square(x) + math_ops.square(y))
4241  c2 *= compensation
4242  cs = (num1 - num0 + c2) / (den1 - den0 + c2)
4243
4244  # SSIM score is the product of the luminance and contrast-structure measures.
4245  return luminance, cs
4246
4247
4248def _fspecial_gauss(size, sigma):
4249  """Function to mimic the 'fspecial' gaussian MATLAB function."""
4250  size = ops.convert_to_tensor(size, dtypes.int32)
4251  sigma = ops.convert_to_tensor(sigma)
4252
4253  coords = math_ops.cast(math_ops.range(size), sigma.dtype)
4254  coords -= math_ops.cast(size - 1, sigma.dtype) / 2.0
4255
4256  g = math_ops.square(coords)
4257  g *= -0.5 / math_ops.square(sigma)
4258
4259  g = array_ops.reshape(g, shape=[1, -1]) + array_ops.reshape(g, shape=[-1, 1])
4260  g = array_ops.reshape(g, shape=[1, -1])  # For tf.nn.softmax().
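  # softmax exponentiates the log-weights -d^2 / (2 * sigma^2) and normalizes
  # them to sum to 1, yielding a normalized Gaussian kernel.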
4261  g = nn_ops.softmax(g)
4262  return array_ops.reshape(g, shape=[size, size, 1, 1])
4263
4264
4265def _ssim_per_channel(img1,
4266                      img2,
4267                      max_val=1.0,
4268                      filter_size=11,
4269                      filter_sigma=1.5,
4270                      k1=0.01,
4271                      k2=0.03,
4272                      return_index_map=False):
4273  """Computes SSIM index between img1 and img2 per color channel.
4274
4275  This function matches the standard SSIM implementation from:
4276  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
4277  quality assessment: from error visibility to structural similarity. IEEE
4278  transactions on image processing.
4279
4280  Details:
4281    - 11x11 Gaussian filter of width 1.5 is used.
4282    - k1 = 0.01, k2 = 0.03 as in the original paper.
4283
4284  Args:
4285    img1: First image batch.
4286    img2: Second image batch.
4287    max_val: The dynamic range of the images (i.e., the difference between the
4288      maximum and the minimum allowed values).
4289    filter_size: Default value 11 (size of gaussian filter).
4290    filter_sigma: Default value 1.5 (width of gaussian filter).
4291    k1: Default value 0.01.
4292    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
4293      it is better to take values in the range 0 < K2 < 0.4).
4294    return_index_map: If True returns local SSIM map instead of the global mean.
4295
4296  Returns:
4297    A pair of tensors containing the channel-wise SSIM and contrast-structure
4298    values. The shape is [..., channels].
4299  """
4300  filter_size = constant_op.constant(filter_size, dtype=dtypes.int32)
4301  filter_sigma = constant_op.constant(filter_sigma, dtype=img1.dtype)
4302
4303  shape1, shape2 = array_ops.shape_n([img1, img2])
4304  checks = [
4305      control_flow_ops.Assert(
4306          math_ops.reduce_all(
4307              math_ops.greater_equal(shape1[-3:-1], filter_size)),
4308          [shape1, filter_size],
4309          summarize=8),
4310      control_flow_ops.Assert(
4311          math_ops.reduce_all(
4312              math_ops.greater_equal(shape2[-3:-1], filter_size)),
4313          [shape2, filter_size],
4314          summarize=8)
4315  ]
4316
4317  # Enforce the check to run before computation.
4318  with ops.control_dependencies(checks):
4319    img1 = array_ops.identity(img1)
4320
4321  # TODO(sjhwang): Try to cache kernels and compensation factor.
4322  kernel = _fspecial_gauss(filter_size, filter_sigma)
4323  kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1])
4324
4325  # The correct compensation factor is `1.0 - tf.reduce_sum(tf.square(kernel))`,
4326  # but to match MATLAB implementation of MS-SSIM, we use 1.0 instead.
4327  compensation = 1.0
4328
4329  # TODO(sjhwang): Try FFT.
4330  # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying
4331  #   1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter.
4332  def reducer(x):
4333    shape = array_ops.shape(x)
4334    x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0))
4335    y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
4336    return array_ops.reshape(
4337        y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0))
4338
4339  luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation, k1,
4340                               k2)
4341
4342  # Average over the second and the third from the last: height, width.
4343  if return_index_map:
4344    ssim_val = luminance * cs
4345  else:
4346    axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
4347    ssim_val = math_ops.reduce_mean(luminance * cs, axes)
4348    cs = math_ops.reduce_mean(cs, axes)
4349  return ssim_val, cs
4350
4351
4352@tf_export('image.ssim')
4353@dispatch.add_dispatch_support
4354def ssim(img1,
4355         img2,
4356         max_val,
4357         filter_size=11,
4358         filter_sigma=1.5,
4359         k1=0.01,
4360         k2=0.03,
4361         return_index_map=False):
4362  """Computes SSIM index between img1 and img2.
4363
4364  This function is based on the standard SSIM implementation from:
4365  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
4366  quality assessment: from error visibility to structural similarity. IEEE
4367  transactions on image processing.
4368
4369  Note: The true SSIM is only defined on grayscale.  This function does not
4370  perform any colorspace transform.  (If the input is already YUV, then it will
4371  compute YUV SSIM average.)
4372
4373  Details:
4374    - 11x11 Gaussian filter of width 1.5 is used.
4375    - k1 = 0.01, k2 = 0.03 as in the original paper.
4376
4377  The image sizes must be at least 11x11 because of the filter size.
4378
4379  Example:
4380
4381  ```python
4382      # Read images (of size 255 x 255) from file.
4383      im1 = tf.image.decode_image(tf.io.read_file('path/to/im1.png'))
4384      im2 = tf.image.decode_image(tf.io.read_file('path/to/im2.png'))
4385      tf.shape(im1)  # `im1.png` has 3 channels; shape is `(255, 255, 3)`
4386      tf.shape(im2)  # `im2.png` has 3 channels; shape is `(255, 255, 3)`
4387      # Add an outer batch for each image.
4388      im1 = tf.expand_dims(im1, axis=0)
4389      im2 = tf.expand_dims(im2, axis=0)
4390      # Compute SSIM over tf.uint8 Tensors.
4391      ssim1 = tf.image.ssim(im1, im2, max_val=255, filter_size=11,
4392                            filter_sigma=1.5, k1=0.01, k2=0.03)
4393
4394      # Compute SSIM over tf.float32 Tensors.
4395      im1 = tf.image.convert_image_dtype(im1, tf.float32)
4396      im2 = tf.image.convert_image_dtype(im2, tf.float32)
4397      ssim2 = tf.image.ssim(im1, im2, max_val=1.0, filter_size=11,
4398                            filter_sigma=1.5, k1=0.01, k2=0.03)
4399      # ssim1 and ssim2 both have type tf.float32 and are almost equal.
4400  ```
4401
4402  Args:
4403    img1: First image batch. 4-D Tensor of shape `[batch, height, width,
4404      channels]` with only positive pixel values.
4405    img2: Second image batch. 4-D Tensor of shape `[batch, height, width,
4406      channels]` with only positive pixel values.
4407    max_val: The dynamic range of the images (i.e., the difference between the
4408      maximum and the minimum allowed values).
4409    filter_size: Default value 11 (size of gaussian filter).
4410    filter_sigma: Default value 1.5 (width of gaussian filter).
4411    k1: Default value 0.01.
4412    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
4413      it is better to take values in the range 0 < K2 < 0.4).
4414    return_index_map: If True returns local SSIM map instead of the global mean.
4415
4416  Returns:
4417    A tensor containing an SSIM value for each image in batch or a tensor
4418    containing an SSIM value for each pixel for each image in batch if
4419    return_index_map is True. Returned SSIM values are in range (-1, 1], when
4420    pixel values are non-negative. Returns a tensor with shape:
4421    broadcast(img1.shape[:-3], img2.shape[:-3]) or broadcast(img1.shape[:-1],
4422    img2.shape[:-1]).
4423  """
4424  with ops.name_scope(None, 'SSIM', [img1, img2]):
4425    # Convert to tensor if needed.
4426    img1 = ops.convert_to_tensor(img1, name='img1')
4427    img2 = ops.convert_to_tensor(img2, name='img2')
4428    # Shape checking.
4429    _, _, checks = _verify_compatible_image_shapes(img1, img2)
4430    with ops.control_dependencies(checks):
4431      img1 = array_ops.identity(img1)
4432
4433    # Need to convert the images to float32.  Scale max_val accordingly so that
4434    # SSIM is computed correctly.
4435    max_val = math_ops.cast(max_val, img1.dtype)
4436    max_val = convert_image_dtype(max_val, dtypes.float32)
4437    img1 = convert_image_dtype(img1, dtypes.float32)
4438    img2 = convert_image_dtype(img2, dtypes.float32)
4439    ssim_per_channel, _ = _ssim_per_channel(img1, img2, max_val, filter_size,
4440                                            filter_sigma, k1, k2,
4441                                            return_index_map)
4442    # Compute average over color channels.
4443    return math_ops.reduce_mean(ssim_per_channel, [-1])
4444
4445
4446# Default values obtained by Wang et al.
4447_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)
4448
4449
4450@tf_export('image.ssim_multiscale')
4451@dispatch.add_dispatch_support
4452def ssim_multiscale(img1,
4453                    img2,
4454                    max_val,
4455                    power_factors=_MSSSIM_WEIGHTS,
4456                    filter_size=11,
4457                    filter_sigma=1.5,
4458                    k1=0.01,
4459                    k2=0.03):
4460  """Computes the MS-SSIM between img1 and img2.
4461
4462  This function assumes that `img1` and `img2` are image batches, i.e. the last
4463  three dimensions are [height, width, channels].
4464
4465  Note: The true SSIM is only defined on grayscale.  This function does not
4466  perform any colorspace transform.  (If the input is already YUV, then it will
4467  compute YUV SSIM average.)
4468
4469  Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale
4470  structural similarity for image quality assessment." Signals, Systems and
4471  Computers, 2004.
4472
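  A minimal usage sketch (shapes are illustrative; with the five default
  power_factors the smallest scale is 1/16 of the input resolution, which must
  still be at least `filter_size` pixels on each side):

  ```python
  im1 = tf.random.uniform(shape=[1, 256, 256, 3])
  im2 = tf.random.uniform(shape=[1, 256, 256, 3])
  score = tf.image.ssim_multiscale(im1, im2, max_val=1.0)  # Shape: [1].
  ```
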
4473  Args:
4474    img1: First image batch with only positive pixel values.
4475    img2: Second image batch with only positive pixel values. Must have the
4476      same rank as img1.
4477    max_val: The dynamic range of the images (i.e., the difference between the
4478      maximum and the minimum allowed values).
4479    power_factors: Iterable of weights for each of the scales. The number of
4480      scales used is the length of the list. Index 0 is the unscaled
4481      resolution's weight and each increasing scale corresponds to the image
4482      being downsampled by 2.  Defaults to (0.0448, 0.2856, 0.3001, 0.2363,
4483      0.1333), which are the values obtained in the original paper.
4484    filter_size: Default value 11 (size of gaussian filter).
4485    filter_sigma: Default value 1.5 (width of gaussian filter).
4486    k1: Default value 0.01.
4487    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
4488      it is better to take values in the range 0 < K2 < 0.4).
4489
4490  Returns:
4491    A tensor containing an MS-SSIM value for each image in batch.  The values
4492    are in range [0, 1].  Returns a tensor with shape:
4493    broadcast(img1.shape[:-3], img2.shape[:-3]).
4494  """
4495  with ops.name_scope(None, 'MS-SSIM', [img1, img2]):
4496    # Convert to tensor if needed.
4497    img1 = ops.convert_to_tensor(img1, name='img1')
4498    img2 = ops.convert_to_tensor(img2, name='img2')
4499    # Shape checking.
4500    shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2)
4501    with ops.control_dependencies(checks):
4502      img1 = array_ops.identity(img1)
4503
4504    # Need to convert the images to float32.  Scale max_val accordingly so that
4505    # SSIM is computed correctly.
4506    max_val = math_ops.cast(max_val, img1.dtype)
4507    max_val = convert_image_dtype(max_val, dtypes.float32)
4508    img1 = convert_image_dtype(img1, dtypes.float32)
4509    img2 = convert_image_dtype(img2, dtypes.float32)
4510
4511    imgs = [img1, img2]
4512    shapes = [shape1, shape2]
4513
4514    # img1 and img2 are assumed to be a (multi-dimensional) batch of
4515    # 3-dimensional images (height, width, channels). `heads` contain the batch
4516    # dimensions, and `tails` contain the image dimensions.
4517    heads = [s[:-3] for s in shapes]
4518    tails = [s[-3:] for s in shapes]
4519
4520    divisor = [1, 2, 2, 1]
4521    divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32)
4522
4523    def do_pad(images, remainder):
4524      padding = array_ops.expand_dims(remainder, -1)
4525      padding = array_ops.pad(padding, [[1, 0], [1, 0]])
4526      return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images]
4527
4528    mcs = []
4529    for k in range(len(power_factors)):
4530      with ops.name_scope(None, 'Scale%d' % k, imgs):
4531        if k > 0:
4532          # Avg pool takes rank 4 tensors. Flatten leading dimensions.
4533          flat_imgs = [
4534              array_ops.reshape(x, array_ops.concat([[-1], t], 0))
4535              for x, t in zip(imgs, tails)
4536          ]
4537
4538          remainder = tails[0] % divisor_tensor
4539          need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0))
4540          # pylint: disable=cell-var-from-loop
4541          padded = control_flow_ops.cond(need_padding,
4542                                         lambda: do_pad(flat_imgs, remainder),
4543                                         lambda: flat_imgs)
4544          # pylint: enable=cell-var-from-loop
4545
4546          downscaled = [
4547              nn_ops.avg_pool(
4548                  x, ksize=divisor, strides=divisor, padding='VALID')
4549              for x in padded
4550          ]
4551          tails = [x[1:] for x in array_ops.shape_n(downscaled)]
4552          imgs = [
4553              array_ops.reshape(x, array_ops.concat([h, t], 0))
4554              for x, h, t in zip(downscaled, heads, tails)
4555          ]
4556
4557        # Overwrite previous ssim value since we only need the last one.
4558        ssim_per_channel, cs = _ssim_per_channel(
4559            *imgs,
4560            max_val=max_val,
4561            filter_size=filter_size,
4562            filter_sigma=filter_sigma,
4563            k1=k1,
4564            k2=k2)
4565        mcs.append(nn_ops.relu(cs))
4566
4567    # Remove the cs score for the last scale. In the MS-SSIM calculation,
4568    # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p).
4569    mcs.pop()  # Remove the cs score for the last scale.
4570    mcs_and_ssim = array_ops.stack(
4571        mcs + [nn_ops.relu(ssim_per_channel)], axis=-1)
4572    # Take weighted geometric mean across the scale axis.
4573    ms_ssim = math_ops.reduce_prod(
4574        math_ops.pow(mcs_and_ssim, power_factors), [-1])
4575
4576    return math_ops.reduce_mean(ms_ssim, [-1])  # Avg over color channels.
4577
4578
4579@tf_export('image.image_gradients')
4580@dispatch.add_dispatch_support
4581def image_gradients(image):
4582  """Returns image gradients (dy, dx) for each color channel.
4583
4584  Both output tensors have the same shape as the input: [batch_size, h, w,
4585  d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in
4586  location (x, y). That means that dy will always have zeros in the last row,
4587  and dx will always have zeros in the last column.
4588
4589  Usage Example:
4590    ```python
4591    BATCH_SIZE = 1
4592    IMAGE_HEIGHT = 5
4593    IMAGE_WIDTH = 5
4594    CHANNELS = 1
4595    image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS,
4596      delta=1, dtype=tf.float32),
4597      shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
4598    dy, dx = tf.image.image_gradients(image)
4599    print(image[0, :,:,0])
4600    tf.Tensor(
4601      [[ 0.  1.  2.  3.  4.]
4602      [ 5.  6.  7.  8.  9.]
4603      [10. 11. 12. 13. 14.]
4604      [15. 16. 17. 18. 19.]
4605      [20. 21. 22. 23. 24.]], shape=(5, 5), dtype=float32)
4606    print(dy[0, :,:,0])
4607    tf.Tensor(
4608      [[5. 5. 5. 5. 5.]
4609      [5. 5. 5. 5. 5.]
4610      [5. 5. 5. 5. 5.]
4611      [5. 5. 5. 5. 5.]
4612      [0. 0. 0. 0. 0.]], shape=(5, 5), dtype=float32)
4613    print(dx[0, :,:,0])
4614    tf.Tensor(
4615      [[1. 1. 1. 1. 0.]
4616      [1. 1. 1. 1. 0.]
4617      [1. 1. 1. 1. 0.]
4618      [1. 1. 1. 1. 0.]
4619      [1. 1. 1. 1. 0.]], shape=(5, 5), dtype=float32)
4620    ```
4621
4622  Args:
4623    image: Tensor with shape [batch_size, h, w, d].
4624
4625  Returns:
4626    Pair of tensors (dy, dx) holding the vertical and horizontal image
4627    gradients (1-step finite difference).
4628
4629  Raises:
4630    ValueError: If `image` is not a 4D tensor.
4631  """
4632  if image.get_shape().ndims != 4:
4633    raise ValueError('image_gradients expects a 4D tensor '
4634                     '[batch_size, h, w, d], not {}.'.format(image.get_shape()))
4635  image_shape = array_ops.shape(image)
4636  batch_size, height, width, depth = array_ops.unstack(image_shape)
4637  dy = image[:, 1:, :, :] - image[:, :-1, :, :]
4638  dx = image[:, :, 1:, :] - image[:, :, :-1, :]
4639
4640  # Return tensors with same size as original image by concatenating
4641  # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y).
4642  shape = array_ops.stack([batch_size, 1, width, depth])
4643  dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1)
4644  dy = array_ops.reshape(dy, image_shape)
4645
4646  shape = array_ops.stack([batch_size, height, 1, depth])
4647  dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2)
4648  dx = array_ops.reshape(dx, image_shape)
4649
4650  return dy, dx
4651
4652
4653@tf_export('image.sobel_edges')
4654@dispatch.add_dispatch_support
4655def sobel_edges(image):
4656  """Returns a tensor holding Sobel edge maps.
4657
4658  Example usage:
4659
4660  For general usage, `image` would be loaded from a file as below:
4661
4662  ```python
4663  image_bytes = tf.io.read_file(path_to_image_file)
4664  image = tf.image.decode_image(image_bytes)
4665  image = tf.cast(image, tf.float32)
4666  image = tf.expand_dims(image, 0)
4667  ```
4668  But for demo purposes, we are using randomly generated values for `image`:
4669
4670  >>> image = tf.random.uniform(
4671  ...   maxval=255, shape=[1, 28, 28, 3], dtype=tf.float32)
4672  >>> sobel = tf.image.sobel_edges(image)
4673  >>> sobel_y = np.asarray(sobel[0, :, :, :, 0]) # sobel in y-direction
4674  >>> sobel_x = np.asarray(sobel[0, :, :, :, 1]) # sobel in x-direction
4675
4676  For displaying the sobel results, PIL's [Image Module](
4677  https://pillow.readthedocs.io/en/stable/reference/Image.html) can be used:
4678
4679  ```python
4680  # Display edge maps for the first channel (at index 0)
4681  Image.fromarray(sobel_y[..., 0] / 4 + 0.5).show()
4682  Image.fromarray(sobel_x[..., 0] / 4 + 0.5).show()
4683  ```
4684
4685  Args:
4686    image: Image tensor with shape [batch_size, h, w, d] and type float32 or
4687      float64.  The image(s) must be 2x2 or larger.
4688
4689  Returns:
4690    Tensor holding edge maps for each channel. Returns a tensor with shape
4691    [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]],
4692    [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter.
4693  """
4694  # Define vertical and horizontal Sobel filters.
4695  static_image_shape = image.get_shape()
4696  image_shape = array_ops.shape(image)
4697  kernels = [[[-1, -2, -1], [0, 0, 0], [1, 2, 1]],
4698             [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]]
4699  num_kernels = len(kernels)
4700  kernels = np.transpose(np.asarray(kernels), (1, 2, 0))
4701  kernels = np.expand_dims(kernels, -2)
4702  kernels_tf = constant_op.constant(kernels, dtype=image.dtype)
4703
4704  kernels_tf = array_ops.tile(
4705      kernels_tf, [1, 1, image_shape[-1], 1], name='sobel_filters')
4706
4707  # Use depth-wise convolution to calculate edge maps per channel.
4708  pad_sizes = [[0, 0], [1, 1], [1, 1], [0, 0]]
4709  padded = array_ops.pad(image, pad_sizes, mode='REFLECT')
4710
4711  # Output tensor has shape [batch_size, h, w, d * num_kernels].
4712  strides = [1, 1, 1, 1]
4713  output = nn.depthwise_conv2d(padded, kernels_tf, strides, 'VALID')
4714
4715  # Reshape to [batch_size, h, w, d, num_kernels].
4716  shape = array_ops.concat([image_shape, [num_kernels]], 0)
4717  output = array_ops.reshape(output, shape=shape)
4718  output.set_shape(static_image_shape.concatenate([num_kernels]))
4719  return output
4720
4721
4722def resize_bicubic(images,
4723                   size,
4724                   align_corners=False,
4725                   name=None,
4726                   half_pixel_centers=False):
4727  return gen_image_ops.resize_bicubic(
4728      images=images,
4729      size=size,
4730      align_corners=align_corners,
4731      half_pixel_centers=half_pixel_centers,
4732      name=name)
4733
4734
4735def resize_bilinear(images,
4736                    size,
4737                    align_corners=False,
4738                    name=None,
4739                    half_pixel_centers=False):
4740  return gen_image_ops.resize_bilinear(
4741      images=images,
4742      size=size,
4743      align_corners=align_corners,
4744      half_pixel_centers=half_pixel_centers,
4745      name=name)
4746
4747
4748def resize_nearest_neighbor(images,
4749                            size,
4750                            align_corners=False,
4751                            name=None,
4752                            half_pixel_centers=False):
4753  return gen_image_ops.resize_nearest_neighbor(
4754      images=images,
4755      size=size,
4756      align_corners=align_corners,
4757      half_pixel_centers=half_pixel_centers,
4758      name=name)
4759
4760
4761resize_area_deprecation = deprecation.deprecated(
4762    date=None,
4763    instructions=(
4764        'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.'))
4765tf_export(v1=['image.resize_area'])(
4766    resize_area_deprecation(
4767        dispatch.add_dispatch_support(gen_image_ops.resize_area)))
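# For example, a deprecated call like `tf.compat.v1.image.resize_area(images,
# size)` maps to `tf.image.resize(images, size,
# method=tf.image.ResizeMethod.AREA)` (an illustrative equivalence based on
# the deprecation instructions above).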
4768
4769resize_bicubic_deprecation = deprecation.deprecated(
4770    date=None,
4771    instructions=(
4772        'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.'))
4773tf_export(v1=['image.resize_bicubic'])(
4774    dispatch.add_dispatch_support(resize_bicubic_deprecation(resize_bicubic)))
4775
4776resize_bilinear_deprecation = deprecation.deprecated(
4777    date=None,
4778    instructions=(
4779        'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.'))
4780tf_export(v1=['image.resize_bilinear'])(
4781    dispatch.add_dispatch_support(resize_bilinear_deprecation(resize_bilinear)))
4782
4783resize_nearest_neighbor_deprecation = deprecation.deprecated(
4784    date=None,
4785    instructions=(
4786        'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` '
4787        'instead.'))
4788tf_export(v1=['image.resize_nearest_neighbor'])(
4789    dispatch.add_dispatch_support(
4790        resize_nearest_neighbor_deprecation(resize_nearest_neighbor)))
4791
4792
4793@tf_export('image.crop_and_resize', v1=[])
4794@dispatch.add_dispatch_support
4795def crop_and_resize_v2(image,
4796                       boxes,
4797                       box_indices,
4798                       crop_size,
4799                       method='bilinear',
4800                       extrapolation_value=.0,
4801                       name=None):
4802  """Extracts crops from the input image tensor and resizes them.
4803
4804  Extracts crops from the input image tensor and resizes them using bilinear
4805  sampling or nearest neighbor sampling (possibly with aspect ratio change) to a
4806  common output size specified by `crop_size`. This is more general than the
4807  `crop_to_bounding_box` op which extracts a fixed size slice from the input
4808  image and does not allow resizing or aspect ratio change.
4809
4810  Returns a tensor with `crops` from the input `image` at positions defined at
4811  the bounding box locations in `boxes`. The cropped boxes are all resized (with
4812  bilinear or nearest neighbor interpolation) to a fixed
4813  `size = [crop_height, crop_width]`. The result is a 4-D tensor
4814  `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned.
4815  In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical
4816  results to using `tf.compat.v1.image.resize_bilinear()` or
4817  `tf.compat.v1.image.resize_nearest_neighbor()` (depending on the `method`
4818  argument) with
4819  `align_corners=True`.
4820
4821  Args:
4822    image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
4823      Both `image_height` and `image_width` need to be positive.
4824    boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
4825      specifies the coordinates of a box in the `box_ind[i]` image and is
4826      specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
4827      coordinate value of `y` is mapped to the image coordinate at `y *
4828      (image_height - 1)`, so as the `[0, 1]` interval of normalized image
4829      height is mapped to `[0, image_height - 1]` in image height coordinates.
4830      We do allow `y1` > `y2`, in which case the sampled crop is an up-down
4831      flipped version of the original image. The width dimension is treated
4832      similarly. Normalized coordinates outside the `[0, 1]` range are allowed,
4833      in which case we use `extrapolation_value` to extrapolate the input image
4834      values.
4835    box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0,
4836      batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box
4837      refers to.
4838    crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`.
4839      All cropped image patches are resized to this size. The aspect ratio of
4840      the image content is not preserved. Both `crop_height` and `crop_width`
4841      need to be positive.
4842    method: An optional string specifying the sampling method for resizing. It
4843      can be either `"bilinear"` or `"nearest"` and default to `"bilinear"`.
4844      Currently two sampling methods are supported: Bilinear and Nearest
4845        Neighbor.
4846    extrapolation_value: An optional `float`. Defaults to `0.0`. Value used for
4847      extrapolation, when applicable.
4848    name: A name for the operation (optional).
4849
4850  Returns:
4851    A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
4852
4853  Example:
4854
4855  ```python
4856  import tensorflow as tf
4857  BATCH_SIZE = 1
4858  NUM_BOXES = 5
4859  IMAGE_HEIGHT = 256
4860  IMAGE_WIDTH = 256
4861  CHANNELS = 3
4862  CROP_SIZE = (24, 24)
4863
4864  image = tf.random.normal(shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH,
4865                           CHANNELS))
4866  boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
4867  box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0,
4868                                  maxval=BATCH_SIZE, dtype=tf.int32)
4869  output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE)
4870  output.shape  #=> (5, 24, 24, 3)
4871  ```
4872  """
4873  return gen_image_ops.crop_and_resize(image, boxes, box_indices, crop_size,
4874                                       method, extrapolation_value, name)
4875
4876
4877@tf_export(v1=['image.crop_and_resize'])
4878@dispatch.add_dispatch_support
4879@deprecation.deprecated_args(None,
4880                             'box_ind is deprecated, use box_indices instead',
4881                             'box_ind')
4882def crop_and_resize_v1(  # pylint: disable=missing-docstring
4883    image,
4884    boxes,
4885    box_ind=None,
4886    crop_size=None,
4887    method='bilinear',
4888    extrapolation_value=0,
4889    name=None,
4890    box_indices=None):
4891  box_ind = deprecation.deprecated_argument_lookup('box_indices', box_indices,
4892                                                   'box_ind', box_ind)
4893  return gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method,
4894                                       extrapolation_value, name)
4895
4896
4897crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__
4898
4899
4900@tf_export(v1=['image.extract_glimpse'])
4901@dispatch.add_dispatch_support
4902def extract_glimpse(
4903    input,  # pylint: disable=redefined-builtin
4904    size,
4905    offsets,
4906    centered=True,
4907    normalized=True,
4908    uniform_noise=True,
4909    name=None):
4910  """Extracts a glimpse from the input tensor.
4911
4912  Returns a set of windows called glimpses extracted at location
4913  `offsets` from the input tensor. If the windows only partially
4914  overlap the inputs, the non-overlapping areas will be filled with
4915  random noise.
4916
4917  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
4918  glimpse_width, channels]`. The channels and batch dimensions are the
4919  same as that of the input tensor. The height and width of the output
4920  windows are specified in the `size` parameter.
4921
4922  The arguments `normalized` and `centered` control how the windows are built:
4923
4924  * If the coordinates are normalized but not centered, 0.0 and 1.0
4925    correspond to the minimum and maximum of each height and width
4926    dimension.
4927  * If the coordinates are both normalized and centered, they range from
4928    -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
4929    left corner, the lower right corner is located at (1.0, 1.0) and the
4930    center is at (0, 0).
4931  * If the coordinates are not normalized they are interpreted as
4932    numbers of pixels.
4933
4934  Usage Example:
4935
4936  >>> x = [[[[0.0],
4937  ...           [1.0],
4938  ...           [2.0]],
4939  ...          [[3.0],
4940  ...           [4.0],
4941  ...           [5.0]],
4942  ...          [[6.0],
4943  ...           [7.0],
4944  ...           [8.0]]]]
4945  >>> tf.compat.v1.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
4946  ...                                    centered=False, normalized=False)
4947  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
4948  array([[[[0.],
4949           [1.]],
4950          [[3.],
4951           [4.]]]], dtype=float32)>
4952
4953  Args:
4954    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
4955      `[batch_size, height, width, channels]`.
4956    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
4957      size of the glimpses to extract.  The glimpse height must be specified
4958      first, followed by the glimpse width.
4959    offsets: A `Tensor` of type `float32`. A 2-D float tensor of shape
4960      `[batch_size, 2]` containing the y, x locations of the center of each
4961      window.
4962    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
4963      coordinates are centered relative to the image, in which case the (0, 0)
4964      offset is relative to the center of the input images. If false, the (0, 0)
4965      offset corresponds to the upper left corner of the input images.
4966    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
4967      coordinates are normalized.
4968    uniform_noise: An optional `bool`. Defaults to `True`. Indicates if the
4969      noise should be generated using a uniform distribution or a Gaussian
4970      distribution.
4971    name: A name for the operation (optional).
4972
4973  Returns:
4974    A `Tensor` of type `float32`.
4975  """
4976  return gen_image_ops.extract_glimpse(
4977      input=input,
4978      size=size,
4979      offsets=offsets,
4980      centered=centered,
4981      normalized=normalized,
4982      uniform_noise=uniform_noise,
4983      name=name)
4984
4985
4986@tf_export('image.extract_glimpse', v1=[])
4987@dispatch.add_dispatch_support
4988def extract_glimpse_v2(
4989    input,  # pylint: disable=redefined-builtin
4990    size,
4991    offsets,
4992    centered=True,
4993    normalized=True,
4994    noise='uniform',
4995    name=None):
4996  """Extracts a glimpse from the input tensor.
4997
4998  Returns a set of windows called glimpses extracted at location
4999  `offsets` from the input tensor. If the windows only partially
5000  overlap the inputs, the non-overlapping areas will be filled with
5001  random noise.
5002
5003  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
5004  glimpse_width, channels]`. The channels and batch dimensions are the
5005  same as that of the input tensor. The height and width of the output
5006  windows are specified in the `size` parameter.
5007
5008  The arguments `normalized` and `centered` control how the windows are built:
5009
5010  * If the coordinates are normalized but not centered, 0.0 and 1.0
5011    correspond to the minimum and maximum of each height and width
5012    dimension.
5013  * If the coordinates are both normalized and centered, they range from
5014    -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
5015    left corner, the lower right corner is located at (1.0, 1.0) and the
5016    center is at (0, 0).
5017  * If the coordinates are not normalized they are interpreted as
5018    numbers of pixels.
5019
5020  Usage Example:
5021
5022  >>> x = [[[[0.0],
5023  ...           [1.0],
5024  ...           [2.0]],
5025  ...          [[3.0],
5026  ...           [4.0],
5027  ...           [5.0]],
5028  ...          [[6.0],
5029  ...           [7.0],
5030  ...           [8.0]]]]
5031  >>> tf.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
5032  ...                         centered=False, normalized=False)
5033  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
5034  array([[[[4.],
5035           [5.]],
5036          [[7.],
5037           [8.]]]], dtype=float32)>
5038
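  The `noise` argument controls how out-of-bounds areas are filled; for
  example (an illustrative sketch reusing `x` from above):

  ```python
  tf.image.extract_glimpse(x, size=(2, 2), offsets=[[0, 0]],
                           centered=False, normalized=False, noise='zero')
  ```
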
5039  Args:
5040    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
5041      `[batch_size, height, width, channels]`.
5042    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
5043      size of the glimpses to extract.  The glimpse height must be specified
5044      first, followed by the glimpse width.
5045    offsets: A `Tensor` of type `float32`. A 2-D float tensor of shape
5046      `[batch_size, 2]` containing the y, x locations of the center of each
5047      window.
5048    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
5049      coordinates are centered relative to the image, in which case the (0, 0)
5050      offset is relative to the center of the input images. If false, the (0, 0)
5051      offset corresponds to the upper left corner of the input images.
5052    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
5053      coordinates are normalized.
5054    noise: An optional `string`. Defaults to `uniform`. Indicates if the noise
5055      should be `uniform` (uniform distribution), `gaussian` (Gaussian
5056      distribution), or `zero` (zero padding).
5057    name: A name for the operation (optional).
5058
5059  Returns:
5060    A `Tensor` of type `float32`.
5061  """
5062  return gen_image_ops.extract_glimpse_v2(
5063      input=input,
5064      size=size,
5065      offsets=offsets,
5066      centered=centered,
5067      normalized=normalized,
5068      noise=noise,
5069      uniform_noise=False,
5070      name=name)
5071
5072
5073@tf_export('image.combined_non_max_suppression')
5074@dispatch.add_dispatch_support
5075def combined_non_max_suppression(boxes,
5076                                 scores,
5077                                 max_output_size_per_class,
5078                                 max_total_size,
5079                                 iou_threshold=0.5,
5080                                 score_threshold=float('-inf'),
5081                                 pad_per_class=False,
5082                                 clip_boxes=True,
5083                                 name=None):
5084  """Greedily selects a subset of bounding boxes in descending order of score.
5085
5086  This operation performs non_max_suppression on the inputs per batch, across
5087  all classes.
5088  Prunes away boxes that have high intersection-over-union (IOU) overlap
5089  with previously selected boxes.  Bounding boxes are supplied as
5090  [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
5091  diagonal pair of box corners and the coordinates can be provided as normalized
5092  (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
5093  is agnostic to where the origin is in the coordinate system. Also note that
5094  this algorithm is invariant to orthogonal transformations and translations
5095  of the coordinate system; thus translations or reflections of the coordinate
5096  system result in the same boxes being selected by the algorithm.
5097  The output of this operation is the final boxes, scores and classes tensor
5098  returned after performing non_max_suppression.
5099
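  A minimal usage sketch (shapes and sizes are illustrative):

  ```python
  boxes = tf.random.uniform([2, 100, 1, 4])   # q=1: boxes shared per class.
  scores = tf.random.uniform([2, 100, 90])    # 90 classes.
  nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
      tf.image.combined_non_max_suppression(
          boxes, scores, max_output_size_per_class=10, max_total_size=50))
  ```
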
5100  Args:
5101    boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q`
5102      is 1, the same boxes are used for all classes; otherwise, if `q` is equal
5103      to the number of classes, class-specific boxes are used.
5104    scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]`
5105      representing a single score corresponding to each box (each row of boxes).
5106    max_output_size_per_class: A scalar integer `Tensor` representing the
5107      maximum number of boxes to be selected by non-max suppression per class.
5108    max_total_size: An int32 scalar representing the maximum number of boxes
5109      retained over all classes. Note that setting this value to a large number
5110      may result in an OOM error depending on the system workload.
5111    iou_threshold: A float representing the threshold for deciding whether boxes
5112      overlap too much with respect to IOU.
5113    score_threshold: A float representing the threshold for deciding when to
5114      remove boxes based on score.
5115    pad_per_class: If false, the output nmsed boxes, scores and classes are
5116      padded/clipped to `max_total_size`. If true, the output nmsed boxes,
5117      scores and classes are padded to be of length
5118      `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in
5119      which case it is clipped to `max_total_size`. Defaults to false.
5120    clip_boxes: If true, the coordinates of output nmsed boxes will be clipped
5121      to [0, 1]. If false, output the box coordinates as they are. Defaults to
5122      true.
5123    name: A name for the operation (optional).
5124
5125  Returns:
5126    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
5127      containing the non-max suppressed boxes.
5128    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
5129      the scores for the boxes.
5130    'nmsed_classes': A [batch_size, max_detections] float32 tensor
5131      containing the class for boxes.
5132    'valid_detections': A [batch_size] int32 tensor indicating the number of
5133      valid detections per batch item. Only the top valid_detections[i] entries
5134      in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
5135      entries are zero paddings.
5136  """
5137  with ops.name_scope(name, 'combined_non_max_suppression'):
5138    iou_threshold = ops.convert_to_tensor(
5139        iou_threshold, dtype=dtypes.float32, name='iou_threshold')
5140    score_threshold = ops.convert_to_tensor(
5141        score_threshold, dtype=dtypes.float32, name='score_threshold')
5142
5143    # Convert `max_total_size` to tensor *without* setting the `dtype` param.
5144    # This allows us to catch `int32` overflow case with `max_total_size`
5145    # whose expected dtype is `int32` by the op registration. Any number within
5146    # `int32` will get converted to `int32` tensor. Anything larger will get
5147    # converted to `int64`. Passing in `int64` for `max_total_size` to the op
5148    # will throw dtype mismatch exception.
5149    # TODO(b/173251596): Once there is a more general solution to warn against
5150    # int overflow conversions, revisit this check.
5151    max_total_size = ops.convert_to_tensor(max_total_size)
5152
5153    return gen_image_ops.combined_non_max_suppression(
5154        boxes, scores, max_output_size_per_class, max_total_size, iou_threshold,
5155        score_threshold, pad_per_class, clip_boxes)
5156
5157
5158def _bbox_overlap(boxes_a, boxes_b):
5159  """Calculates the overlap (iou - intersection over union) between boxes_a and boxes_b.
5160
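  For intuition, the IoU of a single pair of boxes in [ymin, xmin, ymax, xmax]
  form reduces to the following scalar sketch (illustrative; the function
  below computes this for all N x M pairs in a batch):

  ```python
  def iou(a, b):
    i_h = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
    i_w = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
    inter = i_h * i_w
    union = ((a[2] - a[0]) * (a[3] - a[1]) +
             (b[2] - b[0]) * (b[3] - b[1]) - inter)
    return inter / union
  ```
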
5161  Args:
5162    boxes_a: a tensor with a shape of [batch_size, N, 4]. N is the number of
5163      boxes per image. The last dimension is the pixel coordinates in
5164      [ymin, xmin, ymax, xmax] form.
5165    boxes_b: a tensor with a shape of [batch_size, M, 4]. M is the number of
5166      boxes. The last dimension is the pixel coordinates in
5167      [ymin, xmin, ymax, xmax] form.
5168  Returns:
5169    intersection_over_union: a tensor with a shape of [batch_size, N, M],
5170      representing the ratio of intersection area over union area (IoU) between
5171      two boxes.
5172  """
5173  with ops.name_scope('bbox_overlap'):
5174    a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split(
5175        value=boxes_a, num_or_size_splits=4, axis=2)
5176    b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split(
5177        value=boxes_b, num_or_size_splits=4, axis=2)
5178
5179    # Calculates the intersection area.
5180    i_xmin = math_ops.maximum(
5181        a_x_min, array_ops.transpose(b_x_min, [0, 2, 1]))
5182    i_xmax = math_ops.minimum(
5183        a_x_max, array_ops.transpose(b_x_max, [0, 2, 1]))
5184    i_ymin = math_ops.maximum(
5185        a_y_min, array_ops.transpose(b_y_min, [0, 2, 1]))
5186    i_ymax = math_ops.minimum(
5187        a_y_max, array_ops.transpose(b_y_max, [0, 2, 1]))
5188    i_area = math_ops.maximum(
5189        (i_xmax - i_xmin), 0) * math_ops.maximum((i_ymax - i_ymin), 0)
5190
5191    # Calculates the union area.
5192    a_area = (a_y_max - a_y_min) * (a_x_max - a_x_min)
5193    b_area = (b_y_max - b_y_min) * (b_x_max - b_x_min)
5194    EPSILON = 1e-8
5195    # Adds a small epsilon to avoid divide-by-zero.
5196    u_area = a_area + array_ops.transpose(b_area, [0, 2, 1]) - i_area + EPSILON
5197
5198    # Calculates IoU.
5199    intersection_over_union = i_area / u_area
5200
5201    return intersection_over_union
5202
5203
5204def _self_suppression(iou, _, iou_sum, iou_threshold):
5205  """Suppress boxes in the same tile.
5206
5207     Compute boxes that cannot be suppressed by others (i.e.,
5208     can_suppress_others), and then use them to suppress boxes in the same tile.
5209
5210  Args:
5211    iou: a tensor of shape [batch_size, num_boxes_with_padding] representing
5212      intersection over union.
5213    iou_sum: a scalar tensor.
5214    iou_threshold: a scalar tensor.
5215
5216  Returns:
5217    iou_suppressed: a tensor of shape [batch_size, num_boxes_with_padding].
5218    iou_diff: a boolean scalar tensor representing whether any box is
5219      suppressed in this step.
5220    iou_sum_new: a tensor of shape [batch_size] that represents
5221      the iou sum after suppression.
5222    iou_threshold: a scalar tensor.
5223  """
5224  batch_size = array_ops.shape(iou)[0]
5225  can_suppress_others = math_ops.cast(
5226      array_ops.reshape(
5227          math_ops.reduce_max(iou, 1) < iou_threshold, [batch_size, -1, 1]),
5228      iou.dtype)
5229  iou_after_suppression = array_ops.reshape(
5230      math_ops.cast(
5231          math_ops.reduce_max(can_suppress_others * iou, 1) < iou_threshold,
5232          iou.dtype),
5233      [batch_size, -1, 1]) * iou
5234  iou_sum_new = math_ops.reduce_sum(iou_after_suppression, [1, 2])
5235  return [
5236      iou_after_suppression,
5237      math_ops.reduce_any(iou_sum - iou_sum_new > iou_threshold), iou_sum_new,
5238      iou_threshold
5239  ]
5240
5241
5242def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, tile_size):
5243  """Suppress boxes between different tiles.
5244
5245  Args:
5246    boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4]
5247    box_slice: a tensor of shape [batch_size, tile_size, 4]
5248    iou_threshold: a scalar tensor
5249    inner_idx: a scalar tensor representing the tile index of the tile
5250      that is used to suppress box_slice
5251    tile_size: an integer representing the number of boxes in a tile
5252
5253  Returns:
5254    boxes: unchanged boxes as input
5255    box_slice_after_suppression: box_slice after suppression
5256    iou_threshold: unchanged
5257  """
5258  batch_size = array_ops.shape(boxes)[0]
5259  new_slice = array_ops.slice(
5260      boxes, [0, inner_idx * tile_size, 0],
5261      [batch_size, tile_size, 4])
5262  iou = _bbox_overlap(new_slice, box_slice)
5263  box_slice_after_suppression = array_ops.expand_dims(
5264      math_ops.cast(math_ops.reduce_all(iou < iou_threshold, [1]),
5265                    box_slice.dtype),
5266      2) * box_slice
5267  return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1
5268
5269
5270def _suppression_loop_body(boxes, iou_threshold, output_size, idx, tile_size):
5271  """Process boxes in the range [idx*tile_size, (idx+1)*tile_size).
5272
5273  Args:
5274    boxes: a tensor with a shape of [batch_size, anchors, 4].
5275    iou_threshold: a float representing the threshold for deciding whether boxes
5276      overlap too much with respect to IOU.
5277    output_size: an int32 tensor of size [batch_size]. Representing the number
5278      of selected boxes for each batch.
5279    idx: an integer scalar representing induction variable.
5280    tile_size: an integer representing the number of boxes in a tile
5281
5282  Returns:
5283    boxes: updated boxes.
5284    iou_threshold: pass down iou_threshold to the next iteration.
5285    output_size: the updated output_size.
5286    idx: the updated induction variable.
5287  """
5288  with ops.name_scope('suppression_loop_body'):
5289    num_tiles = array_ops.shape(boxes)[1] // tile_size
5290    batch_size = array_ops.shape(boxes)[0]
5291
5292    def cross_suppression_func(boxes, box_slice, iou_threshold, inner_idx):
5293      return _cross_suppression(boxes, box_slice, iou_threshold, inner_idx,
5294                                tile_size)
5295
5296    # Iterates over tiles that can possibly suppress the current tile.
5297    box_slice = array_ops.slice(boxes, [0, idx * tile_size, 0],
5298                                [batch_size, tile_size, 4])
5299    _, box_slice, _, _ = control_flow_ops.while_loop(
5300        lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
5301        cross_suppression_func,
5302        [boxes, box_slice, iou_threshold, constant_op.constant(0)])
5303
5304    # Iterates over the current tile to compute self-suppression.
5305    iou = _bbox_overlap(box_slice, box_slice)
5306    mask = array_ops.expand_dims(
5307        array_ops.reshape(
5308            math_ops.range(tile_size), [1, -1]) > array_ops.reshape(
5309                math_ops.range(tile_size), [-1, 1]), 0)
5310    iou *= math_ops.cast(
5311        math_ops.logical_and(mask, iou >= iou_threshold), iou.dtype)
5312    suppressed_iou, _, _, _ = control_flow_ops.while_loop(
5313        lambda _iou, loop_condition, _iou_sum, _: loop_condition,
5314        _self_suppression,
5315        [iou, constant_op.constant(True), math_ops.reduce_sum(iou, [1, 2]),
5316         iou_threshold])
5317    suppressed_box = math_ops.reduce_sum(suppressed_iou, 1) > 0
5318    box_slice *= array_ops.expand_dims(
5319        1.0 - math_ops.cast(suppressed_box, box_slice.dtype), 2)
5320
5321    # Uses box_slice to update the input boxes.
5322    mask = array_ops.reshape(
5323        math_ops.cast(
5324            math_ops.equal(math_ops.range(num_tiles), idx), boxes.dtype),
5325        [1, -1, 1, 1])
5326    boxes = array_ops.tile(array_ops.expand_dims(
5327        box_slice, [1]), [1, num_tiles, 1, 1]) * mask + array_ops.reshape(
5328            boxes, [batch_size, num_tiles, tile_size, 4]) * (1 - mask)
5329    boxes = array_ops.reshape(boxes, [batch_size, -1, 4])
5330
5331    # Updates output_size.
5332    output_size += math_ops.reduce_sum(
5333        math_ops.cast(
5334            math_ops.reduce_any(box_slice > 0, [2]), dtypes.int32), [1])
5335  return boxes, iou_threshold, output_size, idx + 1
5336
5337
5338@tf_export('image.non_max_suppression_padded')
5339@dispatch.add_dispatch_support
5340def non_max_suppression_padded(boxes,
5341                               scores,
5342                               max_output_size,
5343                               iou_threshold=0.5,
5344                               score_threshold=float('-inf'),
5345                               pad_to_max_output_size=False,
5346                               name=None,
5347                               sorted_input=False,
5348                               canonicalized_coordinates=False,
5349                               tile_size=512):
5350  """Greedily selects a subset of bounding boxes in descending order of score.
5351
  Performs an operation algorithmically equivalent to
  `tf.image.non_max_suppression`, with the addition of an optional parameter
  which zero-pads the output to be of size `max_output_size`.
5355  The output of this operation is a tuple containing the set of integers
5356  indexing into the input collection of bounding boxes representing the selected
5357  boxes and the number of valid indices in the index set.  The bounding box
5358  coordinates corresponding to the selected indices can then be obtained using
5359  the `tf.slice` and `tf.gather` operations.  For example:
5360    ```python
5361    selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
5362        boxes, scores, max_output_size, iou_threshold,
5363        score_threshold, pad_to_max_output_size=True)
5364    selected_indices = tf.slice(
5365        selected_indices_padded, tf.constant([0]), num_valid)
5366    selected_boxes = tf.gather(boxes, selected_indices)
5367    ```
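
  When `boxes` has batch dimensions, `pad_to_max_output_size` must be set to
  True. A minimal sketch of the batched case (the shapes in the comments are
  illustrative):
    ```python
    # boxes: [batch, num_boxes, 4], scores: [batch, num_boxes]
    idx, num_valid = tf.image.non_max_suppression_padded(
        boxes, scores, max_output_size=10, pad_to_max_output_size=True)
    # For image i, only idx[i, :num_valid[i]] are valid indices.
    ```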
5368
5369  Args:
5370    boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
5371      Dimensions except the last two are batch dimensions.
5372    scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
5373    max_output_size: a scalar integer `Tensor` representing the maximum number
5374      of boxes to be selected by non max suppression. Note that setting this
5375      value to a large number may result in OOM error depending on the system
5376      workload.
5377    iou_threshold: a float representing the threshold for deciding whether boxes
5378      overlap too much with respect to IoU (intersection over union).
5379    score_threshold: a float representing the threshold for box scores. Boxes
5380      with a score that is not larger than this threshold will be suppressed.
5381    pad_to_max_output_size: whether to pad the output idx to max_output_size.
5382      Must be set to True when the input is a batch of images.
5383    name: name of operation.
5384    sorted_input: a boolean indicating whether the input boxes and scores
5385      are sorted in descending order by the score.
    canonicalized_coordinates: if box coordinates are given as
      `[y_min, x_min, y_max, x_max]`, setting this to True eliminates the
      redundant computation that canonicalizes the box coordinates.
5389    tile_size: an integer representing the number of boxes in a tile, i.e.,
5390      the maximum number of boxes per image that can be used to suppress other
5391      boxes in parallel; larger tile_size means larger parallelism and
5392      potentially more redundant work.
5393  Returns:
5394    idx: a tensor with a shape of [..., num_boxes] representing the
5395      indices selected by non-max suppression. The leading dimensions
5396      are the batch dimensions of the input boxes. All numbers are within
5397      [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i]
5398      indices (i.e., idx[i][:num_valid[i]]) are valid.
5399    num_valid: a tensor of rank 0 or higher with a shape of [...]
5400      representing the number of valid indices in idx. Its dimensions are the
5401      batch dimensions of the input boxes.
  Raises:
    ValueError: When `pad_to_max_output_size` is set to False for batched
      input.
5404  """
5405  with ops.name_scope(name, 'non_max_suppression_padded'):
5406    if not pad_to_max_output_size:
5407      # pad_to_max_output_size may be set to False only when the shape of
5408      # boxes is [num_boxes, 4], i.e., a single image. We make best effort to
5409      # detect violations at compile time. If `boxes` does not have a static
5410      # rank, the check allows computation to proceed.
5411      if boxes.get_shape().rank is not None and boxes.get_shape().rank > 2:
5412        raise ValueError("'pad_to_max_output_size' (value {}) must be True for "
5413                         'batched input'.format(pad_to_max_output_size))
5414    if name is None:
5415      name = ''
5416    idx, num_valid = non_max_suppression_padded_v2(
5417        boxes, scores, max_output_size, iou_threshold, score_threshold,
5418        sorted_input, canonicalized_coordinates, tile_size)
5419    # def_function.function seems to lose shape information, so set it here.
5420    if not pad_to_max_output_size:
5421      idx = idx[0, :num_valid]
5422    else:
5423      batch_dims = array_ops.concat([
5424          array_ops.shape(boxes)[:-2],
5425          array_ops.expand_dims(max_output_size, 0)
5426      ], 0)
5427      idx = array_ops.reshape(idx, batch_dims)
5428    return idx, num_valid
5429
5430
5431# TODO(b/158709815): Improve performance regression due to
5432# def_function.function.
5433@def_function.function(
5434    experimental_implements='non_max_suppression_padded_v2')
5435def non_max_suppression_padded_v2(boxes,
5436                                  scores,
5437                                  max_output_size,
5438                                  iou_threshold=0.5,
5439                                  score_threshold=float('-inf'),
5440                                  sorted_input=False,
5441                                  canonicalized_coordinates=False,
5442                                  tile_size=512):
5443  """Non-maximum suppression.
5444
5445  Prunes away boxes that have high intersection-over-union (IOU) overlap
5446  with previously selected boxes. Bounding boxes are supplied as
5447  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
5448  diagonal pair of box corners and the coordinates can be provided as normalized
5449  (i.e., lying in the interval `[0, 1]`) or absolute. The bounding box
  coordinates are canonicalized to `[y_min, x_min, y_max, x_max]`,
  where `(y_min, x_min)` and `(y_max, x_max)` are the coordinates of the lower
  left and upper right corner. Users may indicate that the input box
  coordinates are already canonicalized, and thereby skip redundant work, by
  setting canonicalized_coordinates to `True`. Note that this algorithm is
  agnostic to where the origin is in the coordinate system, and is invariant
  to orthogonal transformations and translations of it; translating or
  reflecting the coordinate system therefore results in the same boxes being
  selected by the algorithm.
5459
  Similar to tf.image.non_max_suppression, non_max_suppression_padded
  implements hard NMS but can operate on a batch of images and improves
  performance by tiling the bounding boxes. non_max_suppression_padded should
  be preferred over tf.image.non_max_suppression when running on devices with
  abundant parallelism, for higher computation speed. For soft NMS, refer to
  tf.image.non_max_suppression_with_scores.
5466
5467  While a serial NMS algorithm iteratively uses the highest-scored unprocessed
5468  box to suppress boxes, this algorithm uses many boxes to suppress other boxes
5469  in parallel. The key idea is to partition boxes into tiles based on their
  scores and suppress boxes tile by tile, thus achieving parallelism within a
5471  tile. The tile size determines the degree of parallelism.
5472
5473  In cross suppression (using boxes of tile A to suppress boxes of tile B),
5474  all boxes in A can independently suppress boxes in B.
5475
5476  Self suppression (suppressing boxes of the same tile) needs to be iteratively
5477  applied until there's no more suppression. In each iteration, boxes that
5478  cannot be suppressed are used to suppress boxes in the same tile.
5479
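  In pseudocode (where `_bbox_overlap` is a helper defined in this module,
  while `_update_boxes`, `_get_suppressing_boxes`, `_get_suppressed_boxes`,
  and `_clear_iou` are illustrative names only):
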
  boxes = boxes.pad_to_multiple_of(tile_size)
  num_tiles = len(boxes) // tile_size
  output_boxes = []
  for i in range(num_tiles):
    box_tile = boxes[i*tile_size : (i+1)*tile_size]
    for j in range(i):
      # In parallel, suppress boxes in box_tile using boxes from
      # suppressing_tile.
      suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
      iou = _bbox_overlap(box_tile, suppressing_tile)
      # If a box is suppressed, zero out its coordinates.
      box_tile *= _update_boxes(iou)
    # Iteratively handle the diagonal tile.
    iou = _bbox_overlap(box_tile, box_tile)
    iou_changed = True
    while iou_changed:
      # Boxes that are not suppressed by anything else.
      suppressing_boxes = _get_suppressing_boxes(iou)
      # Boxes that are suppressed by suppressing_boxes.
      suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
      # Clear iou to 0 for boxes that are suppressed, as they cannot be used
      # to suppress other boxes any more.
      new_iou = _clear_iou(iou, suppressed_boxes)
      iou_changed = (new_iou != iou)
      iou = new_iou
    # The remaining boxes that can still suppress others are the selected
    # boxes.
    output_boxes.append(_get_suppressing_boxes(iou))
    if len(output_boxes) >= max_output_size:
      break
5508
5509  Args:
5510    boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
5511      Dimensions except the last two are batch dimensions. The last dimension
5512      represents box coordinates, given as [y_1, x_1, y_2, x_2]. The coordinates
5513      on each dimension can be given in any order
5514      (see also `canonicalized_coordinates`) but must describe a box with
5515      a positive area.
5516    scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
5517    max_output_size: a scalar integer `Tensor` representing the maximum number
5518      of boxes to be selected by non max suppression.
5519    iou_threshold: a float representing the threshold for deciding whether boxes
5520      overlap too much with respect to IoU (intersection over union).
5521    score_threshold: a float representing the threshold for box scores. Boxes
5522      with a score that is not larger than this threshold will be suppressed.
5523    sorted_input: a boolean indicating whether the input boxes and scores
5524      are sorted in descending order by the score.
    canonicalized_coordinates: if box coordinates are given as
      `[y_min, x_min, y_max, x_max]`, setting this to True eliminates the
      redundant computation that canonicalizes the box coordinates.
5528    tile_size: an integer representing the number of boxes in a tile, i.e.,
5529      the maximum number of boxes per image that can be used to suppress other
5530      boxes in parallel; larger tile_size means larger parallelism and
5531      potentially more redundant work.
5532  Returns:
5533    idx: a tensor with a shape of [..., num_boxes] representing the
5534      indices selected by non-max suppression. The leading dimensions
5535      are the batch dimensions of the input boxes. All numbers are within
5536      [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i]
5537      indices (i.e., idx[i][:num_valid[i]]) are valid.
5538    num_valid: a tensor of rank 0 or higher with a shape of [...]
5539      representing the number of valid indices in idx. Its dimensions are the
5540      batch dimensions of the input boxes.
  Raises:
    ValueError: When `pad_to_max_output_size` is set to False for batched
      input.
5543  """
5544  def _sort_scores_and_boxes(scores, boxes):
5545    """Sort boxes based their score from highest to lowest.
5546
5547    Args:
5548      scores: a tensor with a shape of [batch_size, num_boxes] representing
5549        the scores of boxes.
5550      boxes: a tensor with a shape of [batch_size, num_boxes, 4] representing
5551        the boxes.
5552    Returns:
5553      sorted_scores: a tensor with a shape of [batch_size, num_boxes]
5554        representing the sorted scores.
5555      sorted_boxes: a tensor representing the sorted boxes.
5556      sorted_scores_indices: a tensor with a shape of [batch_size, num_boxes]
5557        representing the index of the scores in a sorted descending order.
5558    """
5559    with ops.name_scope('sort_scores_and_boxes'):
5560      batch_size = array_ops.shape(boxes)[0]
5561      num_boxes = array_ops.shape(boxes)[1]
5562      sorted_scores_indices = sort_ops.argsort(
5563          scores, axis=1, direction='DESCENDING')
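      # Convert the per-batch sort indices into indices into the flattened
      # tensors, so that a single gather can reorder boxes and scores.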
5564      index_offsets = math_ops.range(batch_size) * num_boxes
5565      indices = array_ops.reshape(
5566          sorted_scores_indices + array_ops.expand_dims(index_offsets, 1), [-1])
5567      sorted_scores = array_ops.reshape(
5568          array_ops.gather(array_ops.reshape(scores, [-1]), indices),
5569          [batch_size, -1])
5570      sorted_boxes = array_ops.reshape(
5571          array_ops.gather(array_ops.reshape(boxes, [-1, 4]), indices),
5572          [batch_size, -1, 4])
5573    return sorted_scores, sorted_boxes, sorted_scores_indices
5574
5575  batch_dims = array_ops.shape(boxes)[:-2]
5576  num_boxes = array_ops.shape(boxes)[-2]
5577  boxes = array_ops.reshape(boxes, [-1, num_boxes, 4])
5578  scores = array_ops.reshape(scores, [-1, num_boxes])
5579  batch_size = array_ops.shape(boxes)[0]
5580  if score_threshold != float('-inf'):
5581    with ops.name_scope('filter_by_score'):
5582      score_mask = math_ops.cast(scores > score_threshold, scores.dtype)
5583      scores *= score_mask
5584      box_mask = array_ops.expand_dims(
5585          math_ops.cast(score_mask, boxes.dtype), 2)
5586      boxes *= box_mask
5587
5588  if not canonicalized_coordinates:
5589    with ops.name_scope('canonicalize_coordinates'):
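      # Only the first box is inspected to decide whether y_1/x_1 or y_2/x_2
      # hold the minima; all boxes in the batch are assumed to use a
      # consistent coordinate order.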
5590      y_1, x_1, y_2, x_2 = array_ops.split(
5591          value=boxes, num_or_size_splits=4, axis=2)
5592      y_1_is_min = math_ops.reduce_all(
5593          math_ops.less_equal(y_1[0, 0, 0], y_2[0, 0, 0]))
5594      y_min, y_max = control_flow_ops.cond(
5595          y_1_is_min, lambda: (y_1, y_2), lambda: (y_2, y_1))
5596      x_1_is_min = math_ops.reduce_all(
5597          math_ops.less_equal(x_1[0, 0, 0], x_2[0, 0, 0]))
5598      x_min, x_max = control_flow_ops.cond(
5599          x_1_is_min, lambda: (x_1, x_2), lambda: (x_2, x_1))
5600      boxes = array_ops.concat([y_min, x_min, y_max, x_max], axis=2)
5601
5602  if not sorted_input:
5603    scores, boxes, sorted_indices = _sort_scores_and_boxes(scores, boxes)
5604  else:
5605    # Default value required for Autograph.
5606    sorted_indices = array_ops.zeros_like(scores, dtype=dtypes.int32)
5607
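  # Pad the number of boxes up to a multiple of tile_size (and to at least
  # max_output_size); the padded boxes and scores are all zeros.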
5608  pad = math_ops.cast(
5609      math_ops.ceil(
5610          math_ops.cast(
5611              math_ops.maximum(num_boxes, max_output_size), dtypes.float32) /
5612          math_ops.cast(tile_size, dtypes.float32)),
5613      dtypes.int32) * tile_size - num_boxes
5614  boxes = array_ops.pad(
5615      math_ops.cast(boxes, dtypes.float32), [[0, 0], [0, pad], [0, 0]])
5616  scores = array_ops.pad(
5617      math_ops.cast(scores, dtypes.float32), [[0, 0], [0, pad]])
5618  num_boxes_after_padding = num_boxes + pad
5619  num_iterations = num_boxes_after_padding // tile_size
5620  def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
5621    return math_ops.logical_and(
5622        math_ops.reduce_min(output_size) < max_output_size,
5623        idx < num_iterations)
5624
5625  def suppression_loop_body(boxes, iou_threshold, output_size, idx):
5626    return _suppression_loop_body(
5627        boxes, iou_threshold, output_size, idx, tile_size)
5628
5629  selected_boxes, _, output_size, _ = control_flow_ops.while_loop(
5630      _loop_cond,
5631      suppression_loop_body,
5632      [
5633          boxes, iou_threshold,
5634          array_ops.zeros([batch_size], dtypes.int32),
5635          constant_op.constant(0)
5636      ],
5637      shape_invariants=[
5638          tensor_shape.TensorShape([None, None, 4]),
5639          tensor_shape.TensorShape([]),
5640          tensor_shape.TensorShape([None]),
5641          tensor_shape.TensorShape([]),
5642      ],
5643  )
5644  num_valid = math_ops.minimum(output_size, max_output_size)
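  # Surviving boxes keep nonzero coordinates, so reduce_any(selected_boxes >
  # 0) marks survivors. Weighting each survivor by its descending position
  # makes top_k return the first max_output_size surviving positions, which
  # are then converted back to ascending indices.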
5645  idx = num_boxes_after_padding - math_ops.cast(
5646      nn_ops.top_k(
5647          math_ops.cast(math_ops.reduce_any(
5648              selected_boxes > 0, [2]), dtypes.int32) *
5649          array_ops.expand_dims(
5650              math_ops.range(num_boxes_after_padding, 0, -1), 0),
5651          max_output_size)[0], dtypes.int32)
5652  idx = math_ops.minimum(idx, num_boxes - 1)
5653
5654  if not sorted_input:
5655    index_offsets = math_ops.range(batch_size) * num_boxes
5656    gather_idx = array_ops.reshape(
5657        idx + array_ops.expand_dims(index_offsets, 1), [-1])
5658    idx = array_ops.reshape(
5659        array_ops.gather(array_ops.reshape(sorted_indices, [-1]),
5660                         gather_idx),
5661        [batch_size, -1])
5662  invalid_index = array_ops.zeros([batch_size, max_output_size],
5663                                  dtype=dtypes.int32)
5664  idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0)
5665  num_valid_expanded = array_ops.expand_dims(num_valid, 1)
5666  idx = array_ops.where(idx_index < num_valid_expanded,
5667                        idx, invalid_index)
5668
5669  num_valid = array_ops.reshape(num_valid, batch_dims)
5670  return idx, num_valid
5671
5672
5673def non_max_suppression_padded_v1(boxes,
5674                                  scores,
5675                                  max_output_size,
5676                                  iou_threshold=0.5,
5677                                  score_threshold=float('-inf'),
5678                                  pad_to_max_output_size=False,
5679                                  name=None):
5680  """Greedily selects a subset of bounding boxes in descending order of score.
5681
  Performs an operation algorithmically equivalent to
  `tf.image.non_max_suppression`, with the addition of an optional parameter
  which zero-pads the output to be of size `max_output_size`.
5685  The output of this operation is a tuple containing the set of integers
5686  indexing into the input collection of bounding boxes representing the selected
5687  boxes and the number of valid indices in the index set.  The bounding box
5688  coordinates corresponding to the selected indices can then be obtained using
5689  the `tf.slice` and `tf.gather` operations.  For example:
5690    ```python
5691    selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
5692        boxes, scores, max_output_size, iou_threshold,
5693        score_threshold, pad_to_max_output_size=True)
5694    selected_indices = tf.slice(
5695        selected_indices_padded, tf.constant([0]), num_valid)
5696    selected_boxes = tf.gather(boxes, selected_indices)
5697    ```
5698
5699  Args:
5700    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
5701    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
5702      score corresponding to each box (each row of boxes).
5703    max_output_size: A scalar integer `Tensor` representing the maximum number
5704      of boxes to be selected by non-max suppression.
5705    iou_threshold: A float representing the threshold for deciding whether boxes
5706      overlap too much with respect to IOU.
5707    score_threshold: A float representing the threshold for deciding when to
5708      remove boxes based on score.
5709    pad_to_max_output_size: bool.  If True, size of `selected_indices` output is
5710      padded to `max_output_size`.
5711    name: A name for the operation (optional).
5712
5713  Returns:
5714    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
5715      selected indices from the boxes tensor, where `M <= max_output_size`.
5716    valid_outputs: A scalar integer `Tensor` denoting how many elements in
5717    `selected_indices` are valid.  Valid elements occur first, then padding.
5718  """
5719  with ops.name_scope(name, 'non_max_suppression_padded'):
5720    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
5721    score_threshold = ops.convert_to_tensor(
5722        score_threshold, name='score_threshold')
5723    return gen_image_ops.non_max_suppression_v4(boxes, scores, max_output_size,
5724                                                iou_threshold, score_threshold,
5725                                                pad_to_max_output_size)
5726
5727
5728@tf_export('image.draw_bounding_boxes', v1=[])
5729@dispatch.add_dispatch_support
5730def draw_bounding_boxes_v2(images, boxes, colors, name=None):
5731  """Draw bounding boxes on a batch of images.
5732
5733  Outputs a copy of `images` but draws on top of the pixels zero or more
5734  bounding boxes specified by the locations in `boxes`. The coordinates of the
5735  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
5736  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
5737  and the height of the underlying image.
5738
5739  For example, if an image is 100 x 200 pixels (height x width) and the bounding
5740  box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
5741  the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
5742
5743  Parts of the bounding box may fall outside the image.
5744
5745  Args:
5746    images: A `Tensor`. Must be one of the following types: `float32`, `half`.
5747      4-D with shape `[batch, height, width, depth]`. A batch of images.
5748    boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
5749      num_bounding_boxes, 4]` containing bounding boxes.
5750    colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
5751      through for the boxes.
5752    name: A name for the operation (optional).
5753
5754  Returns:
5755    A `Tensor`. Has the same type as `images`.
5756
5757  Usage Example:
5758
5759  >>> # create an empty image
5760  >>> img = tf.zeros([1, 3, 3, 3])
5761  >>> # draw a box around the image
5762  >>> box = np.array([0, 0, 1, 1])
5763  >>> boxes = box.reshape([1, 1, 4])
5764  >>> # alternate between red and blue
5765  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
5766  >>> tf.image.draw_bounding_boxes(img, boxes, colors)
5767  <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
5768  array([[[[1., 0., 0.],
5769          [1., 0., 0.],
5770          [1., 0., 0.]],
5771          [[1., 0., 0.],
5772          [0., 0., 0.],
5773          [1., 0., 0.]],
5774          [[1., 0., 0.],
5775          [1., 0., 0.],
5776          [1., 0., 0.]]]], dtype=float32)>
5777  """
5778  if colors is None:
5779    return gen_image_ops.draw_bounding_boxes(images, boxes, name)
5780  return gen_image_ops.draw_bounding_boxes_v2(images, boxes, colors, name)
5781
5782
5783@tf_export(v1=['image.draw_bounding_boxes'])
5784@dispatch.add_dispatch_support
5785def draw_bounding_boxes(images, boxes, name=None, colors=None):
5786  """Draw bounding boxes on a batch of images.
5787
5788  Outputs a copy of `images` but draws on top of the pixels zero or more
  bounding boxes specified by the locations in `boxes`. The coordinates of
  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
5791  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
5792  and the height of the underlying image.
5793
5794  For example, if an image is 100 x 200 pixels (height x width) and the bounding
5795  box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
5796  the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
5797
5798  Parts of the bounding box may fall outside the image.
5799
5800  Args:
5801    images: A `Tensor`. Must be one of the following types: `float32`, `half`.
5802      4-D with shape `[batch, height, width, depth]`. A batch of images.
5803    boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
5804      num_bounding_boxes, 4]` containing bounding boxes.
5805    name: A name for the operation (optional).
5806    colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
5807      through for the boxes.
5808
5809  Returns:
5810    A `Tensor`. Has the same type as `images`.
5811
5812  Usage Example:
5813
5814  >>> # create an empty image
5815  >>> img = tf.zeros([1, 3, 3, 3])
5816  >>> # draw a box around the image
5817  >>> box = np.array([0, 0, 1, 1])
5818  >>> boxes = box.reshape([1, 1, 4])
5819  >>> # alternate between red and blue
5820  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
5821  >>> tf.image.draw_bounding_boxes(img, boxes, colors)
5822  <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
5823  array([[[[1., 0., 0.],
5824          [1., 0., 0.],
5825          [1., 0., 0.]],
5826          [[1., 0., 0.],
5827          [0., 0., 0.],
5828          [1., 0., 0.]],
5829          [[1., 0., 0.],
5830          [1., 0., 0.],
5831          [1., 0., 0.]]]], dtype=float32)>
5832  """
5833  return draw_bounding_boxes_v2(images, boxes, colors, name)
5834
5835
5836@tf_export('image.generate_bounding_box_proposals')
5837@dispatch.add_dispatch_support
5838def generate_bounding_box_proposals(scores,
5839                                    bbox_deltas,
5840                                    image_info,
5841                                    anchors,
5842                                    nms_threshold=0.7,
5843                                    pre_nms_topn=6000,
5844                                    min_size=16,
5845                                    post_nms_topn=300,
5846                                    name=None):
5847  """Generate bounding box proposals from encoded bounding boxes.
5848
5849  Args:
5850    scores: A 4-D float `Tensor` of shape
     `[num_images, height, width, num_anchors]` containing scores of
      the boxes for the given anchors; they can be unsorted.
5853    bbox_deltas: A 4-D float `Tensor` of shape
5854     `[num_images, height, width, 4 x num_anchors]` encoding boxes
5855      with respect to each anchor. Coordinates are given
5856      in the form `[dy, dx, dh, dw]`.
5857    image_info: A 2-D float `Tensor` of shape `[num_images, 5]`
      containing image information (height, width, and scale).
5859    anchors: A 2-D float `Tensor` of shape `[num_anchors, 4]`
5860      describing the anchor boxes.
5861      Boxes are formatted in the form `[y1, x1, y2, x2]`.
5862    nms_threshold: A scalar float `Tensor` for non-maximal-suppression
5863      threshold. Defaults to 0.7.
5864    pre_nms_topn: A scalar int `Tensor` for the number of
5865      top scoring boxes to be used as input. Defaults to 6000.
5866    min_size: A scalar float `Tensor`. Any box that has a smaller size
5867      than min_size will be discarded. Defaults to 16.
5868    post_nms_topn: An integer. Maximum number of rois in the output.
5869    name: A name for this operation (optional).
5870
5871  Returns:
5872    rois: Region of interest boxes sorted by their scores.
    roi_probabilities: scores of the boxes in the `rois` tensor.
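
  A minimal call sketch; the tensors are placeholders whose shapes follow the
  argument descriptions above:
    ```python
    rois, roi_probabilities = tf.image.generate_bounding_box_proposals(
        scores=scores,            # [num_images, height, width, num_anchors]
        bbox_deltas=bbox_deltas,  # [num_images, height, width, 4*num_anchors]
        image_info=image_info,    # [num_images, 5]
        anchors=anchors,          # [num_anchors, 4], as [y1, x1, y2, x2]
        nms_threshold=0.7)
    ```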
5874  """
5875  return gen_image_ops.generate_bounding_box_proposals(
5876      scores=scores,
5877      bbox_deltas=bbox_deltas,
5878      image_info=image_info,
5879      anchors=anchors,
5880      nms_threshold=nms_threshold,
5881      pre_nms_topn=pre_nms_topn,
5882      min_size=min_size,
5883      post_nms_topn=post_nms_topn,
5884      name=name)
5885