# -*- coding: utf-8 -*-
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Operations for working with string Tensors."""

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_parsing_ops
from tensorflow.python.ops import gen_string_ops
from tensorflow.python.ops import math_ops

# go/tf-wildcard-import
# pylint: disable=wildcard-import
# pylint: disable=g-bad-import-order
from tensorflow.python.ops.gen_string_ops import *
from tensorflow.python.util import compat as util_compat
from tensorflow.python.util import deprecation
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import tf_export
# pylint: enable=g-bad-import-order
# pylint: enable=wildcard-import


# pylint: disable=redefined-builtin
@tf_export("strings.regex_full_match")
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def regex_full_match(input, pattern, name=None):
  r"""Match elements of `input` with regex `pattern`.

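  For example (matching is full-string and case-sensitive, per RE2):

  >>> tf.strings.regex_full_match(["TensorFlow", "tensorflow"], "Tensor.*")
  <tf.Tensor: shape=(2,), dtype=bool, numpy=array([ True, False])>
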
  Args:
    input: string `Tensor`, the source strings to process.
    pattern: string or scalar string `Tensor`, regular expression to use,
      see more details at https://github.com/google/re2/wiki/Syntax
    name: Name of the op.

  Returns:
    bool `Tensor` of the same shape as `input` with match results.
  """
  if isinstance(pattern, util_compat.bytes_or_text_types):
    # When `pattern` is static through the life of the op we can
    # use a version which performs the expensive regex compilation once at
    # creation time.
    return gen_string_ops.static_regex_full_match(
        input=input, pattern=pattern, name=name)
  return gen_string_ops.regex_full_match(
      input=input, pattern=pattern, name=name)

regex_full_match.__doc__ = gen_string_ops.regex_full_match.__doc__


@tf_export(
    "strings.regex_replace", v1=["strings.regex_replace", "regex_replace"])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("regex_replace")
def regex_replace(input, pattern, rewrite, replace_global=True, name=None):
  r"""Replace elements of `input` matching regex `pattern` with `rewrite`.

  >>> tf.strings.regex_replace("Text with tags.<br /><b>contains html</b>",
  ...                          "<[^>]+>", " ")
  <tf.Tensor: shape=(), dtype=string, numpy=b'Text with tags.  contains html '>

  Args:
    input: string `Tensor`, the source strings to process.
    pattern: string or scalar string `Tensor`, regular expression to use,
      see more details at https://github.com/google/re2/wiki/Syntax
    rewrite: string or scalar string `Tensor`, the value to use in match
      replacement. Supports backslash-escaped digits (\1 to \9), which can be
      used to insert text matching the corresponding parenthesized group.
    replace_global: `bool`, if `True` replace all non-overlapping matches,
      else replace only the first match.
    name: A name for the operation (optional).

  Returns:
    string `Tensor` of the same shape as `input` with specified replacements.
  """
  if (isinstance(pattern, util_compat.bytes_or_text_types) and
      isinstance(rewrite, util_compat.bytes_or_text_types)):
    # When `pattern` and `rewrite` are static through the life of the op we can
    # use a version which performs the expensive regex compilation once at
    # creation time.
    return gen_string_ops.static_regex_replace(
        input=input, pattern=pattern,
        rewrite=rewrite, replace_global=replace_global,
        name=name)
  return gen_string_ops.regex_replace(
      input=input, pattern=pattern,
      rewrite=rewrite, replace_global=replace_global,
      name=name)


@tf_export("strings.format")
@dispatch.add_dispatch_support
def string_format(template, inputs, placeholder="{}", summarize=3, name=None):
  r"""Formats a string template using a list of tensors.

  Formats a string template using a list of tensors, abbreviating tensors by
  only printing the first and last `summarize` elements of each dimension
  (recursively). If formatting only one tensor into a template, the tensor does
  not have to be wrapped in a list.

  Example:
    Formatting a single-tensor template:

    >>> tensor = tf.range(5)
    >>> tf.strings.format("tensor: {}, suffix", tensor)
    <tf.Tensor: shape=(), dtype=string, numpy=b'tensor: [0 1 2 3 4], suffix'>

    Formatting a multi-tensor template:

    >>> tensor_a = tf.range(2)
    >>> tensor_b = tf.range(1, 4, 2)
    >>> tf.strings.format("a: {}, b: {}, suffix", (tensor_a, tensor_b))
    <tf.Tensor: shape=(), dtype=string, numpy=b'a: [0 1], b: [1 3], suffix'>

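    Formatting with abbreviation (`summarize=3` keeps the first and last three
    entries of each dimension; the exact summarized repr shown below is an
    illustrative assumption):

    >>> tf.strings.format("{}", tf.range(10), summarize=3)
    <tf.Tensor: shape=(), dtype=string, numpy=b'[0 1 2 ... 7 8 9]'>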

  Args:
    template: A string template to format tensor values into.
    inputs: A list of `Tensor` objects, or a single Tensor.
      The list of tensors to format into the template string. If a solitary
      tensor is passed in, the input tensor will automatically be wrapped as a
      list.
    placeholder: An optional `string`. Defaults to `{}`.
      At each placeholder occurring in the template, a subsequent tensor
      will be inserted.
    summarize: An optional `int`. Defaults to `3`.
      When formatting the tensors, show the first and last `summarize`
      entries of each tensor dimension (recursively). If set to -1, all
      elements of the tensor will be shown.
    name: A name for the operation (optional).

  Returns:
    A scalar `Tensor` of type `string`.

  Raises:
    ValueError: if the number of placeholders does not match the number of
      inputs.
  """
  # If there is only one tensor to format, we will automatically wrap it in a
  # list to simplify the user experience
  if tensor_util.is_tf_type(inputs):
    inputs = [inputs]
  if template.count(placeholder) != len(inputs):
    raise ValueError(f"The template expects {template.count(placeholder)} "
                     f"tensors, but only {len(inputs)} were supplied. "
                     "Please ensure the number of placeholders in the "
                     "template matches the length of inputs.")

  return gen_string_ops.string_format(inputs,
                                      template=template,
                                      placeholder=placeholder,
                                      summarize=summarize,
                                      name=name)


# Note: tf.strings.split is exported in ragged/ragged_string_ops.py, which
# defines a wrapper for this function.
def string_split(source, sep=None, skip_empty=True, delimiter=None):  # pylint: disable=invalid-name
  """Split elements of `source` based on `delimiter` into a `SparseTensor`.

  Let N be the size of source (typically N will be the batch size). Split each
  element of `source` based on `delimiter` and return a `SparseTensor`
  containing the split tokens. Empty tokens are ignored.

  If `delimiter` is an empty string, each element of the `source` is split
  into individual strings, each containing one byte. (This includes splitting
  multibyte sequences of UTF-8.) If `delimiter` contains multiple bytes, it is
  treated as a set of delimiters with each considered a potential split point.

  For example, if N = 2, source[0] is 'hello world' and source[1] is 'a b c',
  then the output will be

  st.indices = [0, 0;
                0, 1;
                1, 0;
                1, 1;
                1, 2]
  st.shape = [2, 3]
  st.values = ['hello', 'world', 'a', 'b', 'c']
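
  Equivalently, as a minimal doctest-style illustration, calling this
  module-level wrapper directly:

  >>> st = string_split(["hello world", "a b c"])
  >>> st.values.numpy()
  array([b'hello', b'world', b'a', b'b', b'c'], dtype=object)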

  Args:
    source: `1-D` string `Tensor`, the strings to split.
    sep: `0-D` string `Tensor`, the delimiter string, which should have
      length 0 or 1. Defaults to ' '.
    skip_empty: A `bool`. If `True`, skip the empty strings from the result.
    delimiter: deprecated alias for `sep`.

  Raises:
    ValueError: If delimiter is not a string.

  Returns:
    A `SparseTensor` of rank `2`, the strings split according to the delimiter.
    The first column of the indices corresponds to the row in `source` and the
    second column corresponds to the index of the split component in this row.
  """
  delimiter = deprecation.deprecated_argument_lookup(
      "sep", sep, "delimiter", delimiter)

  if delimiter is None:
    delimiter = " "
  delimiter = ops.convert_to_tensor(delimiter, dtype=dtypes.string)
  source = ops.convert_to_tensor(source, dtype=dtypes.string)

  indices, values, shape = gen_string_ops.string_split(
      source, delimiter=delimiter, skip_empty=skip_empty)
  indices.set_shape([None, 2])
  values.set_shape([None])
  shape.set_shape([2])
  return sparse_tensor.SparseTensor(indices, values, shape)


# Note: tf.strings.split is exported in ragged/ragged_string_ops.py, which
# defines a wrapper for this function.
def string_split_v2(source, sep=None, maxsplit=-1):
  """Split elements of `source` based on `sep` into a `SparseTensor`.

  Let N be the size of source (typically N will be the batch size). Split each
  element of `source` based on `sep` and return a `SparseTensor`
  containing the split tokens. Empty tokens are ignored.

  For example, if N = 2, source[0] is 'hello world' and source[1] is 'a b c',
  then the output will be

  st.indices = [0, 0;
                0, 1;
                1, 0;
                1, 1;
                1, 2]
  st.shape = [2, 3]
  st.values = ['hello', 'world', 'a', 'b', 'c']

  If `sep` is given, consecutive delimiters are not grouped together and are
  deemed to delimit empty strings. For example, a source of `"1<>2<><>3"` and
  a sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
  string, runs of consecutive whitespace are regarded as a single separator,
  and the result will contain no empty strings at the start or end if the
  string has leading or trailing whitespace.

  Note that the behavior described above matches Python's `str.split`.
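
  As a minimal doctest-style illustration of the first case, calling this
  module-level wrapper directly:

  >>> st = string_split_v2(["1<>2<><>3"], sep="<>")
  >>> st.values.numpy()
  array([b'1', b'2', b'', b'3'], dtype=object)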

  Args:
    source: `1-D` string `Tensor`, the strings to split.
    sep: `0-D` string `Tensor`, the delimiter character.
    maxsplit: An `int`. If `maxsplit > 0`, limits the number of splits, so the
      result contains at most `maxsplit + 1` elements per input string.

  Raises:
    ValueError: If sep is not a string.

  Returns:
    A `SparseTensor` of rank `2`, the strings split according to the delimiter.
    The first column of the indices corresponds to the row in `source` and the
    second column corresponds to the index of the split component in this row.
  """
  if sep is None:
    sep = ""
  sep = ops.convert_to_tensor(sep, dtype=dtypes.string)
  source = ops.convert_to_tensor(source, dtype=dtypes.string)

  indices, values, shape = gen_string_ops.string_split_v2(
      source, sep=sep, maxsplit=maxsplit)
  indices.set_shape([None, 2])
  values.set_shape([None])
  shape.set_shape([2])
  return sparse_tensor.SparseTensor(indices, values, shape)


def _reduce_join_reduction_dims(x, axis):
289  """Returns range(rank(x) - 1, 0, -1) if axis is None; or axis otherwise."""
  if axis is not None:
    return axis
  else:
    # Fast path: avoid creating Rank and Range ops if ndims is known.
    if x.get_shape().ndims is not None:
      return constant_op.constant(
          np.arange(x.get_shape().ndims - 1, -1, -1), dtype=dtypes.int32)

    # Otherwise, we rely on Range and Rank to do the right thing at run-time.
    return math_ops.range(array_ops.rank(x) - 1, -1, -1)


@tf_export(v1=["strings.reduce_join", "reduce_join"])
@dispatch.add_dispatch_support
@deprecation.deprecated_args(None,
                             "keep_dims is deprecated, use keepdims instead",
                             "keep_dims")
@deprecation.deprecated_endpoints("reduce_join")
def reduce_join(inputs, axis=None,  # pylint: disable=missing-docstring
                keep_dims=None,
                separator="",
                name=None,
                reduction_indices=None,
                keepdims=None):
  keepdims = deprecation.deprecated_argument_lookup("keepdims", keepdims,
                                                    "keep_dims", keep_dims)
  if keep_dims is None:
    keep_dims = False
  axis = deprecation.deprecated_argument_lookup("axis", axis,
                                                "reduction_indices",
                                                reduction_indices)
  return reduce_join_v2(
      inputs=inputs,
      axis=axis,
      keepdims=keepdims,
      separator=separator,
      name=name)


@tf_export("strings.reduce_join", v1=[])
@dispatch.add_dispatch_support
def reduce_join_v2(  # pylint: disable=missing-docstring
    inputs,
    axis=None,
    keepdims=False,
    separator="",
    name=None):
  """Joins all strings into a single string, or joins along an axis.

  This is the reduction operation for the elementwise `tf.strings.join` op.

  >>> tf.strings.reduce_join([['abc','123'],
  ...                         ['def','456']]).numpy()
  b'abc123def456'
  >>> tf.strings.reduce_join([['abc','123'],
  ...                         ['def','456']], axis=-1).numpy()
  array([b'abc123', b'def456'], dtype=object)
  >>> tf.strings.reduce_join([['abc','123'],
  ...                         ['def','456']],
  ...                        axis=-1,
  ...                        separator=" ").numpy()
  array([b'abc 123', b'def 456'], dtype=object)

  Args:
    inputs: A `tf.string` tensor.
    axis: Which axis to join along. The default behavior is to join all
      elements, producing a scalar.
    keepdims: If true, retains reduced dimensions with length 1.
    separator: a string added between each string being joined.
    name: A name for the operation (optional).

  Returns:
    A `tf.string` tensor.
  """
  with ops.name_scope(None, "ReduceJoin", [inputs, axis]):
    inputs_t = ops.convert_to_tensor(inputs)
    axis = _reduce_join_reduction_dims(inputs_t, axis)
    return gen_string_ops.reduce_join(
        inputs=inputs_t,
        reduction_indices=axis,
        keep_dims=keepdims,
        separator=separator,
        name=name)

reduce_join.__doc__ = reduce_join_v2.__doc__


# This wrapper provides backwards compatibility for code that predates the
# unit argument and that passed 'name' as a positional argument.
@tf_export(v1=["strings.length"])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def string_length(input, name=None, unit="BYTE"):
  """Computes the length of each string given in the input tensor.

  >>> strings = tf.constant(['Hello','TensorFlow', '🙂'])
  >>> tf.strings.length(strings).numpy() # default counts bytes
  array([ 5, 10, 4], dtype=int32)
  >>> tf.strings.length(strings, unit="UTF8_CHAR").numpy()
  array([ 5, 10, 1], dtype=int32)

  Args:
    input: A `Tensor` of type `string`. The strings whose lengths are to be
      computed, one per element.
    name: A name for the operation (optional).
    unit: An optional `string` from: `"BYTE", "UTF8_CHAR"`. Defaults to
      `"BYTE"`. The unit that is counted to compute string length.  One of:
        `"BYTE"` (for the number of bytes in each string) or `"UTF8_CHAR"` (for
        the number of UTF-8 encoded Unicode code points in each string). Results
        are undefined if `unit=UTF8_CHAR` and the `input` strings do not contain
        structurally valid UTF-8.

  Returns:
    A `Tensor` of type `int32`, where each element is the length of the
    corresponding string in the input tensor.
405  """
406  return gen_string_ops.string_length(input, unit=unit, name=name)
407
408
409@tf_export("strings.length", v1=[])
410@dispatch.register_unary_elementwise_api
411@dispatch.add_dispatch_support
412def string_length_v2(input, unit="BYTE", name=None):
413  return gen_string_ops.string_length(input, unit=unit, name=name)
414
415
416string_length_v2.__doc__ = gen_string_ops.string_length.__doc__
417
418
419@tf_export(v1=["substr"])
420@dispatch.register_unary_elementwise_api
421@dispatch.add_dispatch_support
422@deprecation.deprecated(None, "Use `tf.strings.substr` instead of `tf.substr`.")
423def substr_deprecated(input, pos, len, name=None, unit="BYTE"):
424  return substr(input, pos, len, name=name, unit=unit)
425
426substr_deprecated.__doc__ = gen_string_ops.substr.__doc__
427
428
429@tf_export(v1=["strings.substr"])
430@dispatch.register_unary_elementwise_api
431@dispatch.add_dispatch_support
432def substr(input, pos, len, name=None, unit="BYTE"):
433  return gen_string_ops.substr(input, pos, len, unit=unit, name=name)
434
435substr.__doc__ = gen_string_ops.substr.__doc__
436
437
438@tf_export("strings.substr", v1=[])
439@dispatch.register_unary_elementwise_api
440@dispatch.add_dispatch_support
441def substr_v2(input, pos, len, unit="BYTE", name=None):
442  return gen_string_ops.substr(input, pos, len, unit=unit, name=name)
443
444substr_v2.__doc__ = gen_string_ops.substr.__doc__
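
# As a minimal illustration of the substr family above (byte offsets, with the
# default unit="BYTE"):
#   tf.strings.substr("Hello TensorFlow", pos=6, len=10)  # -> b'TensorFlow'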


ops.NotDifferentiable("RegexReplace")
ops.NotDifferentiable("StringToHashBucket")
ops.NotDifferentiable("StringToHashBucketFast")
ops.NotDifferentiable("StringToHashBucketStrong")
ops.NotDifferentiable("ReduceJoin")
ops.NotDifferentiable("StringJoin")
ops.NotDifferentiable("StringSplit")
ops.NotDifferentiable("AsString")
ops.NotDifferentiable("EncodeBase64")
ops.NotDifferentiable("DecodeBase64")


@tf_export("strings.to_number", v1=[])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def string_to_number(input, out_type=dtypes.float32, name=None):
  r"""Converts each string in the input Tensor to the specified numeric type.

  (Note that int32 overflow results in an error while float overflow
  results in a rounded value.)

  Examples:

  >>> tf.strings.to_number("1.55")
  <tf.Tensor: shape=(), dtype=float32, numpy=1.55>
  >>> tf.strings.to_number("3", tf.int32)
  <tf.Tensor: shape=(), dtype=int32, numpy=3>

  Args:
    input: A `Tensor` of type `string`.
    out_type: An optional `tf.DType` from: `tf.float32, tf.float64, tf.int32,
      tf.int64`. Defaults to `tf.float32`.
      The numeric type to interpret each string in `string_tensor` as.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `out_type`.
  """
  return gen_parsing_ops.string_to_number(input, out_type, name)


@tf_export(v1=["strings.to_number", "string_to_number"])
@dispatch.add_dispatch_support
def string_to_number_v1(
    string_tensor=None,
    out_type=dtypes.float32,
    name=None,
    input=None):
  string_tensor = deprecation.deprecated_argument_lookup(
      "input", input, "string_tensor", string_tensor)
  return gen_parsing_ops.string_to_number(string_tensor, out_type, name)

string_to_number_v1.__doc__ = gen_parsing_ops.string_to_number.__doc__


@tf_export("strings.to_hash_bucket", v1=[])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def string_to_hash_bucket(input, num_buckets, name=None):
  # pylint: disable=line-too-long
507  r"""Converts each string in the input Tensor to its hash mod by a number of buckets.
508
509  The hash function is deterministic on the content of the string within the
510  process.
511
512  Note that the hash function may change from time to time.
513  This functionality will be deprecated and it's recommended to use
514  `tf.strings.to_hash_bucket_fast()` or `tf.strings.to_hash_bucket_strong()`.

  Examples:

  >>> tf.strings.to_hash_bucket(["Hello", "TensorFlow", "2.x"], 3)
  <tf.Tensor: shape=(3,), dtype=int64, numpy=array([2, 0, 1])>

  Args:
    input: A `Tensor` of type `string`.
    num_buckets: An `int` that is `>= 1`. The number of buckets.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `int64`.
  """
  # pylint: enable=line-too-long
  return gen_string_ops.string_to_hash_bucket(input, num_buckets, name)


@tf_export(v1=["strings.to_hash_bucket", "string_to_hash_bucket"])
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def string_to_hash_bucket_v1(  # pylint: disable=missing-function-docstring
    string_tensor=None,
    num_buckets=None,
    name=None,
    input=None):
  string_tensor = deprecation.deprecated_argument_lookup(
      "input", input, "string_tensor", string_tensor)
  return gen_string_ops.string_to_hash_bucket(string_tensor, num_buckets, name)

string_to_hash_bucket_v1.__doc__ = gen_string_ops.string_to_hash_bucket.__doc__


@tf_export("strings.join", v1=["strings.join", "string_join"])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("string_join")
def string_join(inputs, separator="", name=None):
  """Perform element-wise concatenation of a list of string tensors.

  Given a list of string tensors of the same shape, performs element-wise
  concatenation of the strings at the same index in all tensors.


  >>> tf.strings.join(['abc','def']).numpy()
  b'abcdef'
  >>> tf.strings.join([['abc','123'],
  ...                  ['def','456'],
  ...                  ['ghi','789']]).numpy()
  array([b'abcdefghi', b'123456789'], dtype=object)
  >>> tf.strings.join([['abc','123'],
  ...                  ['def','456']],
  ...                  separator=" ").numpy()
  array([b'abc def', b'123 456'], dtype=object)

  The reduction version of this elementwise operation is
  `tf.strings.reduce_join`.

  Args:
    inputs: A list of `tf.Tensor` objects of the same size and `tf.string`
      dtype.
    separator: A string added between each string being joined.
    name: A name for the operation (optional).

  Returns:
    A `tf.string` tensor.
  """
  return gen_string_ops.string_join(inputs, separator=separator, name=name)


@tf_export("strings.unsorted_segment_join")
@dispatch.add_dispatch_support
def unsorted_segment_join(inputs,
                          segment_ids,
                          num_segments,
                          separator="",
                          name=None):
  """Joins the elements of `inputs` based on `segment_ids`.

  Computes the string join along segments of a tensor.

  Given `segment_ids` with rank `N` and `data` with rank `N+M`:

  ```
  output[i, k1...kM] = strings.join([data[j1...jN, k1...kM]])
  ```

  where the join is over all `[j1...jN]` such that `segment_ids[j1...jN] = i`.

  Strings are joined in row-major order.

  For example:

  >>> inputs = ['this', 'a', 'test', 'is']
  >>> segment_ids = [0, 1, 1, 0]
  >>> num_segments = 2
  >>> separator = ' '
  >>> tf.strings.unsorted_segment_join(inputs, segment_ids, num_segments,
  ...                                  separator).numpy()
  array([b'this is', b'a test'], dtype=object)

  >>> inputs = [['Y', 'q', 'c'], ['Y', '6', '6'], ['p', 'G', 'a']]
  >>> segment_ids = [1, 0, 1]
  >>> num_segments = 2
  >>> tf.strings.unsorted_segment_join(inputs, segment_ids, num_segments,
  ...                                  separator=':').numpy()
  array([[b'Y', b'6', b'6'],
         [b'Y:p', b'q:G', b'c:a']], dtype=object)

  Args:
    inputs: A list of `tf.Tensor` objects of type `tf.string`.
    segment_ids: A tensor whose shape is a prefix of `inputs.shape` and whose
      type must be `tf.int32` or `tf.int64`. Negative segment ids are not
      supported.
    num_segments: A scalar of type `tf.int32` or `tf.int64`. Must be
      non-negative and larger than any segment id.
    separator: The separator to use when joining. Defaults to `""`.
    name: A name for the operation (optional).

  Returns:
    A `tf.string` tensor representing the concatenated values, using the given
    separator.
  """
  return gen_string_ops.unsorted_segment_join(
      inputs, segment_ids, num_segments, separator=separator, name=name)


# Register elementwise ops that don't have Python wrappers.
dispatch.register_unary_elementwise_api(gen_string_ops.as_string)
dispatch.register_unary_elementwise_api(gen_string_ops.decode_base64)
dispatch.register_unary_elementwise_api(gen_string_ops.encode_base64)
dispatch.register_unary_elementwise_api(gen_string_ops.string_lower)
dispatch.register_unary_elementwise_api(gen_string_ops.string_upper)
dispatch.register_unary_elementwise_api(gen_string_ops.unicode_transcode)
dispatch.register_unary_elementwise_api(gen_string_ops.string_strip)
dispatch.register_unary_elementwise_api(
    gen_string_ops.string_to_hash_bucket_fast)
dispatch.register_unary_elementwise_api(
    gen_string_ops.string_to_hash_bucket_strong)
dispatch.register_unary_elementwise_api(gen_string_ops.unicode_script)
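# As a minimal illustration of what this registration enables (assuming an
# eager TF2 environment): registered elementwise ops also dispatch on
# extension types such as tf.RaggedTensor, e.g.
#   tf.strings.upper(tf.ragged.constant([["abc"], ["de", "f"]]))
#   # -> <tf.RaggedTensor [[b'ABC'], [b'DE', b'F']]>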