1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_ 17 #define TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_ 18 19 #include <array> 20 21 #include "tensorflow/core/platform/status.h" 22 #include "tensorflow/core/util/padding.h" 23 24 namespace tensorflow { 25 // GetWindowedOutputSize(): Given an input tensor, kernel, stride and padding 26 // type, the function computes the output and padding dimensions. 27 // 28 // For example, ignoring batches or multiple features, a 1D convolution 29 // takes as input a 1D tensor of shape (H), and convolves it with a filter of 30 // shape (K). 31 // 32 // It also takes in a few additional parameters: 33 // 34 // Stride (S): the stride with which we apply the filters. This is the offset 35 // between locations where we apply the filters. A larger stride 36 // means that the output will be spatially smaller. 37 // 38 // Padding (P): the padding we apply to the input tensor along each 39 // dimension. This is usually used to make sure that the spatial dimensions 40 // do not shrink when we progress with convolutions. This function supports two 41 // types of padding. 42 // SAME: the pad value is computed so that the output will have size H/S. 43 // VALID: no padding is carried out. 44 // If you want to use EXPLICIT padding, GetWindowedOutputSizeVerbose must be 45 // called instead. Note the padded area is zero-filled. 46 // 47 // The output dimensions for convolution and many other operations, when given 48 // all the parameters above, are as follows: 49 // - When Padding = SAME: the output size is (H'), where 50 // H' = ceil(float(H) / float(S)) 51 // where ceil is the ceiling function. The number of padded cells 52 // is computed as: 53 // Pc = ((H' - 1) * S + K - H) / 2 54 // When the stride is 1, the expression simplifies to 55 // H' = H, Pc = (K-1)/2. 56 // This is where SAME comes from - the output has the same size as the input 57 // has. 58 // 59 // - When Padding = VALID: the output size is computed as 60 // H' = ceil(float(H - K + 1) / float(S)) 61 // and the number of padded cells is always zero. 62 // When the stride is 1, the expression simplifies to 63 // H' = H-K+1. 64 // 65 // For convolution, mathematically, the output value at location (r') 66 // is the inner product of two vectors: the chunk of input at 67 // ((r'*S-Pr) : (r'*S-Pr+K)), 68 // and the filter. 69 // 70 // For 2D and 3D convolutions, the spatial dimensions are orthogonal, so the 71 // size and padding of each spatial dimension can be computed by calling 72 // GetWindowedOutputSize separately for each dimension. 73 // 74 Status GetWindowedOutputSize(int64_t input_size, int64_t filter_size, 75 int64_t stride, Padding padding_type, 76 int64_t* output_size, int64_t* padding_size); 77 78 // The V2 version computes the same outputs with arbitrary dilation_rate. 79 // The output dimensions are computed as follows: 80 // - When adding dilation_rate (D), we compute an effective filter size (K'): 81 // K' = (K - 1) * D + 1 82 // - When Padding = SAME: the output size is (H'), where 83 // H' = ceil(float(H) / float(S)) 84 // where ceil is the ceiling function. The number of padded cells 85 // is computed as: 86 // Pc = ((H' - 1) * S + K' - H) / 2 87 // When the stride is 1, the expression simplifies to 88 // H' = H, Pc = (K'-1)/2. 89 // This is where SAME comes from - the output has the same size as the input 90 // has. 91 // 92 // - When Padding = VALID: the output size is computed as 93 // H' = ceil(float(H - K' + 1) / float(S)) 94 // and the number of padded cells is always zero. 95 // When the stride is 1, the expression simplifies to 96 // H' = H-K'+1. 97 // 98 // If you want to use EXPLICIT padding, GetWindowedOutputSizeVerboseV2 must be 99 // called instead 100 // 101 // TODO(b/67112639): Merge V2 versions and the original versions eventually. 102 Status GetWindowedOutputSizeV2(int64_t input_size, int64_t filter_size, 103 int64_t dilation_rate, int64_t stride, 104 Padding padding_type, int64_t* output_size, 105 int64_t* padding_size); 106 107 // Returns the same output dimensions as in GetWindowedOutputSize, but returns 108 // verbose padding dimensions (before/after), and EXPLICIT padding is supported. 109 // When padding_type is EXPLICIT, *padding_before and *padding_after must 110 // already point to initialized integers with the padding amounts. Otherwise, 111 // *padding_before and *padding_after are set by this function, and any 112 // excess padding (caused by an odd padding size value) is added to the 113 // 'padding_after' dimension. 114 Status GetWindowedOutputSizeVerbose(int64_t input_size, int64_t filter_size, 115 int64_t stride, Padding padding_type, 116 int64_t* output_size, 117 int64_t* padding_before, 118 int64_t* padding_after); 119 120 // The V2 version computes the same outputs with arbitrary dilation_rate. For 121 // detailed equations, refer to the comments for GetWindowedOutputSizeV2(). 122 Status GetWindowedOutputSizeVerboseV2(int64_t input_size, int64_t filter_size, 123 int64_t dilation_rate, int64_t stride, 124 Padding padding_type, 125 int64_t* output_size, 126 int64_t* padding_before, 127 int64_t* padding_after); 128 129 // Given an input tensor, kernel, stride and padding type, populates the 3D size 130 // of the output tensor and padding to be applied to the input tensor at the 131 // lower end of every dimension. Use for 3D convolutions, where the input data 132 // is padded with zeros, as well as for 3D avg/max pooling, where the input data 133 // is padded with invalid values that are not considered for pooling. EXPLICIT 134 // padding is not supported. 135 Status Get3dOutputSize(const std::array<int64_t, 3>& input, 136 const std::array<int64_t, 3>& window, 137 const std::array<int64_t, 3>& strides, 138 Padding padding_type, std::array<int64_t, 3>* output_ptr, 139 std::array<int64_t, 3>* padding_ptr); 140 141 // The V2 version computes the same outputs with arbitrary dilation_rate. For 142 // detailed equations, refer to the comments for GetWindowedOutputSizeV2(). 143 Status Get3dOutputSizeV2(const std::array<int64_t, 3>& input, 144 const std::array<int64_t, 3>& window, 145 const std::array<int64_t, 3>& dilations, 146 const std::array<int64_t, 3>& strides, 147 Padding padding_type, 148 std::array<int64_t, 3>* output_ptr, 149 std::array<int64_t, 3>* padding_ptr); 150 151 } // namespace tensorflow 152 #endif // TENSORFLOW_CORE_FRAMEWORK_KERNEL_SHAPE_UTIL_H_ 153