// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H
#define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H

namespace Eigen {

/** \class TensorImagePatch
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Patch extraction specialized for image processing.
  * This assumes that the input has at least 3 dimensions ordered as follows:
  *   1st dimension: channels (of size d)
  *   2nd dimension: rows (of size r)
  *   3rd dimension: columns (of size c)
  * There can be additional dimensions such as time (for video) or batch (for
  * bulk processing) after the first 3.
  * Calling the image patch code with patch_rows and patch_cols is equivalent
  * to calling the regular patch extraction code with parameters d, patch_rows,
  * patch_cols, and 1 for all the additional dimensions.
  */
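// A minimal usage sketch (illustrative only; it assumes the
// extract_image_patches() helper declared in TensorBase.h, which builds a
// TensorImagePatchOp):
//
//   // ColMajor input ordered as (channels, rows, cols, batch).
//   Eigen::Tensor<float, 4> input(3, 32, 32, 8);
//   input.setRandom();
//   // Result ordered as (channels, patch_rows, patch_cols, number of patches,
//   // batch).
//   Eigen::Tensor<float, 5> patches =
//       input.extract_image_patches(/*patch_rows=*/5, /*patch_cols=*/5);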

namespace internal {

template<DenseIndex Rows, DenseIndex Cols, typename XprType>
struct traits<TensorImagePatchOp<Rows, Cols, XprType> > : public traits<XprType>
{
  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions + 1;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<DenseIndex Rows, DenseIndex Cols, typename XprType>
struct eval<TensorImagePatchOp<Rows, Cols, XprType>, Eigen::Dense>
{
  typedef const TensorImagePatchOp<Rows, Cols, XprType>& type;
};

template<DenseIndex Rows, DenseIndex Cols, typename XprType>
struct nested<TensorImagePatchOp<Rows, Cols, XprType>, 1, typename eval<TensorImagePatchOp<Rows, Cols, XprType> >::type>
{
  typedef TensorImagePatchOp<Rows, Cols, XprType> type;
};

template <typename Self, bool Vectorizable>
struct ImagePatchCopyOp {
  typedef typename Self::Index Index;
  typedef typename Self::Scalar Scalar;
  typedef typename Self::Impl Impl;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
      const Self& self, const Index num_coeff_to_copy, const Index dst_index,
      Scalar* dst_data, const Index src_index) {
    const Impl& impl = self.impl();
    for (Index i = 0; i < num_coeff_to_copy; ++i) {
      dst_data[dst_index + i] = impl.coeff(src_index + i);
    }
  }
};

template <typename Self>
struct ImagePatchCopyOp<Self, true> {
  typedef typename Self::Index Index;
  typedef typename Self::Scalar Scalar;
  typedef typename Self::Impl Impl;
  typedef typename packet_traits<Scalar>::type Packet;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
      const Self& self, const Index num_coeff_to_copy, const Index dst_index,
      Scalar* dst_data, const Index src_index) {
    const Impl& impl = self.impl();
    const Index packet_size = internal::unpacket_traits<Packet>::size;
    const Index vectorized_size =
        (num_coeff_to_copy / packet_size) * packet_size;
    for (Index i = 0; i < vectorized_size; i += packet_size) {
      Packet p = impl.template packet<Unaligned>(src_index + i);
      internal::pstoret<Scalar, Packet, Unaligned>(dst_data + dst_index + i, p);
    }
    for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
      dst_data[dst_index + i] = impl.coeff(src_index + i);
    }
  }
};

template <typename Self>
struct ImagePatchPaddingOp {
  typedef typename Self::Index Index;
  typedef typename Self::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type Packet;
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
      const Index num_coeff_to_pad, const Scalar padding_value,
      const Index dst_index, Scalar* dst_data) {
    const Index packet_size = internal::unpacket_traits<Packet>::size;
    const Packet padded_packet = internal::pset1<Packet>(padding_value);
    const Index vectorized_size =
        (num_coeff_to_pad / packet_size) * packet_size;
    for (Index i = 0; i < vectorized_size; i += packet_size) {
      internal::pstoret<Scalar, Packet, Unaligned>(dst_data + dst_index + i,
                                                   padded_packet);
    }
    for (Index i = vectorized_size; i < num_coeff_to_pad; ++i) {
      dst_data[dst_index + i] = padding_value;
    }
  }
};
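// Both helpers above split the work into a packet-wide main loop followed by a
// scalar tail. As an illustration (hypothetical numbers): with a packet size
// of 4 and num_coeff_to_copy == 10, the main loop covers indices 0..7 with two
// packet stores and the tail loop handles indices 8 and 9 one coefficient at a
// time.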

} // end namespace internal

template<DenseIndex Rows, DenseIndex Cols, typename XprType>
class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprType>, ReadOnlyAccessors>
{
  public:
  typedef typename Eigen::internal::traits<TensorImagePatchOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorImagePatchOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorImagePatchOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorImagePatchOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols,
                                                           DenseIndex row_strides, DenseIndex col_strides,
                                                           DenseIndex in_row_strides, DenseIndex in_col_strides,
                                                           DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
                                                           PaddingType padding_type, Scalar padding_value)
      : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
        m_row_strides(row_strides), m_col_strides(col_strides),
        m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
        m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
        m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0),
        m_padding_type(padding_type), m_padding_value(padding_value) {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols,
                                                           DenseIndex row_strides, DenseIndex col_strides,
                                                           DenseIndex in_row_strides, DenseIndex in_col_strides,
                                                           DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
                                                           DenseIndex padding_top, DenseIndex padding_bottom,
                                                           DenseIndex padding_left, DenseIndex padding_right,
                                                           Scalar padding_value)
      : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
        m_row_strides(row_strides), m_col_strides(col_strides),
        m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
        m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
        m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
        m_padding_left(padding_left), m_padding_right(padding_right),
        m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}


  EIGEN_DEVICE_FUNC
  DenseIndex patch_rows() const { return m_patch_rows; }
  EIGEN_DEVICE_FUNC
  DenseIndex patch_cols() const { return m_patch_cols; }
  EIGEN_DEVICE_FUNC
  DenseIndex row_strides() const { return m_row_strides; }
  EIGEN_DEVICE_FUNC
  DenseIndex col_strides() const { return m_col_strides; }
  EIGEN_DEVICE_FUNC
  DenseIndex in_row_strides() const { return m_in_row_strides; }
  EIGEN_DEVICE_FUNC
  DenseIndex in_col_strides() const { return m_in_col_strides; }
  EIGEN_DEVICE_FUNC
  DenseIndex row_inflate_strides() const { return m_row_inflate_strides; }
  EIGEN_DEVICE_FUNC
  DenseIndex col_inflate_strides() const { return m_col_inflate_strides; }
  EIGEN_DEVICE_FUNC
  bool padding_explicit() const { return m_padding_explicit; }
  EIGEN_DEVICE_FUNC
  DenseIndex padding_top() const { return m_padding_top; }
  EIGEN_DEVICE_FUNC
  DenseIndex padding_bottom() const { return m_padding_bottom; }
  EIGEN_DEVICE_FUNC
  DenseIndex padding_left() const { return m_padding_left; }
  EIGEN_DEVICE_FUNC
  DenseIndex padding_right() const { return m_padding_right; }
  EIGEN_DEVICE_FUNC
  PaddingType padding_type() const { return m_padding_type; }
  EIGEN_DEVICE_FUNC
  Scalar padding_value() const { return m_padding_value; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const DenseIndex m_patch_rows;
    const DenseIndex m_patch_cols;
    const DenseIndex m_row_strides;
    const DenseIndex m_col_strides;
    const DenseIndex m_in_row_strides;
    const DenseIndex m_in_col_strides;
    const DenseIndex m_row_inflate_strides;
    const DenseIndex m_col_inflate_strides;
    const bool m_padding_explicit;
    const DenseIndex m_padding_top;
    const DenseIndex m_padding_bottom;
    const DenseIndex m_padding_left;
    const DenseIndex m_padding_right;
    const PaddingType m_padding_type;
    const Scalar m_padding_value;
};
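// Illustrative only: expressions of this type are normally created through
// TensorBase::extract_image_patches(), but the explicit-padding constructor
// above can also be called directly (Dynamic patch sizes assumed here):
//
//   typedef Eigen::Tensor<float, 4> Input;  // (channels, rows, cols, batch)
//   Input input(3, 32, 32, 8);
//   Eigen::TensorImagePatchOp<Eigen::Dynamic, Eigen::Dynamic, const Input>
//       patch_op(input, /*patch_rows=*/3, /*patch_cols=*/3,
//                /*row_strides=*/1, /*col_strides=*/1,
//                /*in_row_strides=*/1, /*in_col_strides=*/1,
//                /*row_inflate_strides=*/1, /*col_inflate_strides=*/1,
//                /*padding_top=*/1, /*padding_bottom=*/1,
//                /*padding_left=*/1, /*padding_right=*/1,
//                /*padding_value=*/0.0f);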

// Eval as rvalue
template<DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device>
struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
{
  typedef TensorImagePatchOp<Rows, Cols, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims + 1;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
  typedef TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>,
                          Device> Self;
  typedef TensorEvaluator<ArgType, Device> Impl;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_device(device), m_impl(op.expression(), device)
  {
    EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE);

    m_paddingValue = op.padding_value();

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();

    // Caches a few variables.
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputDepth = input_dims[0];
      m_inputRows = input_dims[1];
      m_inputCols = input_dims[2];
    } else {
      m_inputDepth = input_dims[NumInputDims-1];
      m_inputRows = input_dims[NumInputDims-2];
      m_inputCols = input_dims[NumInputDims-3];
    }

    m_row_strides = op.row_strides();
    m_col_strides = op.col_strides();

    // Input strides and effective input/patch size
    m_in_row_strides = op.in_row_strides();
    m_in_col_strides = op.in_col_strides();
    m_row_inflate_strides = op.row_inflate_strides();
    m_col_inflate_strides = op.col_inflate_strides();
    // The "effective" input rows and input cols are the input rows and cols
    // after inflating them with zeros.
    // For example, a 2x3 matrix with row_inflate_strides and
    // col_inflate_strides of 2 is expanded from:
    //   A B C
    //   D E F
    //
    // into the following 3x5 matrix:
    //
    //   A . B . C
    //   . . . . .
    //   D . E . F
    m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1;
    m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1;
    m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1);
    m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1);
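    // Worked example for the effective sizes above (illustrative numbers): a
    // 2x3 input with row/col inflate strides of 2 has effective size 3x5
    // ((2-1)*2+1 = 3 rows, (3-1)*2+1 = 5 cols), and a 3x3 patch with
    // in_row/in_col strides of 2 spans an effective 5x5 window
    // (3 + (3-1)*(2-1) = 5 in each dimension).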

    if (op.padding_explicit()) {
      m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
      m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
      m_rowPaddingTop = op.padding_top();
      m_colPaddingLeft = op.padding_left();
    } else {
      // Computing padding from the type
      switch (op.padding_type()) {
        case PADDING_VALID:
          m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
          m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
          // Calculate the padding
          m_rowPaddingTop = numext::maxi<Index>(0, ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2);
          m_colPaddingLeft = numext::maxi<Index>(0, ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2);
          break;
        case PADDING_SAME:
          m_outputRows = numext::ceil(m_input_rows_eff / static_cast<float>(m_row_strides));
          m_outputCols = numext::ceil(m_input_cols_eff / static_cast<float>(m_col_strides));
          // Calculate the padding
          m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2;
          m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2;
          // The padding size calculation for PADDING_SAME has been updated to
          // be consistent with how TensorFlow extracts its paddings.
          m_rowPaddingTop = numext::maxi<Index>(0, m_rowPaddingTop);
          m_colPaddingLeft = numext::maxi<Index>(0, m_colPaddingLeft);
          break;
        default:
          eigen_assert(false && "unexpected padding");
          m_outputCols = 0;  // silence the uninitialised warning
          m_outputRows = 0;  // silence the uninitialised warning
      }
    }
    eigen_assert(m_outputRows > 0);
    eigen_assert(m_outputCols > 0);
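    // Worked example for the output size / padding computation above
    // (illustrative numbers): with input_rows_eff = 5, patch_rows_eff = 3 and
    // row_strides = 2, PADDING_VALID yields outputRows = ceil(3/2) = 2 and
    // rowPaddingTop = 0, while PADDING_SAME yields outputRows = ceil(5/2) = 3
    // and rowPaddingTop = ((3-1)*2 + 3 - 5) / 2 = 1.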

    // Dimensions for result of extraction.
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      // ColMajor
      // 0: depth
      // 1: patch_rows
      // 2: patch_cols
      // 3: number of patches
      // 4 and beyond: anything else (such as batch).
      m_dimensions[0] = input_dims[0];
      m_dimensions[1] = op.patch_rows();
      m_dimensions[2] = op.patch_cols();
      m_dimensions[3] = m_outputRows * m_outputCols;
      for (int i = 4; i < NumDims; ++i) {
        m_dimensions[i] = input_dims[i-1];
      }
    } else {
      // RowMajor
      // NumDims-1: depth
      // NumDims-2: patch_rows
      // NumDims-3: patch_cols
      // NumDims-4: number of patches
      // NumDims-5 and beyond: anything else (such as batch).
      m_dimensions[NumDims-1] = input_dims[NumInputDims-1];
      m_dimensions[NumDims-2] = op.patch_rows();
      m_dimensions[NumDims-3] = op.patch_cols();
      m_dimensions[NumDims-4] = m_outputRows * m_outputCols;
      for (int i = NumDims-5; i >= 0; --i) {
        m_dimensions[i] = input_dims[i];
      }
    }

    // Strides for moving the patch in various dimensions.
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_colStride = m_dimensions[1];
      m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0];
      m_otherStride = m_patchStride * m_dimensions[3];
    } else {
      m_colStride = m_dimensions[NumDims-2];
      m_patchStride = m_colStride * m_dimensions[NumDims-3] * m_dimensions[NumDims-1];
      m_otherStride = m_patchStride * m_dimensions[NumDims-4];
    }

    // Strides for navigating through the input tensor.
    m_rowInputStride = m_inputDepth;
    m_colInputStride = m_inputDepth * m_inputRows;
    m_patchInputStride = m_inputDepth * m_inputRows * m_inputCols;

    // Fast representations of different variables.
    m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride);
    m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride);
    m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
    m_fastInflateRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides);
    m_fastInflateColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides);
    m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff);

    // Number of patches in the width dimension.
    m_fastOutputRows = internal::TensorIntDivisor<Index>(m_outputRows);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]);
    } else {
      m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    // Patch index corresponding to the passed in index.
    const Index patchIndex = index / m_fastPatchStride;
    // Find the offset of the element wrt the location of the first element.
    const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth;

    // Other ways to index this element.
    const Index otherIndex = (NumDims == 4) ? 0 : index / m_fastOtherStride;
    const Index patch2DIndex = (NumDims == 4) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride;

    // Calculate col index in the original input tensor.
    const Index colIndex = patch2DIndex / m_fastOutputRows;
    const Index colOffset = patchOffset / m_fastColStride;
    const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
    const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInflateColStride) : 0);
    if (inputCol < 0 || inputCol >= m_input_cols_eff ||
        ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
      return Scalar(m_paddingValue);
    }

    // Calculate row index in the original input tensor.
    const Index rowIndex = patch2DIndex - colIndex * m_outputRows;
    const Index rowOffset = patchOffset - colOffset * m_colStride;
    const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
    const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInflateRowStride) : 0);
    if (inputRow < 0 || inputRow >= m_input_rows_eff ||
        ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
      return Scalar(m_paddingValue);
    }

    const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
    const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];

    const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex * m_patchInputStride;
    return m_impl.coeff(inputIndex);
  }
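  // Illustration of the index decomposition in coeff() above (hypothetical
  // ColMajor case with NumDims == 4, depth 3 and 2x2 patches, so
  // m_colStride = 2 and m_patchStride = 2*2*3 = 12): for index = 50 we get
  // patchIndex = 50/12 = 4, an in-patch offset of 50 - 4*12 = 2, hence
  // patchOffset = 2/3 = 0 (first row and column of the patch) and depth = 2.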

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) {
      return packetWithPossibleZero(index);
    }

    const Index indices[2] = {index, index + PacketSize - 1};
    const Index patchIndex = indices[0] / m_fastPatchStride;
    if (patchIndex != indices[1] / m_fastPatchStride) {
      return packetWithPossibleZero(index);
    }
    const Index otherIndex = (NumDims == 4) ? 0 : indices[0] / m_fastOtherStride;
    eigen_assert(otherIndex == indices[1] / m_fastOtherStride);

    // Find the offset of the element wrt the location of the first element.
    const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth,
                                   (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth};

    const Index patch2DIndex = (NumDims == 4) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride;
    eigen_assert(patch2DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride);

    const Index colIndex = patch2DIndex / m_fastOutputRows;
    const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride};

    // Calculate col indices in the original input tensor.
    const Index inputCols[2] = {colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft,
                                colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft};
    if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) {
      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
    }

    if (inputCols[0] == inputCols[1]) {
      const Index rowIndex = patch2DIndex - colIndex * m_outputRows;
      const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride};
      eigen_assert(rowOffsets[0] <= rowOffsets[1]);
      // Calculate row indices in the original input tensor.
      const Index inputRows[2] = {rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop,
                                  rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop};

      if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) {
        return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
      }

      if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) {
        // no padding
        const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
        const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
        const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex * m_patchInputStride;
        return m_impl.template packet<Unaligned>(inputIndex);
      }
    }

    return packetWithPossibleZero(index);
  }
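  // Note: the vectorized load above is only taken when every coefficient of
  // the packet comes from the same patch and the same fully in-bounds input
  // column/row range; non-unit in_*_strides or *_inflate_strides, a packet
  // that straddles a patch boundary, or a partially padded range all fall
  // back to packetWithPossibleZero(), which gathers coefficients one by one.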

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowPaddingTop() const { return m_rowPaddingTop; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colPaddingLeft() const { return m_colPaddingLeft; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputRows() const { return m_outputRows; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outputCols() const { return m_outputCols; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userRowStride() const { return m_row_strides; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userColStride() const { return m_col_strides; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInRowStride() const { return m_in_row_strides; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index userInColStride() const { return m_in_col_strides; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowInflateStride() const { return m_row_inflate_strides; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colInflateStride() const { return m_col_inflate_strides; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    // We conservatively estimate the cost for the code path where the computed
    // index is inside the original image and
    // TensorEvaluator<ArgType, Device>::CoordAccess is false.
    const double compute_cost = 3 * TensorOpCost::DivCost<Index>() +
                                6 * TensorOpCost::MulCost<Index>() +
                                8 * TensorOpCost::MulCost<Index>();
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }

 protected:
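  // packetWithPossibleZero() below builds the packet one coefficient at a
  // time through coeff(), so elements that fall in the padded region pick up
  // m_paddingValue before the values are loaded as a single packet.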
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  Dimensions m_dimensions;

  Index m_otherStride;
  Index m_patchStride;
  Index m_colStride;
  Index m_row_strides;
  Index m_col_strides;

  Index m_in_row_strides;
  Index m_in_col_strides;
  Index m_row_inflate_strides;
  Index m_col_inflate_strides;

  Index m_input_rows_eff;
  Index m_input_cols_eff;
  Index m_patch_rows_eff;
  Index m_patch_cols_eff;

  internal::TensorIntDivisor<Index> m_fastOtherStride;
  internal::TensorIntDivisor<Index> m_fastPatchStride;
  internal::TensorIntDivisor<Index> m_fastColStride;
  internal::TensorIntDivisor<Index> m_fastInflateRowStride;
  internal::TensorIntDivisor<Index> m_fastInflateColStride;
  internal::TensorIntDivisor<Index> m_fastInputColsEff;

  Index m_rowInputStride;
  Index m_colInputStride;
  Index m_patchInputStride;

  Index m_inputDepth;
  Index m_inputRows;
  Index m_inputCols;

  Index m_outputRows;
  Index m_outputCols;

  Index m_rowPaddingTop;
  Index m_colPaddingLeft;

  internal::TensorIntDivisor<Index> m_fastOutputRows;
  internal::TensorIntDivisor<Index> m_fastOutputDepth;

  Scalar m_paddingValue;

  const Device EIGEN_DEVICE_REF m_device;
  TensorEvaluator<ArgType, Device> m_impl;
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H