from typing import List, Optional

import torch.nn.functional as F
from torch import Tensor
from torch.nn.common_types import (
    _ratio_2_t,
    _ratio_3_t,
    _size_1_t,
    _size_2_opt_t,
    _size_2_t,
    _size_3_opt_t,
    _size_3_t,
    _size_any_opt_t,
    _size_any_t,
)

from .module import Module
from .utils import _pair, _single, _triple


__all__ = [
    "MaxPool1d",
    "MaxPool2d",
    "MaxPool3d",
    "MaxUnpool1d",
    "MaxUnpool2d",
    "MaxUnpool3d",
    "AvgPool1d",
    "AvgPool2d",
    "AvgPool3d",
    "FractionalMaxPool2d",
    "FractionalMaxPool3d",
    "LPPool1d",
    "LPPool2d",
    "LPPool3d",
    "AdaptiveMaxPool1d",
    "AdaptiveMaxPool2d",
    "AdaptiveMaxPool3d",
    "AdaptiveAvgPool1d",
    "AdaptiveAvgPool2d",
    "AdaptiveAvgPool3d",
]


class _MaxPoolNd(Module):
    __constants__ = [
        "kernel_size",
        "stride",
        "padding",
        "dilation",
        "return_indices",
        "ceil_mode",
    ]
    return_indices: bool
    ceil_mode: bool

    def __init__(
        self,
        kernel_size: _size_any_t,
        stride: Optional[_size_any_t] = None,
        padding: _size_any_t = 0,
        dilation: _size_any_t = 1,
        return_indices: bool = False,
        ceil_mode: bool = False,
    ) -> None:
        super().__init__()
        self.kernel_size = kernel_size
        self.stride = stride if (stride is not None) else kernel_size
        self.padding = padding
        self.dilation = dilation
        self.return_indices = return_indices
        self.ceil_mode = ceil_mode

    def extra_repr(self) -> str:
        return (
            "kernel_size={kernel_size}, stride={stride}, padding={padding}"
            ", dilation={dilation}, ceil_mode={ceil_mode}".format(**self.__dict__)
        )


class MaxPool1d(_MaxPoolNd):
    r"""Applies a 1D max pooling over an input signal composed of several input planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, L)`
    and output :math:`(N, C, L_{out})` can be precisely described as:

    .. math::
        out(N_i, C_j, k) = \max_{m=0, \ldots, \text{kernel\_size} - 1}
                input(N_i, C_j, stride \times k + m)

    If :attr:`padding` is non-zero, then the input is implicitly padded with negative infinity on both sides
    for :attr:`padding` number of points. :attr:`dilation` is the stride between the elements within the
    sliding window. This `link`_ has a nice visualization of the pooling parameters.

    Note:
        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
        or the input. Sliding windows that would start in the right padded region are ignored.

    Args:
        kernel_size: The size of the sliding window, must be > 0.
        stride: The stride of the sliding window, must be > 0. Default value is :attr:`kernel_size`.
        padding: Implicit negative infinity padding to be added on both sides, must be >= 0 and <= kernel_size / 2.
        dilation: The stride between elements within a sliding window, must be > 0.
        return_indices: If ``True``, will return the argmax along with the max values.
                        Useful for :class:`torch.nn.MaxUnpool1d` later
        ceil_mode: If ``True``, will use `ceil` instead of `floor` to compute the output shape. This
                   ensures that every element in the input tensor is covered by a sliding window.

    Shape:
        - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
        - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`, where

          .. math::
              L_{out} = \left\lfloor \frac{L_{in} + 2 \times \text{padding} - \text{dilation}
                    \times (\text{kernel\_size} - 1) - 1}{\text{stride}} + 1\right\rfloor

    Examples::

        >>> # pool of size=3, stride=2
        >>> m = nn.MaxPool1d(3, stride=2)
        >>> input = torch.randn(20, 16, 50)
        >>> output = m(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """

    kernel_size: _size_1_t
    stride: _size_1_t
    padding: _size_1_t
    dilation: _size_1_t

    def forward(self, input: Tensor):
        return F.max_pool1d(
            input,
            self.kernel_size,
            self.stride,
            self.padding,
            self.dilation,
            ceil_mode=self.ceil_mode,
            return_indices=self.return_indices,
        )
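

# A minimal sketch (the helper name `_demo_maxpool1d_neg_inf_padding` is
# hypothetical, not part of the module's API) illustrating the point made in
# the docstring above: MaxPool1d pads with negative infinity rather than zero,
# so an all-negative input is never "polluted" by padded values at the borders.
def _demo_maxpool1d_neg_inf_padding() -> None:
    import torch
    import torch.nn as nn

    m = nn.MaxPool1d(kernel_size=2, stride=2, padding=1)
    x = torch.tensor([[[-5.0, -4.0, -3.0, -2.0]]])
    # Windows over the padded input are [pad, -5], [-4, -3], [-2, pad];
    # with -inf padding the result stays negative: [-5, -3, -2].
    assert torch.equal(m(x), torch.tensor([[[-5.0, -3.0, -2.0]]]))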


class MaxPool2d(_MaxPoolNd):
    r"""Applies a 2D max pooling over an input signal composed of several input planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
    output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
    can be precisely described as:

    .. math::
        \begin{aligned}
            out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
                                    & \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
                                                   \text{stride[1]} \times w + n)
        \end{aligned}

    If :attr:`padding` is non-zero, then the input is implicitly padded with negative infinity on both sides
    for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the kernel points.
    It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.

    Note:
        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
        or the input. Sliding windows that would start in the right padded region are ignored.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:

        - a single ``int`` -- in which case the same value is used for the height and width dimension
        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
          and the second `int` for the width dimension

    Args:
        kernel_size: the size of the window to take a max over
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: Implicit negative infinity padding to be added on both sides
        dilation: a parameter that controls the stride of elements in the window
        return_indices: if ``True``, will return the max indices along with the outputs.
                        Useful for :class:`torch.nn.MaxUnpool2d` later
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding[0]} - \text{dilation[0]}
                    \times (\text{kernel\_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding[1]} - \text{dilation[1]}
                    \times (\text{kernel\_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.MaxPool2d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.MaxPool2d((3, 2), stride=(2, 1))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """

    kernel_size: _size_2_t
    stride: _size_2_t
    padding: _size_2_t
    dilation: _size_2_t

    def forward(self, input: Tensor):
        return F.max_pool2d(
            input,
            self.kernel_size,
            self.stride,
            self.padding,
            self.dilation,
            ceil_mode=self.ceil_mode,
            return_indices=self.return_indices,
        )
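

# A minimal sketch (hypothetical helper name) checking the H_out/W_out
# formulas above against an actual MaxPool2d call with per-dimension
# kernel size, stride, padding, and dilation.
def _demo_maxpool2d_shape() -> None:
    import torch
    import torch.nn as nn

    m = nn.MaxPool2d((3, 2), stride=(2, 1), padding=(1, 0), dilation=(1, 2))
    x = torch.randn(1, 3, 10, 9)
    h_out = (10 + 2 * 1 - 1 * (3 - 1) - 1) // 2 + 1  # 5
    w_out = (9 + 2 * 0 - 2 * (2 - 1) - 1) // 1 + 1  # 7
    assert m(x).shape[-2:] == (h_out, w_out)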


class MaxPool3d(_MaxPoolNd):
    r"""Applies a 3D max pooling over an input signal composed of several input planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, D, H, W)`,
    output :math:`(N, C, D_{out}, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kD, kH, kW)`
    can be precisely described as:

    .. math::
        \begin{aligned}
            \text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, kD-1} \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
                                              & \text{input}(N_i, C_j, \text{stride[0]} \times d + k,
                                                             \text{stride[1]} \times h + m, \text{stride[2]} \times w + n)
        \end{aligned}

    If :attr:`padding` is non-zero, then the input is implicitly padded with negative infinity on both sides
    for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the kernel points.
    It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.

    Note:
        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
        or the input. Sliding windows that would start in the right padded region are ignored.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:

        - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
        - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
          the second `int` for the height dimension and the third `int` for the width dimension

    Args:
        kernel_size: the size of the window to take a max over
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: Implicit negative infinity padding to be added on all three sides
        dilation: a parameter that controls the stride of elements in the window
        return_indices: if ``True``, will return the max indices along with the outputs.
                        Useful for :class:`torch.nn.MaxUnpool3d` later
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(C, D_{out}, H_{out}, W_{out})`, where

          .. math::
              D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times
                    (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times
                    (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times
                    (\text{kernel\_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.MaxPool3d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.MaxPool3d((3, 2, 2), stride=(2, 1, 2))
        >>> input = torch.randn(20, 16, 50, 44, 31)
        >>> output = m(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """  # noqa: E501

    kernel_size: _size_3_t
    stride: _size_3_t
    padding: _size_3_t
    dilation: _size_3_t

    def forward(self, input: Tensor):
        return F.max_pool3d(
            input,
            self.kernel_size,
            self.stride,
            self.padding,
            self.dilation,
            ceil_mode=self.ceil_mode,
            return_indices=self.return_indices,
        )
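

# A minimal sketch (hypothetical helper name) of what return_indices=True
# hands back: the second output addresses the flattened D*H*W volume of the
# input for each (N, C) plane, which is exactly what the MaxUnpool modules
# below consume.
def _demo_maxpool3d_indices() -> None:
    import torch
    import torch.nn as nn

    pool = nn.MaxPool3d(2, stride=2, return_indices=True)
    x = torch.randn(1, 2, 4, 4, 4)
    out, idx = pool(x)
    # Gathering the flat indices from the flattened input recovers the maxima.
    flat_in = x.flatten(start_dim=2)
    flat_idx = idx.flatten(start_dim=2)
    assert torch.equal(out.flatten(start_dim=2), flat_in.gather(2, flat_idx))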


class _MaxUnpoolNd(Module):
    def extra_repr(self) -> str:
        return f"kernel_size={self.kernel_size}, stride={self.stride}, padding={self.padding}"


class MaxUnpool1d(_MaxUnpoolNd):
    r"""Computes a partial inverse of :class:`MaxPool1d`.

    :class:`MaxPool1d` is not fully invertible, since the non-maximal values are lost.

    :class:`MaxUnpool1d` takes in as input the output of :class:`MaxPool1d`
    including the indices of the maximal values and computes a partial inverse
    in which all non-maximal values are set to zero.

    Note:
        This operation may behave nondeterministically when the input indices contain repeated values.
        See https://github.com/pytorch/pytorch/issues/80827 and :doc:`/notes/randomness` for more information.

    .. note:: :class:`MaxPool1d` can map several input sizes to the same output
              sizes. Hence, the inversion process can get ambiguous.
              To accommodate this, you can provide the needed output size
              as an additional argument :attr:`output_size` in the forward call.
              See the Inputs and Example below.

    Args:
        kernel_size (int or tuple): Size of the max pooling window.
        stride (int or tuple): Stride of the max pooling window.
            It is set to :attr:`kernel_size` by default.
        padding (int or tuple): Padding that was added to the input

    Inputs:
        - `input`: the input Tensor to invert
        - `indices`: the indices given out by :class:`~torch.nn.MaxPool1d`
        - `output_size` (optional): the targeted output size

    Shape:
        - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
        - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`, where

          .. math::
              L_{out} = (L_{in} - 1) \times \text{stride}[0] - 2 \times \text{padding}[0] + \text{kernel\_size}[0]

          or as given by :attr:`output_size` in the call operator

    Example::

        >>> # xdoctest: +IGNORE_WANT("do other tests modify the global state?")
        >>> pool = nn.MaxPool1d(2, stride=2, return_indices=True)
        >>> unpool = nn.MaxUnpool1d(2, stride=2)
        >>> input = torch.tensor([[[1., 2, 3, 4, 5, 6, 7, 8]]])
        >>> output, indices = pool(input)
        >>> unpool(output, indices)
        tensor([[[ 0., 2., 0., 4., 0., 6., 0., 8.]]])

        >>> # Example showcasing the use of output_size
        >>> input = torch.tensor([[[1., 2, 3, 4, 5, 6, 7, 8, 9]]])
        >>> output, indices = pool(input)
        >>> unpool(output, indices, output_size=input.size())
        tensor([[[ 0., 2., 0., 4., 0., 6., 0., 8., 0.]]])

        >>> unpool(output, indices)
        tensor([[[ 0., 2., 0., 4., 0., 6., 0., 8.]]])
    """

    kernel_size: _size_1_t
    stride: _size_1_t
    padding: _size_1_t

    def __init__(
        self,
        kernel_size: _size_1_t,
        stride: Optional[_size_1_t] = None,
        padding: _size_1_t = 0,
    ) -> None:
        super().__init__()
        self.kernel_size = _single(kernel_size)
        self.stride = _single(stride if (stride is not None) else kernel_size)
        self.padding = _single(padding)

    def forward(
        self, input: Tensor, indices: Tensor, output_size: Optional[List[int]] = None
    ) -> Tensor:
        return F.max_unpool1d(
            input, indices, self.kernel_size, self.stride, self.padding, output_size
        )
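

# A minimal sketch (hypothetical helper name) of the MaxPool1d -> MaxUnpool1d
# round trip: the partial inverse places each pooled maximum back at its
# original location, so pooling the result again recovers the pooled tensor
# exactly. A strictly positive input is used so the zeros that MaxUnpool1d
# fills in can never win a max.
def _demo_maxunpool1d_roundtrip() -> None:
    import torch
    import torch.nn as nn

    pool = nn.MaxPool1d(2, stride=2, return_indices=True)
    unpool = nn.MaxUnpool1d(2, stride=2)
    x = torch.rand(1, 3, 8) + 1.0  # strictly positive
    y, idx = pool(x)
    y_again, _ = pool(unpool(y, idx))
    assert torch.equal(y, y_again)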


class MaxUnpool2d(_MaxUnpoolNd):
    r"""Computes a partial inverse of :class:`MaxPool2d`.

    :class:`MaxPool2d` is not fully invertible, since the non-maximal values are lost.

    :class:`MaxUnpool2d` takes in as input the output of :class:`MaxPool2d`
    including the indices of the maximal values and computes a partial inverse
    in which all non-maximal values are set to zero.

    Note:
        This operation may behave nondeterministically when the input indices contain repeated values.
        See https://github.com/pytorch/pytorch/issues/80827 and :doc:`/notes/randomness` for more information.

    .. note:: :class:`MaxPool2d` can map several input sizes to the same output
              sizes. Hence, the inversion process can get ambiguous.
              To accommodate this, you can provide the needed output size
              as an additional argument :attr:`output_size` in the forward call.
              See the Inputs and Example below.

    Args:
        kernel_size (int or tuple): Size of the max pooling window.
        stride (int or tuple): Stride of the max pooling window.
            It is set to :attr:`kernel_size` by default.
        padding (int or tuple): Padding that was added to the input

    Inputs:
        - `input`: the input Tensor to invert
        - `indices`: the indices given out by :class:`~torch.nn.MaxPool2d`
        - `output_size` (optional): the targeted output size

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
        - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = (H_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]}

          .. math::
              W_{out} = (W_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]}

          or as given by :attr:`output_size` in the call operator

    Example::

        >>> pool = nn.MaxPool2d(2, stride=2, return_indices=True)
        >>> unpool = nn.MaxUnpool2d(2, stride=2)
        >>> input = torch.tensor([[[[ 1.,  2.,  3.,  4.],
                                    [ 5.,  6.,  7.,  8.],
                                    [ 9., 10., 11., 12.],
                                    [13., 14., 15., 16.]]]])
        >>> output, indices = pool(input)
        >>> unpool(output, indices)
        tensor([[[[ 0.,  0.,  0.,  0.],
                  [ 0.,  6.,  0.,  8.],
                  [ 0.,  0.,  0.,  0.],
                  [ 0., 14.,  0., 16.]]]])
        >>> # Now using output_size to resolve an ambiguous size for the inverse
        >>> input = torch.tensor([[[[ 1.,  2.,  3.,  4.,  5.],
                                    [ 6.,  7.,  8.,  9., 10.],
                                    [11., 12., 13., 14., 15.],
                                    [16., 17., 18., 19., 20.]]]])
        >>> output, indices = pool(input)
        >>> # This call will not work without specifying output_size
        >>> unpool(output, indices, output_size=input.size())
        tensor([[[[ 0.,  0.,  0.,  0.,  0.],
                  [ 0.,  7.,  0.,  9.,  0.],
                  [ 0.,  0.,  0.,  0.,  0.],
                  [ 0., 17.,  0., 19.,  0.]]]])
    """

    kernel_size: _size_2_t
    stride: _size_2_t
    padding: _size_2_t

    def __init__(
        self,
        kernel_size: _size_2_t,
        stride: Optional[_size_2_t] = None,
        padding: _size_2_t = 0,
    ) -> None:
        super().__init__()
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride if (stride is not None) else kernel_size)
        self.padding = _pair(padding)

    def forward(
        self, input: Tensor, indices: Tensor, output_size: Optional[List[int]] = None
    ) -> Tensor:
        return F.max_unpool2d(
            input, indices, self.kernel_size, self.stride, self.padding, output_size
        )


class MaxUnpool3d(_MaxUnpoolNd):
    r"""Computes a partial inverse of :class:`MaxPool3d`.

    :class:`MaxPool3d` is not fully invertible, since the non-maximal values are lost.
    :class:`MaxUnpool3d` takes in as input the output of :class:`MaxPool3d`
    including the indices of the maximal values and computes a partial inverse
    in which all non-maximal values are set to zero.

    Note:
        This operation may behave nondeterministically when the input indices contain repeated values.
        See https://github.com/pytorch/pytorch/issues/80827 and :doc:`/notes/randomness` for more information.

    .. note:: :class:`MaxPool3d` can map several input sizes to the same output
              sizes. Hence, the inversion process can get ambiguous.
              To accommodate this, you can provide the needed output size
              as an additional argument :attr:`output_size` in the forward call.
              See the Inputs section below.

    Args:
        kernel_size (int or tuple): Size of the max pooling window.
        stride (int or tuple): Stride of the max pooling window.
            It is set to :attr:`kernel_size` by default.
        padding (int or tuple): Padding that was added to the input

    Inputs:
        - `input`: the input Tensor to invert
        - `indices`: the indices given out by :class:`~torch.nn.MaxPool3d`
        - `output_size` (optional): the targeted output size

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(C, D_{out}, H_{out}, W_{out})`, where

          .. math::
              D_{out} = (D_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]}

          .. math::
              H_{out} = (H_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]}

          .. math::
              W_{out} = (W_{in} - 1) \times \text{stride[2]} - 2 \times \text{padding[2]} + \text{kernel\_size[2]}

          or as given by :attr:`output_size` in the call operator

    Example::

        >>> # pool of square window of size=3, stride=2
        >>> pool = nn.MaxPool3d(3, stride=2, return_indices=True)
        >>> unpool = nn.MaxUnpool3d(3, stride=2)
        >>> output, indices = pool(torch.randn(20, 16, 51, 33, 15))
        >>> unpooled_output = unpool(output, indices)
        >>> unpooled_output.size()
        torch.Size([20, 16, 51, 33, 15])
    """

    kernel_size: _size_3_t
    stride: _size_3_t
    padding: _size_3_t

    def __init__(
        self,
        kernel_size: _size_3_t,
        stride: Optional[_size_3_t] = None,
        padding: _size_3_t = 0,
    ) -> None:
        super().__init__()
        self.kernel_size = _triple(kernel_size)
        self.stride = _triple(stride if (stride is not None) else kernel_size)
        self.padding = _triple(padding)

    def forward(
        self, input: Tensor, indices: Tensor, output_size: Optional[List[int]] = None
    ) -> Tensor:
        return F.max_unpool3d(
            input, indices, self.kernel_size, self.stride, self.padding, output_size
        )


class _AvgPoolNd(Module):
    __constants__ = [
        "kernel_size",
        "stride",
        "padding",
        "ceil_mode",
        "count_include_pad",
    ]

    def extra_repr(self) -> str:
        return f"kernel_size={self.kernel_size}, stride={self.stride}, padding={self.padding}"


class AvgPool1d(_AvgPoolNd):
    r"""Applies a 1D average pooling over an input signal composed of several input planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, L)`,
    output :math:`(N, C, L_{out})` and :attr:`kernel_size` :math:`k`
    can be precisely described as:

    .. math::

        \text{out}(N_i, C_j, l) = \frac{1}{k} \sum_{m=0}^{k-1}
                \text{input}(N_i, C_j, \text{stride} \times l + m)

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points.

    Note:
        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
        or the input. Sliding windows that would start in the right padded region are ignored.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can each be
    an ``int`` or a one-element tuple.

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
        count_include_pad: when True, will include the zero-padding in the averaging calculation

    Shape:
        - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
        - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`, where

          .. math::
              L_{out} = \left\lfloor \frac{L_{in} +
                    2 \times \text{padding} - \text{kernel\_size}}{\text{stride}} + 1\right\rfloor

          Per the note above, if ``ceil_mode`` is True and :math:`(L_{out} - 1) \times \text{stride} \geq L_{in}
          + \text{padding}`, we skip the last window as it would start in the right padded region, resulting in
          :math:`L_{out}` being reduced by one.

    Examples::

        >>> # pool with window of size=3, stride=2
        >>> m = nn.AvgPool1d(3, stride=2)
        >>> m(torch.tensor([[[1., 2, 3, 4, 5, 6, 7]]]))
        tensor([[[2., 4., 6.]]])
    """

    kernel_size: _size_1_t
    stride: _size_1_t
    padding: _size_1_t
    ceil_mode: bool
    count_include_pad: bool

    def __init__(
        self,
        kernel_size: _size_1_t,
        stride: Optional[_size_1_t] = None,
        padding: _size_1_t = 0,
        ceil_mode: bool = False,
        count_include_pad: bool = True,
    ) -> None:
        super().__init__()
        self.kernel_size = _single(kernel_size)
        self.stride = _single(stride if stride is not None else kernel_size)
        self.padding = _single(padding)
        self.ceil_mode = ceil_mode
        self.count_include_pad = count_include_pad

    def forward(self, input: Tensor) -> Tensor:
        return F.avg_pool1d(
            input,
            self.kernel_size,
            self.stride,
            self.padding,
            self.ceil_mode,
            self.count_include_pad,
        )
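

# A minimal sketch (hypothetical helper name) of what count_include_pad
# changes: with zero padding, each border window either divides by the full
# kernel size (padding counted) or only by the number of real elements.
def _demo_avgpool1d_count_include_pad() -> None:
    import torch
    import torch.nn as nn

    x = torch.tensor([[[1.0, 2.0, 3.0]]])
    with_pad = nn.AvgPool1d(3, stride=1, padding=1, count_include_pad=True)
    without_pad = nn.AvgPool1d(3, stride=1, padding=1, count_include_pad=False)
    # First window over the padded input is [0, 1, 2]:
    # divisor 3 gives 1.0, divisor 2 (real elements only) gives 1.5.
    assert torch.allclose(with_pad(x), torch.tensor([[[1.0, 2.0, 5.0 / 3.0]]]))
    assert torch.allclose(without_pad(x), torch.tensor([[[1.5, 2.0, 2.5]]]))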


class AvgPool2d(_AvgPoolNd):
    r"""Applies a 2D average pooling over an input signal composed of several input planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
    output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
    can be precisely described as:

    .. math::

        out(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
                input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
    for :attr:`padding` number of points.

    Note:
        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
        or the input. Sliding windows that would start in the right padded region are ignored.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can either be:

        - a single ``int`` -- in which case the same value is used for the height and width dimension
        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
          and the second `int` for the width dimension

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
        count_include_pad: when True, will include the zero-padding in the averaging calculation
        divisor_override: if specified, it will be used as the divisor, otherwise the size of the pooling region will be used.

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
        - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] -
                    \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] -
                    \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor

          Per the note above, if ``ceil_mode`` is True and :math:`(H_{out} - 1)\times \text{stride}[0]\geq H_{in}
          + \text{padding}[0]`, we skip the last window as it would start in the bottom padded region,
          resulting in :math:`H_{out}` being reduced by one.

          The same applies for :math:`W_{out}`.

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.AvgPool2d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.AvgPool2d((3, 2), stride=(2, 1))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)
    """

    __constants__ = [
        "kernel_size",
        "stride",
        "padding",
        "ceil_mode",
        "count_include_pad",
        "divisor_override",
    ]

    kernel_size: _size_2_t
    stride: _size_2_t
    padding: _size_2_t
    ceil_mode: bool
    count_include_pad: bool

    def __init__(
        self,
        kernel_size: _size_2_t,
        stride: Optional[_size_2_t] = None,
        padding: _size_2_t = 0,
        ceil_mode: bool = False,
        count_include_pad: bool = True,
        divisor_override: Optional[int] = None,
    ) -> None:
        super().__init__()
        self.kernel_size = kernel_size
        self.stride = stride if (stride is not None) else kernel_size
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.count_include_pad = count_include_pad
        self.divisor_override = divisor_override

    def forward(self, input: Tensor) -> Tensor:
        return F.avg_pool2d(
            input,
            self.kernel_size,
            self.stride,
            self.padding,
            self.ceil_mode,
            self.count_include_pad,
            self.divisor_override,
        )
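

# A minimal sketch (hypothetical helper name) of divisor_override: forcing the
# divisor to 1 turns average pooling into sum pooling over each window, i.e.
# the plain average times the pooling-region size.
def _demo_avgpool2d_divisor_override() -> None:
    import torch
    import torch.nn as nn

    x = torch.arange(16.0).reshape(1, 1, 4, 4)
    sum_pool = nn.AvgPool2d(2, divisor_override=1)  # divide by 1 -> window sums
    avg_pool = nn.AvgPool2d(2)
    assert torch.equal(sum_pool(x), avg_pool(x) * 4)  # 2x2 region has 4 elements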


class AvgPool3d(_AvgPoolNd):
    r"""Applies a 3D average pooling over an input signal composed of several input planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, D, H, W)`,
    output :math:`(N, C, D_{out}, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kD, kH, kW)`
    can be precisely described as:

    .. math::
        \begin{aligned}
            \text{out}(N_i, C_j, d, h, w) ={} & \sum_{k=0}^{kD-1} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1} \\
                                              & \frac{\text{input}(N_i, C_j, \text{stride}[0] \times d + k,
                                                      \text{stride}[1] \times h + m, \text{stride}[2] \times w + n)}
                                                     {kD \times kH \times kW}
        \end{aligned}

    If :attr:`padding` is non-zero, then the input is implicitly zero-padded on all three sides
    for :attr:`padding` number of points.

    Note:
        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
        or the input. Sliding windows that would start in the right padded region are ignored.

    The parameters :attr:`kernel_size`, :attr:`stride` can either be:

        - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
        - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
          the second `int` for the height dimension and the third `int` for the width dimension

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on all three sides
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
        count_include_pad: when True, will include the zero-padding in the averaging calculation
        divisor_override: if specified, it will be used as the divisor, otherwise the size of the pooling region will be used

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` or
          :math:`(C, D_{out}, H_{out}, W_{out})`, where

          .. math::
              D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] -
                    \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] -
                    \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] -
                    \text{kernel\_size}[2]}{\text{stride}[2]} + 1\right\rfloor

          Per the note above, if ``ceil_mode`` is True and :math:`(D_{out} - 1)\times \text{stride}[0]\geq D_{in}
          + \text{padding}[0]`, we skip the last window as it would start in the padded region,
          resulting in :math:`D_{out}` being reduced by one.

          The same applies for :math:`W_{out}` and :math:`H_{out}`.

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.AvgPool3d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.AvgPool3d((3, 2, 2), stride=(2, 1, 2))
        >>> input = torch.randn(20, 16, 50, 44, 31)
        >>> output = m(input)
    """

    __constants__ = [
        "kernel_size",
        "stride",
        "padding",
        "ceil_mode",
        "count_include_pad",
        "divisor_override",
    ]

    kernel_size: _size_3_t
    stride: _size_3_t
    padding: _size_3_t
    ceil_mode: bool
    count_include_pad: bool

    def __init__(
        self,
        kernel_size: _size_3_t,
        stride: Optional[_size_3_t] = None,
        padding: _size_3_t = 0,
        ceil_mode: bool = False,
        count_include_pad: bool = True,
        divisor_override: Optional[int] = None,
    ) -> None:
        super().__init__()
        self.kernel_size = kernel_size
        self.stride = stride if (stride is not None) else kernel_size
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.count_include_pad = count_include_pad
        self.divisor_override = divisor_override

    def forward(self, input: Tensor) -> Tensor:
        return F.avg_pool3d(
            input,
            self.kernel_size,
            self.stride,
            self.padding,
            self.ceil_mode,
            self.count_include_pad,
            self.divisor_override,
        )

    def __setstate__(self, d):
        super().__setstate__(d)
        self.__dict__.setdefault("padding", 0)
        self.__dict__.setdefault("ceil_mode", False)
        self.__dict__.setdefault("count_include_pad", True)
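

# A minimal sketch (hypothetical helper name) of the window-skipping rule in
# the notes above: with ceil_mode=True the naive ceil formula would give
# ceil((3 + 2 - 2) / 2) + 1 = 3 per dim, but the last window would start
# entirely inside the right padded region, so it is dropped and each spatial
# dim comes out as 2.
def _demo_avgpool3d_ceil_mode_skip() -> None:
    import torch
    import torch.nn as nn

    m = nn.AvgPool3d(2, stride=2, padding=1, ceil_mode=True)
    x = torch.randn(1, 1, 3, 3, 3)
    assert m(x).shape[-3:] == (2, 2, 2)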


class FractionalMaxPool2d(Module):
    r"""Applies a 2D fractional max pooling over an input signal composed of several input planes.

    Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham

    The max-pooling operation is applied in :math:`kH \times kW` regions by a stochastic
    step size determined by the target output size.
    The number of output features is equal to the number of input planes.

    .. note:: Exactly one of ``output_size`` or ``output_ratio`` must be defined.

    Args:
        kernel_size: the size of the window to take a max over.
                     Can be a single number k (for a square kernel of k x k) or a tuple `(kh, kw)`
        output_size: the target output size of the image of the form `oH x oW`.
                     Can be a tuple `(oH, oW)` or a single number oH for a square image `oH x oH`.
                     Note that we must have :math:`kH + oH - 1 <= H_{in}` and :math:`kW + oW - 1 <= W_{in}`
        output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
                      This has to be a number or tuple in the range (0, 1).
                      Note that we must have :math:`kH + (output\_ratio\_H * H_{in}) - 1 <= H_{in}`
                      and :math:`kW + (output\_ratio\_W * W_{in}) - 1 <= W_{in}`
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to :class:`~torch.nn.MaxUnpool2d`. Default: ``False``

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
        - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where
          :math:`(H_{out}, W_{out})=\text{output\_size}` or
          :math:`(H_{out}, W_{out})=\text{output\_ratio} \times (H_{in}, W_{in})`.

    Examples:
        >>> # pool of square window of size=3, and target output size 13x12
        >>> m = nn.FractionalMaxPool2d(3, output_size=(13, 12))
        >>> # pool of square window and target output size being half of input image size
        >>> m = nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)

    .. _Fractional MaxPooling:
        https://arxiv.org/abs/1412.6071
    """

    __constants__ = ["kernel_size", "return_indices", "output_size", "output_ratio"]

    kernel_size: _size_2_t
    return_indices: bool
    output_size: _size_2_t
    output_ratio: _ratio_2_t

    def __init__(
        self,
        kernel_size: _size_2_t,
        output_size: Optional[_size_2_t] = None,
        output_ratio: Optional[_ratio_2_t] = None,
        return_indices: bool = False,
        _random_samples=None,
    ) -> None:
        super().__init__()
        self.kernel_size = _pair(kernel_size)
        self.return_indices = return_indices
        self.register_buffer("_random_samples", _random_samples)
        self.output_size = _pair(output_size) if output_size is not None else None
        self.output_ratio = _pair(output_ratio) if output_ratio is not None else None
        if output_size is None and output_ratio is None:
            raise ValueError(
                "FractionalMaxPool2d requires specifying either "
                "an output size, or a pooling ratio"
            )
        if output_size is not None and output_ratio is not None:
            raise ValueError(
                "only one of output_size and output_ratio may be specified"
            )
        if self.output_ratio is not None:
            if not (0 < self.output_ratio[0] < 1 and 0 < self.output_ratio[1] < 1):
                raise ValueError(
                    f"output_ratio must be between 0 and 1 (got {output_ratio})"
                )

    def forward(self, input: Tensor):
        return F.fractional_max_pool2d(
            input,
            self.kernel_size,
            self.output_size,
            self.output_ratio,
            self.return_indices,
            _random_samples=self._random_samples,
        )
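

# A minimal sketch (hypothetical helper name) of the output_ratio path: the
# output size per spatial dim is the floor of ratio * input size, so a 0.5
# ratio on a 50x32 input yields 25x16.
def _demo_fractional_maxpool2d_ratio() -> None:
    import torch
    import torch.nn as nn

    m = nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5))
    x = torch.randn(1, 16, 50, 32)
    assert m(x).shape[-2:] == (25, 16)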


class FractionalMaxPool3d(Module):
    r"""Applies a 3D fractional max pooling over an input signal composed of several input planes.

    Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham

    The max-pooling operation is applied in :math:`kT \times kH \times kW` regions by a stochastic
    step size determined by the target output size.
    The number of output features is equal to the number of input planes.

    .. note:: Exactly one of ``output_size`` or ``output_ratio`` must be defined.

    Args:
        kernel_size: the size of the window to take a max over.
                     Can be a single number k (for a cubic kernel of k x k x k) or a tuple `(kt, kh, kw)`
        output_size: the target output size of the image of the form `oT x oH x oW`.
                     Can be a tuple `(oT, oH, oW)` or a single number oT for a cubic output `oT x oT x oT`
        output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
                      This has to be a number or tuple in the range (0, 1)
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to :class:`~torch.nn.MaxUnpool3d`. Default: ``False``

    Shape:
        - Input: :math:`(N, C, T_{in}, H_{in}, W_{in})` or :math:`(C, T_{in}, H_{in}, W_{in})`.
        - Output: :math:`(N, C, T_{out}, H_{out}, W_{out})` or :math:`(C, T_{out}, H_{out}, W_{out})`, where
          :math:`(T_{out}, H_{out}, W_{out})=\text{output\_size}` or
          :math:`(T_{out}, H_{out}, W_{out})=\text{output\_ratio} \times (T_{in}, H_{in}, W_{in})`

    Examples:
        >>> # pool of cubic window of size=3, and target output size 13x12x11
        >>> m = nn.FractionalMaxPool3d(3, output_size=(13, 12, 11))
        >>> # pool of cubic window and target output size being half of input size
        >>> m = nn.FractionalMaxPool3d(3, output_ratio=(0.5, 0.5, 0.5))
        >>> input = torch.randn(20, 16, 50, 32, 16)
        >>> output = m(input)

    .. _Fractional MaxPooling:
        https://arxiv.org/abs/1412.6071
    """

    __constants__ = ["kernel_size", "return_indices", "output_size", "output_ratio"]
    kernel_size: _size_3_t
    return_indices: bool
    output_size: _size_3_t
    output_ratio: _ratio_3_t

    def __init__(
        self,
        kernel_size: _size_3_t,
        output_size: Optional[_size_3_t] = None,
        output_ratio: Optional[_ratio_3_t] = None,
        return_indices: bool = False,
        _random_samples=None,
    ) -> None:
        super().__init__()
        self.kernel_size = _triple(kernel_size)
        self.return_indices = return_indices
        self.register_buffer("_random_samples", _random_samples)
        self.output_size = _triple(output_size) if output_size is not None else None
        self.output_ratio = _triple(output_ratio) if output_ratio is not None else None
        if output_size is None and output_ratio is None:
            raise ValueError(
                "FractionalMaxPool3d requires specifying either "
                "an output size, or a pooling ratio"
            )
        if output_size is not None and output_ratio is not None:
            raise ValueError(
                "only one of output_size and output_ratio may be specified"
            )
        if self.output_ratio is not None:
            if not (
                0 < self.output_ratio[0] < 1
                and 0 < self.output_ratio[1] < 1
                and 0 < self.output_ratio[2] < 1
            ):
                raise ValueError(
                    f"output_ratio must be between 0 and 1 (got {output_ratio})"
                )

    def forward(self, input: Tensor):
        return F.fractional_max_pool3d(
            input,
            self.kernel_size,
            self.output_size,
            self.output_ratio,
            self.return_indices,
            _random_samples=self._random_samples,
        )


class _LPPoolNd(Module):
    __constants__ = ["norm_type", "kernel_size", "stride", "ceil_mode"]

    norm_type: float
    ceil_mode: bool

    def __init__(
        self,
        norm_type: float,
        kernel_size: _size_any_t,
        stride: Optional[_size_any_t] = None,
        ceil_mode: bool = False,
    ) -> None:
        super().__init__()
        self.norm_type = norm_type
        self.kernel_size = kernel_size
        self.stride = stride
        self.ceil_mode = ceil_mode

    def extra_repr(self) -> str:
        return (
            "norm_type={norm_type}, kernel_size={kernel_size}, stride={stride}, "
            "ceil_mode={ceil_mode}".format(**self.__dict__)
        )


class LPPool1d(_LPPoolNd):
    r"""Applies a 1D power-average pooling over an input signal composed of several input planes.

    On each window, the function computed is:

    .. math::
        f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}

    - At p = :math:`\infty`, one gets Max Pooling
    - At p = 1, one gets Sum Pooling (which is proportional to Average Pooling)

    .. note:: If the sum to the power of `p` is zero, the gradient of this function is
              not defined. This implementation will set the gradient to zero in this case.

    Args:
        kernel_size: a single int, the size of the window
        stride: a single int, the stride of the window. Default value is :attr:`kernel_size`
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
        - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`, where

          .. math::
              L_{out} = \left\lfloor\frac{L_{in} - \text{kernel\_size}}{\text{stride}} + 1\right\rfloor

    Examples::

        >>> # power-2 pool of window of length 3, with stride 2.
        >>> m = nn.LPPool1d(2, 3, stride=2)
        >>> input = torch.randn(20, 16, 50)
        >>> output = m(input)
    """

    kernel_size: _size_1_t
    stride: _size_1_t

    def forward(self, input: Tensor) -> Tensor:
        return F.lp_pool1d(
            input, float(self.norm_type), self.kernel_size, self.stride, self.ceil_mode
        )


class LPPool2d(_LPPoolNd):
    r"""Applies a 2D power-average pooling over an input signal composed of several input planes.

    On each window, the function computed is:

    .. math::
        f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}

    - At p = :math:`\infty`, one gets Max Pooling
    - At p = 1, one gets Sum Pooling (which is proportional to average pooling)

    The parameters :attr:`kernel_size`, :attr:`stride` can either be:

        - a single ``int`` -- in which case the same value is used for the height and width dimension
        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
          and the second `int` for the width dimension

    .. note:: If the sum to the power of `p` is zero, the gradient of this function is
              not defined. This implementation will set the gradient to zero in this case.

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
        - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} - \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} - \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor

    Examples::

        >>> # power-2 pool of square window of size=3, stride=2
        >>> m = nn.LPPool2d(2, 3, stride=2)
        >>> # pool of non-square window of power 1.2
        >>> m = nn.LPPool2d(1.2, (3, 2), stride=(2, 1))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)
    """

    kernel_size: _size_2_t
    stride: _size_2_t

    def forward(self, input: Tensor) -> Tensor:
        return F.lp_pool2d(
            input, float(self.norm_type), self.kernel_size, self.stride, self.ceil_mode
        )
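

# A minimal sketch (hypothetical helper name) of the p = 1 identity noted in
# the docstrings above: on non-negative inputs, power-average pooling with
# norm_type=1 is sum pooling, i.e. kernel_size times the average pool.
def _demo_lppool1d_sum_identity() -> None:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    x = torch.rand(1, 2, 8)  # non-negative inputs
    lp = nn.LPPool1d(1, kernel_size=2, stride=2)
    assert torch.allclose(lp(x), F.avg_pool1d(x, kernel_size=2, stride=2) * 2)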


class LPPool3d(_LPPoolNd):
    r"""Applies a 3D power-average pooling over an input signal composed of several input planes.

    On each window, the function computed is:

    .. math::
        f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}

    - At p = :math:`\infty`, one gets Max Pooling
    - At p = 1, one gets Sum Pooling (which is proportional to average pooling)

    The parameters :attr:`kernel_size`, :attr:`stride` can either be:

        - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
        - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
          the second `int` for the height dimension and the third `int` for the width dimension

    .. note:: If the sum to the power of `p` is zero, the gradient of this function is
              not defined. This implementation will set the gradient to zero in this case.

    Args:
        kernel_size: the size of the window
        stride: the stride of the window. Default value is :attr:`kernel_size`
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` or
          :math:`(C, D_{out}, H_{out}, W_{out})`, where

          .. math::
              D_{out} = \left\lfloor\frac{D_{in} - \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} - \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} - \text{kernel\_size}[2]}{\text{stride}[2]} + 1\right\rfloor

    Examples::

        >>> # power-2 pool of square window of size=3, stride=2
        >>> m = nn.LPPool3d(2, 3, stride=2)
        >>> # pool of non-square window of power 1.2
        >>> m = nn.LPPool3d(1.2, (3, 2, 2), stride=(2, 1, 2))
        >>> input = torch.randn(20, 16, 50, 44, 31)
        >>> output = m(input)
    """

    kernel_size: _size_3_t
    stride: _size_3_t

    def forward(self, input: Tensor) -> Tensor:
        return F.lp_pool3d(
            input, float(self.norm_type), self.kernel_size, self.stride, self.ceil_mode
        )


class _AdaptiveMaxPoolNd(Module):
    __constants__ = ["output_size", "return_indices"]
    return_indices: bool

    def __init__(
        self, output_size: _size_any_opt_t, return_indices: bool = False
    ) -> None:
        super().__init__()
        self.output_size = output_size
        self.return_indices = return_indices

    def extra_repr(self) -> str:
        return f"output_size={self.output_size}"


# FIXME (by @ssnl): Improve adaptive pooling docs: specify what the input and
# output shapes are, and how the operation computes output.


class AdaptiveMaxPool1d(_AdaptiveMaxPoolNd):
    r"""Applies a 1D adaptive max pooling over an input signal composed of several input planes.

    The output size is :math:`L_{out}`, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size :math:`L_{out}`.
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to nn.MaxUnpool1d. Default: ``False``

    Shape:
        - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
        - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`, where
          :math:`L_{out}=\text{output\_size}`.

    Examples:
        >>> # target output size of 5
        >>> m = nn.AdaptiveMaxPool1d(5)
        >>> input = torch.randn(1, 64, 8)
        >>> output = m(input)
    """

    output_size: _size_1_t

    def forward(self, input: Tensor):
        return F.adaptive_max_pool1d(input, self.output_size, self.return_indices)


class AdaptiveMaxPool2d(_AdaptiveMaxPoolNd):
    r"""Applies a 2D adaptive max pooling over an input signal composed of several input planes.

    The output is of size :math:`H_{out} \times W_{out}`, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size of the image of the form :math:`H_{out} \times W_{out}`.
                     Can be a tuple :math:`(H_{out}, W_{out})` or a single :math:`H_{out}` for a
                     square image :math:`H_{out} \times H_{out}`. :math:`H_{out}` and :math:`W_{out}`
                     can be either an ``int``, or ``None`` which means the size will be the same as that
                     of the input.
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to nn.MaxUnpool2d. Default: ``False``

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
        - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where
          :math:`(H_{out}, W_{out})=\text{output\_size}`.

    Examples:
        >>> # target output size of 5x7
        >>> m = nn.AdaptiveMaxPool2d((5, 7))
        >>> input = torch.randn(1, 64, 8, 9)
        >>> output = m(input)
        >>> # target output size of 7x7 (square)
        >>> m = nn.AdaptiveMaxPool2d(7)
        >>> input = torch.randn(1, 64, 10, 9)
        >>> output = m(input)
        >>> # target output size of 10x7
        >>> m = nn.AdaptiveMaxPool2d((None, 7))
        >>> input = torch.randn(1, 64, 10, 9)
        >>> output = m(input)
    """

    output_size: _size_2_opt_t

    def forward(self, input: Tensor):
        return F.adaptive_max_pool2d(input, self.output_size, self.return_indices)


class AdaptiveMaxPool3d(_AdaptiveMaxPoolNd):
    r"""Applies a 3D adaptive max pooling over an input signal composed of several input planes.

    The output is of size :math:`D_{out} \times H_{out} \times W_{out}`, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size of the image of the form :math:`D_{out} \times H_{out} \times W_{out}`.
                     Can be a tuple :math:`(D_{out}, H_{out}, W_{out})` or a single
                     :math:`D_{out}` for a cube :math:`D_{out} \times D_{out} \times D_{out}`.
                     :math:`D_{out}`, :math:`H_{out}` and :math:`W_{out}` can be either an
                     ``int``, or ``None`` which means the size will be the same as that of the input.
        return_indices: if ``True``, will return the indices along with the outputs.
                        Useful to pass to nn.MaxUnpool3d. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(C, D_{out}, H_{out}, W_{out})`,
          where :math:`(D_{out}, H_{out}, W_{out})=\text{output\_size}`.

    Examples:
        >>> # target output size of 5x7x9
        >>> m = nn.AdaptiveMaxPool3d((5, 7, 9))
        >>> input = torch.randn(1, 64, 8, 9, 10)
        >>> output = m(input)
        >>> # target output size of 7x7x7 (cube)
        >>> m = nn.AdaptiveMaxPool3d(7)
        >>> input = torch.randn(1, 64, 10, 9, 8)
        >>> output = m(input)
        >>> # target output size of 7x9x8
        >>> m = nn.AdaptiveMaxPool3d((7, None, None))
        >>> input = torch.randn(1, 64, 10, 9, 8)
        >>> output = m(input)
    """

    output_size: _size_3_opt_t

    def forward(self, input: Tensor):
        return F.adaptive_max_pool3d(input, self.output_size, self.return_indices)
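

# A minimal sketch (hypothetical helper name): when the input length is an
# integer multiple of output_size, adaptive max pooling coincides with a
# fixed MaxPool1d whose kernel and stride are both L_in // L_out.
def _demo_adaptive_maxpool1d_divisible() -> None:
    import torch
    import torch.nn as nn

    x = torch.randn(1, 4, 12)
    adaptive = nn.AdaptiveMaxPool1d(3)
    fixed = nn.MaxPool1d(kernel_size=4, stride=4)  # 12 // 3 == 4
    assert torch.equal(adaptive(x), fixed(x))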


class _AdaptiveAvgPoolNd(Module):
    __constants__ = ["output_size"]

    def __init__(self, output_size: _size_any_opt_t) -> None:
        super().__init__()
        self.output_size = output_size

    def extra_repr(self) -> str:
        return f"output_size={self.output_size}"


class AdaptiveAvgPool1d(_AdaptiveAvgPoolNd):
    r"""Applies a 1D adaptive average pooling over an input signal composed of several input planes.

    The output size is :math:`L_{out}`, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size :math:`L_{out}`.

    Shape:
        - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
        - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`, where
          :math:`L_{out}=\text{output\_size}`.

    Examples:
        >>> # target output size of 5
        >>> m = nn.AdaptiveAvgPool1d(5)
        >>> input = torch.randn(1, 64, 8)
        >>> output = m(input)
    """

    output_size: _size_1_t

    def forward(self, input: Tensor) -> Tensor:
        return F.adaptive_avg_pool1d(input, self.output_size)


class AdaptiveAvgPool2d(_AdaptiveAvgPoolNd):
    r"""Applies a 2D adaptive average pooling over an input signal composed of several input planes.

    The output is of size H x W, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size of the image of the form H x W.
                     Can be a tuple (H, W) or a single H for a square image H x H.
                     H and W can be either an ``int``, or ``None`` which means the size will
                     be the same as that of the input.

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
        - Output: :math:`(N, C, S_{0}, S_{1})` or :math:`(C, S_{0}, S_{1})`, where
          :math:`S=\text{output\_size}`.

    Examples:
        >>> # target output size of 5x7
        >>> m = nn.AdaptiveAvgPool2d((5, 7))
        >>> input = torch.randn(1, 64, 8, 9)
        >>> output = m(input)
        >>> # target output size of 7x7 (square)
        >>> m = nn.AdaptiveAvgPool2d(7)
        >>> input = torch.randn(1, 64, 10, 9)
        >>> output = m(input)
        >>> # target output size of 10x7
        >>> m = nn.AdaptiveAvgPool2d((None, 7))
        >>> input = torch.randn(1, 64, 10, 9)
        >>> output = m(input)
    """

    output_size: _size_2_opt_t

    def forward(self, input: Tensor) -> Tensor:
        return F.adaptive_avg_pool2d(input, self.output_size)
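

# A minimal sketch (hypothetical helper name) of the common "global average
# pooling" idiom: AdaptiveAvgPool2d(1) is the mean over the spatial dims.
def _demo_global_avg_pool() -> None:
    import torch
    import torch.nn as nn

    x = torch.randn(2, 8, 5, 7)
    gap = nn.AdaptiveAvgPool2d(1)
    assert torch.allclose(gap(x), x.mean(dim=(-2, -1), keepdim=True))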


class AdaptiveAvgPool3d(_AdaptiveAvgPoolNd):
    r"""Applies a 3D adaptive average pooling over an input signal composed of several input planes.

    The output is of size D x H x W, for any input size.
    The number of output features is equal to the number of input planes.

    Args:
        output_size: the target output size of the form D x H x W.
                     Can be a tuple (D, H, W) or a single number D for a cube D x D x D.
                     D, H and W can be either an ``int``, or ``None`` which means the size will
                     be the same as that of the input.

    Shape:
        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
        - Output: :math:`(N, C, S_{0}, S_{1}, S_{2})` or :math:`(C, S_{0}, S_{1}, S_{2})`,
          where :math:`S=\text{output\_size}`.

    Examples:
        >>> # target output size of 5x7x9
        >>> m = nn.AdaptiveAvgPool3d((5, 7, 9))
        >>> input = torch.randn(1, 64, 8, 9, 10)
        >>> output = m(input)
        >>> # target output size of 7x7x7 (cube)
        >>> m = nn.AdaptiveAvgPool3d(7)
        >>> input = torch.randn(1, 64, 10, 9, 8)
        >>> output = m(input)
        >>> # target output size of 7x9x8
        >>> m = nn.AdaptiveAvgPool3d((7, None, None))
        >>> input = torch.randn(1, 64, 10, 9, 8)
        >>> output = m(input)
    """

    output_size: _size_3_opt_t

    def forward(self, input: Tensor) -> Tensor:
        return F.adaptive_avg_pool3d(input, self.output_size)
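

# A minimal sketch (hypothetical helper name) of the ``None`` entries accepted
# by the adaptive pools: a None dimension is passed through unchanged while
# the others are resized.
def _demo_adaptive_none_dims() -> None:
    import torch
    import torch.nn as nn

    x = torch.randn(1, 8, 10, 9, 8)
    m = nn.AdaptiveAvgPool3d((4, None, None))  # None keeps that dim as-is
    assert m(x).shape[-3:] == (4, 9, 8)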