import torch.nn.functional as F
from torch import Tensor

from .module import Module


__all__ = [
    "Dropout",
    "Dropout1d",
    "Dropout2d",
    "Dropout3d",
    "AlphaDropout",
    "FeatureAlphaDropout",
]


class _DropoutNd(Module):
    __constants__ = ["p", "inplace"]
    p: float
    inplace: bool

    def __init__(self, p: float = 0.5, inplace: bool = False) -> None:
        super().__init__()
        if p < 0 or p > 1:
            raise ValueError(
                f"dropout probability has to be between 0 and 1, but got {p}"
            )
        self.p = p
        self.inplace = inplace

    def extra_repr(self) -> str:
        return f"p={self.p}, inplace={self.inplace}"


class Dropout(_DropoutNd):
    r"""During training, randomly zeroes some of the elements of the input tensor with probability :attr:`p`.

    The zeroed elements are chosen independently for each forward call and are sampled from a Bernoulli distribution.

    Each channel will be zeroed out independently on every forward call.

    This has proven to be an effective technique for regularization and
    preventing the co-adaptation of neurons as described in the paper
    `Improving neural networks by preventing co-adaptation of feature
    detectors`_ .

    Furthermore, the outputs are scaled by a factor of :math:`\frac{1}{1-p}` during
    training. This means that during evaluation the module simply computes an
    identity function.

    Args:
        p: probability of an element to be zeroed. Default: 0.5
        inplace: If set to ``True``, will do this operation in-place. Default: ``False``

    Shape:
        - Input: :math:`(*)`. Input can be of any shape
        - Output: :math:`(*)`. Output is of the same shape as input

    Examples::

        >>> m = nn.Dropout(p=0.2)
        >>> input = torch.randn(20, 16)
        >>> output = m(input)

    .. _Improving neural networks by preventing co-adaptation of feature
        detectors: https://arxiv.org/abs/1207.0580
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.dropout(input, self.p, self.training, self.inplace)
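

# A minimal editorial sketch (not part of this module's API) illustrating the
# 1/(1-p) rescaling described in the ``Dropout`` docstring above: surviving
# elements are scaled up during training so the expected activation is
# unchanged, and in eval mode the module is the identity. The helper name
# ``_demo_dropout_scaling`` is hypothetical and exists only for illustration.
def _demo_dropout_scaling() -> None:
    import torch

    m = Dropout(p=0.5)
    m.train()
    x = torch.ones(8)
    y = m(x)
    # Each element is either dropped (0.0) or scaled by 1 / (1 - 0.5) = 2.0.
    assert set(y.tolist()) <= {0.0, 2.0}
    m.eval()
    # In evaluation mode, Dropout computes the identity function.
    assert torch.equal(m(x), x)

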
class Dropout1d(_DropoutNd):
    r"""Randomly zero out entire channels.

    A channel is a 1D feature map,
    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
    batched input is a 1D tensor :math:`\text{input}[i, j]`.

    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.

    Usually the input comes from :class:`nn.Conv1d` modules.

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`_ ,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then i.i.d. dropout
    will not regularize the activations and will instead merely result
    in an effective learning rate decrease.

    In this case, :func:`nn.Dropout1d` will help promote independence between
    feature maps and should be used instead.

    Args:
        p (float, optional): probability of an element to be zeroed.
            Default: 0.5
        inplace (bool, optional): If set to ``True``, will do this operation
            in-place. Default: ``False``

    Shape:
        - Input: :math:`(N, C, L)` or :math:`(C, L)`.
        - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input).

    Examples::

        >>> m = nn.Dropout1d(p=0.2)
        >>> input = torch.randn(20, 16, 32)
        >>> output = m(input)

    .. _Efficient Object Localization Using Convolutional Networks:
        https://arxiv.org/abs/1411.4280
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.dropout1d(input, self.p, self.training, self.inplace)


class Dropout2d(_DropoutNd):
    r"""Randomly zero out entire channels.

    A channel is a 2D feature map,
    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
    batched input is a 2D tensor :math:`\text{input}[i, j]`.

    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.

    Usually the input comes from :class:`nn.Conv2d` modules.

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`_ ,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then i.i.d. dropout
    will not regularize the activations and will instead merely result
    in an effective learning rate decrease.

    In this case, :func:`nn.Dropout2d` will help promote independence between
    feature maps and should be used instead.

    Args:
        p (float, optional): probability of an element to be zeroed.
            Default: 0.5
        inplace (bool, optional): If set to ``True``, will do this operation
            in-place. Default: ``False``

    .. warning::
        Due to historical reasons, this class will perform 1D channel-wise dropout
        for 3D inputs (as done by :class:`nn.Dropout1d`). Thus, it currently does NOT
        support inputs without a batch dimension of shape :math:`(C, H, W)`. This
        behavior will change in a future release to interpret 3D inputs as no-batch-dim
        inputs. To maintain the old behavior, switch to :class:`nn.Dropout1d`.

    Shape:
        - Input: :math:`(N, C, H, W)` or :math:`(N, C, L)`.
        - Output: :math:`(N, C, H, W)` or :math:`(N, C, L)` (same shape as input).

    Examples::

        >>> m = nn.Dropout2d(p=0.2)
        >>> input = torch.randn(20, 16, 32, 32)
        >>> output = m(input)

    .. _Efficient Object Localization Using Convolutional Networks:
        https://arxiv.org/abs/1411.4280
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.dropout2d(input, self.p, self.training, self.inplace)
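

# A minimal editorial sketch (not part of this module's API) illustrating the
# channel-wise masking described in the ``Dropout2d`` docstring above: whole
# (H, W) feature maps are dropped as a unit rather than individual elements.
# The helper name ``_demo_dropout2d_channel_masking`` is hypothetical and
# exists only for illustration.
def _demo_dropout2d_channel_masking() -> None:
    import torch

    torch.manual_seed(0)
    m = Dropout2d(p=0.5)
    m.train()
    x = torch.ones(4, 8, 5, 5)
    y = m(x)
    # Every channel is either all zeros (dropped) or uniformly scaled by
    # 1 / (1 - 0.5) = 2.0 (kept); no channel is partially zeroed.
    for channel in y.flatten(2).flatten(0, 1):
        assert bool(channel.eq(0.0).all()) or bool(channel.eq(2.0).all())

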
class Dropout3d(_DropoutNd):
    r"""Randomly zero out entire channels.

    A channel is a 3D feature map,
    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
    batched input is a 3D tensor :math:`\text{input}[i, j]`.

    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.

    Usually the input comes from :class:`nn.Conv3d` modules.

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`_ ,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then i.i.d. dropout
    will not regularize the activations and will instead merely result
    in an effective learning rate decrease.

    In this case, :func:`nn.Dropout3d` will help promote independence between
    feature maps and should be used instead.

    Args:
        p (float, optional): probability of an element to be zeroed.
            Default: 0.5
        inplace (bool, optional): If set to ``True``, will do this operation
            in-place. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).

    Examples::

        >>> m = nn.Dropout3d(p=0.2)
        >>> input = torch.randn(20, 16, 4, 32, 32)
        >>> output = m(input)

    .. _Efficient Object Localization Using Convolutional Networks:
        https://arxiv.org/abs/1411.4280
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.dropout3d(input, self.p, self.training, self.inplace)


class AlphaDropout(_DropoutNd):
    r"""Applies Alpha Dropout over the input.

    Alpha Dropout is a type of Dropout that maintains the self-normalizing
    property.
    For an input with zero mean and unit standard deviation, the output of
    Alpha Dropout maintains the original mean and standard deviation of the
    input.
    Alpha Dropout goes hand-in-hand with the SELU activation function, which
    ensures that the outputs have zero mean and unit standard deviation.

    During training, it randomly masks some of the elements of the input
    tensor with probability *p* using samples from a Bernoulli distribution.
    The elements to be masked are randomized on every forward call, and scaled
    and shifted to maintain zero mean and unit standard deviation.

    During evaluation the module simply computes an identity function.

    More details can be found in the paper `Self-Normalizing Neural Networks`_ .

    Args:
        p (float): probability of an element to be dropped. Default: 0.5
        inplace (bool, optional): If set to ``True``, will do this operation
            in-place. Default: ``False``

    Shape:
        - Input: :math:`(*)`. Input can be of any shape
        - Output: :math:`(*)`. Output is of the same shape as input

    Examples::

        >>> m = nn.AlphaDropout(p=0.2)
        >>> input = torch.randn(20, 16)
        >>> output = m(input)

    .. _Self-Normalizing Neural Networks: https://arxiv.org/abs/1706.02515
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.alpha_dropout(input, self.p, self.training)
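

# A minimal editorial sketch (not part of this module's API) illustrating the
# moment-preservation property described in the ``AlphaDropout`` docstring
# above: for roughly standard-normal input, the output's mean and standard
# deviation stay close to 0 and 1. The helper name
# ``_demo_alpha_dropout_moments`` and the tolerances are hypothetical, chosen
# as loose sampling bounds for illustration.
def _demo_alpha_dropout_moments() -> None:
    import torch

    torch.manual_seed(0)
    m = AlphaDropout(p=0.2)
    m.train()
    x = torch.randn(100000)  # approximately zero mean, unit std by construction
    y = m(x)
    # The scale-and-shift correction keeps the first two moments of the
    # output close to those of the input, unlike plain Dropout.
    assert abs(y.mean().item()) < 0.05
    assert abs(y.std().item() - 1.0) < 0.05

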
class FeatureAlphaDropout(_DropoutNd):
    r"""Randomly masks out entire channels.

    A channel is a feature map,
    e.g. the :math:`j`-th channel of the :math:`i`-th sample in the batched
    input is a tensor :math:`\text{input}[i, j]`. Instead of
    setting activations to zero, as in regular Dropout, the activations are set
    to the negative saturation value of the SELU activation function. More details
    can be found in the paper `Self-Normalizing Neural Networks`_ .

    Each channel will be masked independently for each sample on every forward
    call with probability :attr:`p` using samples from a Bernoulli distribution.
    The elements to be masked are randomized on every forward call, and scaled
    and shifted to maintain zero mean and unit variance.

    Usually the input comes from :class:`nn.AlphaDropout` modules.

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`_ ,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then i.i.d. dropout
    will not regularize the activations and will instead merely result
    in an effective learning rate decrease.

    In this case, :func:`nn.FeatureAlphaDropout` will help promote independence
    between feature maps and should be used instead.

    Args:
        p (float, optional): probability of an element to be zeroed.
            Default: 0.5
        inplace (bool, optional): If set to ``True``, will do this operation
            in-place. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).

    Examples::

        >>> m = nn.FeatureAlphaDropout(p=0.2)
        >>> input = torch.randn(20, 16, 4, 32, 32)
        >>> output = m(input)

    .. _Self-Normalizing Neural Networks: https://arxiv.org/abs/1706.02515
    .. _Efficient Object Localization Using Convolutional Networks:
        https://arxiv.org/abs/1411.4280
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.feature_alpha_dropout(input, self.p, self.training)
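

# A minimal editorial sketch (not part of this module's API) illustrating the
# behavior described in the ``FeatureAlphaDropout`` docstring above: dropped
# feature maps are not zeroed but collapse to a single shared constant (the
# affinely transformed SELU negative saturation value), while kept maps remain
# scaled-and-shifted copies of the input. The helper name
# ``_demo_feature_alpha_dropout_masking`` is hypothetical and exists only for
# illustration.
def _demo_feature_alpha_dropout_masking() -> None:
    import torch

    torch.manual_seed(0)
    m = FeatureAlphaDropout(p=0.5)
    m.train()
    x = torch.randn(1, 16, 2, 4, 4)
    y = m(x)
    # A dropped (D, H, W) map is constant; a kept map of random input is not.
    flat = y.flatten(2)[0]  # (C, D*H*W)
    constant_maps = [bool(row.eq(row[0]).all()) for row in flat]
    # With p=0.5 over 16 channels it is overwhelmingly likely that at least
    # one map was dropped and at least one kept.
    assert any(constant_maps) and not all(constant_maps)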