import torch.nn.functional as F
from torch import Tensor

from .module import Module


__all__ = [
    "Dropout",
    "Dropout1d",
    "Dropout2d",
    "Dropout3d",
    "AlphaDropout",
    "FeatureAlphaDropout",
]


class _DropoutNd(Module):
    __constants__ = ["p", "inplace"]
    p: float
    inplace: bool

    def __init__(self, p: float = 0.5, inplace: bool = False) -> None:
        super().__init__()
        if p < 0 or p > 1:
            raise ValueError(
                f"dropout probability has to be between 0 and 1, but got {p}"
            )
        self.p = p
        self.inplace = inplace

    def extra_repr(self) -> str:
        return f"p={self.p}, inplace={self.inplace}"


class Dropout(_DropoutNd):
    r"""During training, randomly zeroes some of the elements of the input tensor with probability :attr:`p`.

    The zeroed elements are chosen independently for each forward call and are
    sampled from a Bernoulli distribution; each element is zeroed out
    independently on every forward call.

    This has proven to be an effective technique for regularization and
    preventing the co-adaptation of neurons as described in the paper
    `Improving neural networks by preventing co-adaptation of feature
    detectors`_ .

    Furthermore, the outputs are scaled by a factor of :math:`\frac{1}{1-p}` during
    training. This means that during evaluation the module simply computes an
    identity function.

    Args:
        p: probability of an element to be zeroed. Default: 0.5
        inplace: If set to ``True``, will do this operation in-place. Default: ``False``

    Shape:
        - Input: :math:`(*)`. Input can be of any shape
        - Output: :math:`(*)`. Output is of the same shape as input

    Examples::

        >>> m = nn.Dropout(p=0.2)
        >>> input = torch.randn(20, 16)
        >>> output = m(input)
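
    A minimal sketch of the train/eval contrast described above (with
    ``p=0.5``, surviving elements are scaled by :math:`\frac{1}{1-0.5} = 2`;
    a freshly constructed module starts in training mode)::

        >>> m = nn.Dropout(p=0.5)
        >>> x = torch.ones(5)
        >>> y = m(x)  # each entry is 0.0 (dropped) or 2.0 (kept and rescaled)
        >>> _ = m.eval()
        >>> torch.equal(m(x), x)  # identity function at evaluation time
        True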

    .. _Improving neural networks by preventing co-adaptation of feature
        detectors: https://arxiv.org/abs/1207.0580
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.dropout(input, self.p, self.training, self.inplace)


class Dropout1d(_DropoutNd):
    r"""Randomly zero out entire channels.

    A channel is a 1D feature map,
    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
    batched input is a 1D tensor :math:`\text{input}[i, j]`.

    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.

    Usually the input comes from :class:`nn.Conv1d` modules.

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`_ ,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then i.i.d. dropout
    will not regularize the activations and will otherwise just result
    in an effective learning rate decrease.

    In this case, :func:`nn.Dropout1d` will help promote independence between
    feature maps and should be used instead.

    Args:
        p (float, optional): probability of an element to be zeroed. Default: 0.5
        inplace (bool, optional): If set to ``True``, will do this operation
            in-place. Default: ``False``

    Shape:
        - Input: :math:`(N, C, L)` or :math:`(C, L)`.
        - Output: :math:`(N, C, L)` or :math:`(C, L)` (same shape as input).

    Examples::

        >>> m = nn.Dropout1d(p=0.2)
        >>> input = torch.randn(20, 16, 32)
        >>> output = m(input)
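
    A rough sketch of the channel-wise behavior (shapes are illustrative;
    with ``p=0.5`` each kept channel is scaled by :math:`1/(1-0.5) = 2`)::

        >>> m = nn.Dropout1d(p=0.5)  # training mode by default
        >>> y = m(torch.ones(20, 16, 32))
        >>> # every channel y[i, j] is uniformly zeroed or uniformly kept
        >>> bool(((y == 0).all(dim=-1) | (y == 2.0).all(dim=-1)).all())
        True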

    .. _Efficient Object Localization Using Convolutional Networks:
       https://arxiv.org/abs/1411.4280
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.dropout1d(input, self.p, self.training, self.inplace)


class Dropout2d(_DropoutNd):
    r"""Randomly zero out entire channels.

    A channel is a 2D feature map,
    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
    batched input is a 2D tensor :math:`\text{input}[i, j]`.

    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.

    Usually the input comes from :class:`nn.Conv2d` modules.

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`_ ,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then i.i.d. dropout
    will not regularize the activations and will otherwise just result
    in an effective learning rate decrease.

    In this case, :func:`nn.Dropout2d` will help promote independence between
    feature maps and should be used instead.

    Args:
        p (float, optional): probability of an element to be zeroed. Default: 0.5
        inplace (bool, optional): If set to ``True``, will do this operation
            in-place. Default: ``False``

    .. warning ::
        Due to historical reasons, this class will perform 1D channel-wise dropout
        for 3D inputs (as done by :class:`nn.Dropout1d`). Thus, it currently does NOT
        support inputs without a batch dimension of shape :math:`(C, H, W)`. This
        behavior will change in a future release to interpret 3D inputs as no-batch-dim
        inputs. To maintain the old behavior, switch to :class:`nn.Dropout1d`.

    Shape:
        - Input: :math:`(N, C, H, W)` or :math:`(N, C, L)`.
        - Output: :math:`(N, C, H, W)` or :math:`(N, C, L)` (same shape as input).

    Examples::

        >>> m = nn.Dropout2d(p=0.2)
        >>> input = torch.randn(20, 16, 32, 32)
        >>> output = m(input)
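
    A sketch of the warning above: a 3D input is currently interpreted as
    :math:`(N, C, L)`, so zeroing happens along the last dimension only, not
    over whole :math:`(H, W)` maps (with ``p=0.5``, kept values scale to 2)::

        >>> m = nn.Dropout2d(p=0.5)  # training mode by default
        >>> y = m(torch.ones(4, 8, 16))  # treated as (N, C, L), not (C, H, W)
        >>> bool(((y == 0).all(dim=-1) | (y == 2.0).all(dim=-1)).all())
        True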

    .. _Efficient Object Localization Using Convolutional Networks:
       https://arxiv.org/abs/1411.4280
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.dropout2d(input, self.p, self.training, self.inplace)


class Dropout3d(_DropoutNd):
    r"""Randomly zero out entire channels.

    A channel is a 3D feature map,
    e.g., the :math:`j`-th channel of the :math:`i`-th sample in the
    batched input is a 3D tensor :math:`\text{input}[i, j]`.

    Each channel will be zeroed out independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.

    Usually the input comes from :class:`nn.Conv3d` modules.

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`_ ,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then i.i.d. dropout
    will not regularize the activations and will otherwise just result
    in an effective learning rate decrease.

    In this case, :func:`nn.Dropout3d` will help promote independence between
    feature maps and should be used instead.

    Args:
        p (float, optional): probability of an element to be zeroed. Default: 0.5
        inplace (bool, optional): If set to ``True``, will do this operation
            in-place. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).

    Examples::

        >>> m = nn.Dropout3d(p=0.2)
        >>> input = torch.randn(20, 16, 4, 32, 32)
        >>> output = m(input)
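
    The no-batch-dim form from the Shape section is also accepted; a minimal
    sketch::

        >>> m = nn.Dropout3d(p=0.2)
        >>> output = m(torch.randn(16, 4, 32, 32))  # (C, D, H, W), unbatched
        >>> output.shape
        torch.Size([16, 4, 32, 32])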

    .. _Efficient Object Localization Using Convolutional Networks:
       https://arxiv.org/abs/1411.4280
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.dropout3d(input, self.p, self.training, self.inplace)


class AlphaDropout(_DropoutNd):
    r"""Applies Alpha Dropout over the input.

    Alpha Dropout is a type of Dropout that maintains the self-normalizing
    property: for an input with zero mean and unit standard deviation, the
    output of Alpha Dropout maintains the original mean and standard deviation.
    Alpha Dropout goes hand-in-hand with the SELU activation function, which
    ensures that the outputs have zero mean and unit standard deviation.

    During training, it randomly masks some of the elements of the input
    tensor with probability *p* using samples from a Bernoulli distribution.
    The elements to be masked are randomized on every forward call, and scaled
    and shifted to maintain zero mean and unit standard deviation.

    During evaluation the module simply computes an identity function.

    More details can be found in the paper `Self-Normalizing Neural Networks`_ .

    Args:
        p (float): probability of an element to be dropped. Default: 0.5
        inplace (bool, optional): If set to ``True``, will do this operation
            in-place. Default: ``False``

    Shape:
        - Input: :math:`(*)`. Input can be of any shape
        - Output: :math:`(*)`. Output is of the same shape as input

    Examples::

        >>> m = nn.AlphaDropout(p=0.2)
        >>> input = torch.randn(20, 16)
        >>> output = m(input)
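
    A rough numerical check of the mean/std preservation described above
    (statistics are approximate and vary run to run; the tolerances are
    illustrative)::

        >>> m = nn.AlphaDropout(p=0.2)  # training mode by default
        >>> y = m(torch.randn(10000))
        >>> bool(y.mean().abs() < 0.1 and (y.std() - 1).abs() < 0.1)
        True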

    .. _Self-Normalizing Neural Networks: https://arxiv.org/abs/1706.02515
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.alpha_dropout(input, self.p, self.training)


class FeatureAlphaDropout(_DropoutNd):
    r"""Randomly masks out entire channels.

    A channel is a feature map,
    e.g. the :math:`j`-th channel of the :math:`i`-th sample in the batched
    input is a tensor :math:`\text{input}[i, j]`. Instead of setting
    activations to zero, as in regular Dropout, the activations are set
    to the negative saturation value of the SELU activation function. More details
    can be found in the paper `Self-Normalizing Neural Networks`_ .

    Each channel will be masked independently on every forward call with
    probability :attr:`p` using samples from a Bernoulli distribution.
    The elements to be masked are randomized on every forward call, and scaled
    and shifted to maintain zero mean and unit variance.

    Usually the input comes from :class:`nn.AlphaDropout` modules.

    As described in the paper
    `Efficient Object Localization Using Convolutional Networks`_ ,
    if adjacent pixels within feature maps are strongly correlated
    (as is normally the case in early convolution layers) then i.i.d. dropout
    will not regularize the activations and will otherwise just result
    in an effective learning rate decrease.

282    feature maps and should be used instead.
283

    Args:
        p (float, optional): probability of an element to be zeroed. Default: 0.5
        inplace (bool, optional): If set to ``True``, will do this operation
            in-place. Default: ``False``

    Shape:
        - Input: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)`.
        - Output: :math:`(N, C, D, H, W)` or :math:`(C, D, H, W)` (same shape as input).

    Examples::

        >>> m = nn.FeatureAlphaDropout(p=0.2)
        >>> input = torch.randn(20, 16, 4, 32, 32)
        >>> output = m(input)
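
    A rough sketch of the channel-wise masking: each channel collapses to a
    single per-channel constant, and (unlike zeroing dropout) masked channels
    take a nonzero value derived from the SELU negative saturation::

        >>> m = nn.FeatureAlphaDropout(p=0.5)  # training mode by default
        >>> y = m(torch.zeros(8, 16, 4, 4, 4))
        >>> flat = y.flatten(2)  # (N, C, D*H*W)
        >>> bool((flat == flat[..., :1]).all())  # every channel is constant
        True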

    .. _Self-Normalizing Neural Networks: https://arxiv.org/abs/1706.02515
    .. _Efficient Object Localization Using Convolutional Networks:
       https://arxiv.org/abs/1411.4280
    """

    def forward(self, input: Tensor) -> Tensor:
        return F.feature_alpha_dropout(input, self.p, self.training)