// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8*4bdc9457SAndroid Build Coastguard Worker
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/operator.h>
#include <xnnpack/log.h>
#include <xnnpack/microparams-init.h>
22*4bdc9457SAndroid Build Coastguard Worker
23*4bdc9457SAndroid Build Coastguard Worker
// Creates a SoftMax operator for unsigned 8-bit quantized (QU8) data.
//
// Parameters:
//   channels          - number of channels; must be non-zero.
//   input_stride      - input stride, in elements; must be at least `channels`.
//   output_stride     - output stride, in elements; must be at least `channels`.
//   input_scale       - input quantization scale; must be positive, finite
//                       and normalized.
//   output_zero_point - output quantization zero point; only 0 is supported.
//   output_scale      - output quantization scale; only 1/256 is supported.
//   flags             - stored in the operator as-is.
//   softmax_op_out    - receives the created operator on success; it is not
//                       written on failure.
//
// Returns xnn_status_success on success. On failure returns
// xnn_status_uninitialized, xnn_status_invalid_parameter,
// xnn_status_unsupported_parameter or xnn_status_out_of_memory, depending on
// which check failed; any partially constructed operator is passed to
// xnn_delete_operator().
enum xnn_status xnn_create_softmax_nc_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint32_t flags,
    xnn_operator_t* softmax_op_out)
{
  xnn_operator_t softmax_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_stride, channels);
    goto error;
  }

  // isnormal() rejects zero, subnormals, infinities and NaN.
  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_scale);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

  if (output_scale != 0x1.0p-8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: only output scale of 1/256 is supported",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_scale);
    goto error;
  }

  if (output_zero_point != 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu8 " output zero point: only output zero point of 0 is supported",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_zero_point);
    goto error;
  }

  status = xnn_status_out_of_memory;

  softmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (softmax_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

  // One 32-bit entry per possible 8-bit input value.
  const size_t lookup_table_size = 256 * sizeof(uint32_t);
  softmax_op->lookup_table = xnn_allocate_simd_memory(lookup_table_size);
  if (softmax_op->lookup_table == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator lookup table",
      lookup_table_size, xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

  // Entry i holds round(qscale * exp((i - 255) * input_scale)). The exponent
  // is never positive, so every unrounded entry is at most qscale, which is
  // capped at both UINT32_MAX / channels and 8388607 (2^23 - 1).
  // NOTE(review): lrint() may round an entry up past UINT32_MAX / channels
  // (e.g. channels == 600); if the kernel accumulates `channels` entries in
  // 32 bits this could overflow when all inputs are equal - confirm against
  // the lut32norm micro-kernel.
  uint32_t* lookup_table = softmax_op->lookup_table;
  const double qscale = fmin(((double) UINT32_MAX) / (double) channels, 8388607.0);
  for (int32_t i = 0; i < 256; i++) {
    const double scaled_exp_xi = qscale * exp((double) (i - 255) * (double) input_scale);
    lookup_table[(uint32_t) i] = (uint32_t) lrint(scaled_exp_xi);
  }

  softmax_op->channels = channels;
  softmax_op->input_pixel_stride = input_stride;
  softmax_op->output_pixel_stride = output_stride;

  softmax_op->type = xnn_operator_type_softmax_nc_qu8;
  softmax_op->flags = flags;

  // The operator cannot run until a setup call marks it ready.
  softmax_op->state = xnn_run_state_invalid;

  *softmax_op_out = softmax_op;
  return xnn_status_success;

error:
  xnn_delete_operator(softmax_op);
  return status;
}
139*4bdc9457SAndroid Build Coastguard Worker
// Binds input/output buffers and a batch size to a QU8 SoftMax operator and
// prepares its 1D parallel compute task.
//
// Parameters:
//   softmax_op - operator whose type must be xnn_operator_type_softmax_nc_qu8.
//   batch_size - number of work items in the 1D range; if zero, the operator
//                is marked to be skipped and the call succeeds.
//   input      - input buffer, stored in the operator and its compute context.
//   output     - output buffer, stored in the operator and its compute context.
//   threadpool - not referenced by this function.
//
// Returns xnn_status_success on success, xnn_status_invalid_parameter on an
// operator type mismatch, or xnn_status_uninitialized if XNNPACK is not
// initialized.
enum xnn_status xnn_setup_softmax_nc_qu8(
    xnn_operator_t softmax_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (softmax_op->type != xnn_operator_type_softmax_nc_qu8) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8),
      xnn_operator_type_to_string(softmax_op->type));
    return xnn_status_invalid_parameter;
  }
  // Invalidate any previous setup until this one completes.
  softmax_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    softmax_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  softmax_op->batch_size = batch_size;
  softmax_op->input = input;
  softmax_op->output = output;

  // Strides are stored in elements on the operator and converted to bytes here.
  softmax_op->context.u8_softmax = (struct u8_softmax_context) {
    .n = softmax_op->channels,
    .x = input,
    .x_stride = softmax_op->input_pixel_stride * sizeof(uint8_t),
    .t = softmax_op->lookup_table,
    .y = output,
    .y_stride = softmax_op->output_pixel_stride * sizeof(uint8_t),
    .rmax_ukernel = xnn_params.u8.rmax,
    .lut_norm_ukernel = xnn_params.u8.lut32norm,
  };
  softmax_op->compute.type = xnn_parallelization_type_1d;
  softmax_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_u8_softmax;
  softmax_op->compute.range[0] = batch_size;
  softmax_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
187*4bdc9457SAndroid Build Coastguard Worker
// Shared constructor behind the floating-point SoftMax create functions.
//
// Checks, in order: XNNPACK initialization, availability of the data type
// given by `datatype_init_flags`, a non-zero channel count, and that both
// strides are at least `channels`. Then allocates a zeroed operator
// descriptor, fills it in and stores it in `*softmax_op_out`.
//
// Every failure path hands the (possibly NULL) operator to
// xnn_delete_operator() and returns the status of the failed check;
// `*softmax_op_out` is written only on success.
static enum xnn_status create_softmax_nc_floating_point(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    uint32_t datatype_init_flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* softmax_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status result;

  if (!(xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK)) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    result = xnn_status_uninitialized;
    goto fail;
  }

  // All requested data type bits must be set, not just some of them.
  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error("failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    result = xnn_status_unsupported_hardware;
    goto fail;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_stride, channels);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_stride, channels);
    result = xnn_status_invalid_parameter;
    goto fail;
  }

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    result = xnn_status_out_of_memory;
    goto fail;
  }

  op->channels = channels;
  op->input_pixel_stride = input_stride;
  op->output_pixel_stride = output_stride;

  op->type = operator_type;
  op->flags = flags;

  // Not runnable until a setup call marks it ready.
  op->state = xnn_run_state_invalid;

  *softmax_op_out = op;
  return xnn_status_success;

fail:
  xnn_delete_operator(op);
  return result;
}
265*4bdc9457SAndroid Build Coastguard Worker
// Creates a half-precision (F16) SoftMax operator.
//
// Delegates to create_softmax_nc_floating_point() with the F16 init flag and
// the F16 softmax operator type; its status is returned unchanged.
enum xnn_status xnn_create_softmax_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* softmax_op_out)
{
  const enum xnn_status status = create_softmax_nc_floating_point(
    channels, input_stride, output_stride, flags,
    XNN_INIT_FLAG_F16, xnn_operator_type_softmax_nc_f16,
    softmax_op_out);
  return status;
}
280*4bdc9457SAndroid Build Coastguard Worker
// Creates a single-precision (F32) SoftMax operator.
//
// Delegates to create_softmax_nc_floating_point() with the F32 init flag and
// the F32 softmax operator type; its status is returned unchanged.
enum xnn_status xnn_create_softmax_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* softmax_op_out)
{
  const enum xnn_status status = create_softmax_nc_floating_point(
    channels, input_stride, output_stride, flags,
    XNN_INIT_FLAG_F32, xnn_operator_type_softmax_nc_f32,
    softmax_op_out);
  return status;
}
295*4bdc9457SAndroid Build Coastguard Worker
// Shared setup routine behind the floating-point SoftMax setup functions.
//
// Verifies the operator type and XNNPACK initialization, records the batch
// size and buffers, and fills the floating-point softmax compute context.
// Channel count and strides are converted from elements to bytes by shifting
// left by `log2_element_size`. The caller-provided parameter blobs are copied
// into the context with the sizes given.
//
// A zero `batch_size` marks the operator as skipped and returns success
// without touching the context.
static enum xnn_status setup_softmax_nc_floating_point(
    xnn_operator_t softmax_op,
    enum xnn_operator_type expected_operator_type,
    size_t batch_size,
    const void* input,
    void* output,
    uint32_t log2_element_size,
    xnn_rmax_ukernel_function rmax,
    const struct raddstoreexpminusmax_parameters raddstoreexpminusmax[restrict XNN_MIN_ELEMENTS(1)],
    const struct vbinary_parameters vmul[restrict XNN_MIN_ELEMENTS(1)],
    xnn_compute_reciprocal_function compute_reciprocal,
    const void* expminus_params,
    size_t expminus_params_size,
    const void* minmax_params,
    size_t minmax_params_size)
{
  if (softmax_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(softmax_op->type));
    return xnn_status_invalid_parameter;
  }

  // Any earlier setup is void until this one finishes.
  softmax_op->state = xnn_run_state_invalid;

  if (!(xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK)) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(expected_operator_type));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    softmax_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  softmax_op->batch_size = batch_size;
  softmax_op->input = input;
  softmax_op->output = output;

  struct floating_point_softmax_context* const ctx = &softmax_op->context.floating_point_softmax;
  *ctx = (struct floating_point_softmax_context) {
    .n = softmax_op->channels << log2_element_size,
    .x = input,
    .x_stride = softmax_op->input_pixel_stride << log2_element_size,
    .y = output,
    .y_stride = softmax_op->output_pixel_stride << log2_element_size,
    .rmax_ukernel = rmax,
    .raddstoreexpminusmax_ukernel = raddstoreexpminusmax->ukernel,
    .compute_reciprocal = compute_reciprocal,
    .vmulc_ukernel = vmul->minmax.opc_ukernel,
  };
  // The linear multiply kernel, when available, replaces the minmax one.
  if (vmul->linear.opc_ukernel != NULL) {
    ctx->vmulc_ukernel = vmul->linear.opc_ukernel;
  }
  memcpy(&ctx->expminus_params, expminus_params, expminus_params_size);
  memcpy(&ctx->minmax_params, minmax_params, minmax_params_size);

  softmax_op->compute.type = xnn_parallelization_type_1d;
  softmax_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_floating_point_softmax;
  softmax_op->compute.range[0] = batch_size;
  softmax_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
358*4bdc9457SAndroid Build Coastguard Worker
// Writes the reciprocal of the half-precision value at `input` to `output`.
// The division is carried out in single precision and the result is
// converted back to the 16-bit representation.
static void compute_reciprocal_f16(
    const uint16_t input[XNN_MIN_ELEMENTS(1)],
    uint16_t output[XNN_MIN_ELEMENTS(1)])
{
  const float value = fp16_ieee_to_fp32_value(input[0]);
  const float reciprocal = 1.0f / value;
  output[0] = fp16_ieee_from_fp32_value(reciprocal);
}
365*4bdc9457SAndroid Build Coastguard Worker
// Sets up a half-precision (F16) SoftMax operator.
//
// Builds the F16 exp-minus and min/max micro-kernel parameters, then delegates
// to setup_softmax_nc_floating_point() with the F16 micro-kernels and an
// element size of 2 bytes.
//
// Parameters:
//   softmax_op - operator created as xnn_operator_type_softmax_nc_f16.
//   batch_size - number of work items; zero makes the operator a no-op.
//   input      - input buffer of 16-bit elements.
//   output     - output buffer of 16-bit elements.
//   threadpool - not referenced by this function.
//
// Returns the status of setup_softmax_nc_floating_point().
enum xnn_status xnn_setup_softmax_nc_f16(
    xnn_operator_t softmax_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  // Both parameter unions are copied into the operator context in full, even
  // when no init function is registered. Zero-fill them first so the context
  // never receives indeterminate stack bytes.
  union xnn_f16_expminus_params expminus_params;
  memset(&expminus_params, 0, sizeof(expminus_params));
  if (xnn_params.f16.raddstoreexpminusmax.init.f16 != NULL) {
    xnn_params.f16.raddstoreexpminusmax.init.f16(&expminus_params);
  }

  union xnn_f16_minmax_params minmax_params;
  memset(&minmax_params, 0, sizeof(minmax_params));
  if (xnn_params.f16.vmul.init.f16_minmax != NULL) {
    // 0xFC00 and 0x7C00 are the IEEE half-precision bit patterns of -infinity
    // and +infinity.
    xnn_params.f16.vmul.init.f16_minmax(&minmax_params, UINT16_C(0xFC00), UINT16_C(0x7C00));
  }

  return setup_softmax_nc_floating_point(
    softmax_op, xnn_operator_type_softmax_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    xnn_params.f16.rmax, &xnn_params.f16.raddstoreexpminusmax, &xnn_params.f16.vmul,
    (xnn_compute_reciprocal_function) compute_reciprocal_f16,
    &expminus_params, sizeof(expminus_params),
    &minmax_params, sizeof(minmax_params));
}
390*4bdc9457SAndroid Build Coastguard Worker
// Writes the single-precision reciprocal of the value at `input` to `output`.
static void compute_reciprocal_f32(
    const float input[XNN_MIN_ELEMENTS(1)],
    float output[XNN_MIN_ELEMENTS(1)])
{
  const float value = input[0];
  output[0] = 1.0f / value;
}
397*4bdc9457SAndroid Build Coastguard Worker
// Sets up a single-precision (F32) SoftMax operator.
//
// Builds the F32 exp-minus and min/max micro-kernel parameters, then delegates
// to setup_softmax_nc_floating_point() with the F32 micro-kernels and an
// element size of 4 bytes.
//
// Parameters:
//   softmax_op - operator created as xnn_operator_type_softmax_nc_f32.
//   batch_size - number of work items; zero makes the operator a no-op.
//   input      - input buffer of floats.
//   output     - output buffer of floats.
//   threadpool - not referenced by this function.
//
// Returns the status of setup_softmax_nc_floating_point().
enum xnn_status xnn_setup_softmax_nc_f32(
    xnn_operator_t softmax_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  // Both parameter unions are copied into the operator context in full, even
  // when no init function is registered. Zero-fill them first so the context
  // never receives indeterminate stack bytes.
  union xnn_f32_expminus_params expminus_params;
  memset(&expminus_params, 0, sizeof(expminus_params));
  if (xnn_params.f32.raddstoreexpminusmax.init.f32 != NULL) {
    xnn_params.f32.raddstoreexpminusmax.init.f32(&expminus_params);
  }

  union xnn_f32_minmax_params minmax_params;
  memset(&minmax_params, 0, sizeof(minmax_params));
  if (xnn_params.f32.vmul.init.f32_minmax != NULL) {
    // An unbounded [-inf, +inf] range is passed as the output limits.
    xnn_params.f32.vmul.init.f32_minmax(&minmax_params, -INFINITY, INFINITY);
  }

  return setup_softmax_nc_floating_point(
    softmax_op, xnn_operator_type_softmax_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    xnn_params.f32.rmax, &xnn_params.f32.raddstoreexpminusmax, &xnn_params.f32.vmul,
    (xnn_compute_reciprocal_function) compute_reciprocal_f32,
    &expminus_params, sizeof(expminus_params),
    &minmax_params, sizeof(minmax_params));
}
422