xref: /aosp_15_r20/external/XNNPACK/src/operators/binary-elementwise-nd.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker 
6*4bdc9457SAndroid Build Coastguard Worker #include <assert.h>
7*4bdc9457SAndroid Build Coastguard Worker #include <math.h>
8*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h>
9*4bdc9457SAndroid Build Coastguard Worker #include <stdint.h>
10*4bdc9457SAndroid Build Coastguard Worker #include <stdlib.h>
11*4bdc9457SAndroid Build Coastguard Worker 
12*4bdc9457SAndroid Build Coastguard Worker #include <fp16.h>
13*4bdc9457SAndroid Build Coastguard Worker 
14*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack.h>
15*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/allocator.h>
16*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/log.h>
17*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/operator.h>
18*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/microparams-init.h>
19*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/params.h>
20*4bdc9457SAndroid Build Coastguard Worker 
21*4bdc9457SAndroid Build Coastguard Worker 
// Shared constructor behind the binary elementwise N-d operators in this file.
//
// Steps, in order:
//   1. fail with xnn_status_uninitialized unless XNN_INIT_FLAG_XNNPACK is set
//      in xnn_params.init_flags;
//   2. fail with xnn_status_unsupported_hardware unless every bit of
//      datatype_init_flags is set in xnn_params.init_flags;
//   3. allocate a zeroed operator descriptor (xnn_status_out_of_memory on
//      failure);
//   4. copy params_size bytes from params into the descriptor's params field
//      (skipped when params_size is 0, so params may be NULL in that case);
//   5. record the op/opc/ropc micro-kernels, the operator type and the flags,
//      and mark the run state as invalid.
//
// On success the new operator is stored in *binary_elementwise_op_out and
// xnn_status_success is returned. The output pointer is not written on failure.
static enum xnn_status create_binary_elementwise_nd(
    uint32_t flags,
    const void* params,
    size_t params_size,
    uint32_t datatype_init_flags,
    enum xnn_operator_type operator_type,
    const struct vbinary_fused_ukernels* vbinary_fused_ukernels,
    xnn_operator_t* binary_elementwise_op_out)
{
  if (!(xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK)) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_uninitialized;
  }

  // All requested data type bits must be present, not just some of them.
  if (datatype_init_flags != (xnn_params.init_flags & datatype_init_flags)) {
    xnn_log_error("failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_unsupported_hardware;
  }

  xnn_operator_t op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    return xnn_status_out_of_memory;
  }

  op->type = operator_type;
  op->flags = flags;
  op->state = xnn_run_state_invalid;

  op->ukernel.vbinary.op_function   = vbinary_fused_ukernels->op_ukernel;
  op->ukernel.vbinary.opc_function  = vbinary_fused_ukernels->opc_ukernel;
  op->ukernel.vbinary.ropc_function = vbinary_fused_ukernels->ropc_ukernel;

  // Parameter-less operators pass params_size == 0 and skip the copy.
  if (params_size > 0) {
    memcpy(&op->params, params, params_size);
  }

  *binary_elementwise_op_out = op;
  return xnn_status_success;
}
67*4bdc9457SAndroid Build Coastguard Worker 
// Creates a half-precision binary elementwise operator with output clamping.
//
// output_min and output_max are given as single-precision values. Both must be
// non-NaN, and after rounding to IEEE half precision the lower bound must be
// strictly below the upper bound; otherwise xnn_status_invalid_parameter is
// returned. When vbinary->init.f16_minmax is set, it is called with the
// half-precision bounds to fill the micro-kernel parameters. The operator is
// then built from vbinary->minmax via create_binary_elementwise_nd with
// XNN_INIT_FLAG_F16, and that call's status is returned.
static enum xnn_status create_binary_elementwise_nd_f16(
    float output_min,
    float output_max,
    uint32_t flags,
    enum xnn_operator_type operator_type,
    const struct vbinary_parameters vbinary[restrict XNN_MIN_ELEMENTS(1)],
    xnn_operator_t* binary_elementwise_op_out)
{
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_invalid_parameter;
  }

  // Compare the bounds as the half-precision kernels will see them: two
  // distinct float bounds can collapse to the same value once rounded.
  const float rounded_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_min));
  const float rounded_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(output_max));
  if (rounded_min >= rounded_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(operator_type),
      rounded_min,
      rounded_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f16_minmax_params minmax_params;
  if (vbinary->init.f16_minmax != NULL) {
    vbinary->init.f16_minmax(
      &minmax_params,
      fp16_ieee_from_fp32_value(output_min),
      fp16_ieee_from_fp32_value(output_max));
  }

  return create_binary_elementwise_nd(
    flags,
    &minmax_params,
    sizeof(minmax_params),
    XNN_INIT_FLAG_F16,
    operator_type,
    &vbinary->minmax,
    binary_elementwise_op_out);
}
113*4bdc9457SAndroid Build Coastguard Worker 
// Creates a single-precision binary elementwise operator with output clamping.
//
// Returns xnn_status_uninitialized when XNNPACK is not initialized, and
// xnn_status_invalid_parameter when either bound is NaN or output_min is not
// strictly below output_max. When the range is exactly [-INFINITY, +INFINITY]
// and vbinary->linear provides an op micro-kernel, the linear micro-kernels
// are used; otherwise the minmax ones are. When vbinary->init.f32_minmax is
// set, it is called to fill the micro-kernel parameters. The operator is then
// built via create_binary_elementwise_nd with XNN_INIT_FLAG_F32, and that
// call's status is returned.
static enum xnn_status create_binary_elementwise_nd_f32(
    float output_min,
    float output_max,
    uint32_t flags,
    enum xnn_operator_type operator_type,
    const struct vbinary_parameters vbinary[restrict XNN_MIN_ELEMENTS(1)],
    xnn_operator_t* binary_elementwise_op_out)
{
  // Checked here as well as in create_binary_elementwise_nd, so an
  // uninitialized library is reported before any parameter error.
  if (!(xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK)) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_uninitialized;
  }

  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(operator_type), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  // An unbounded range needs no clamping, so the linear micro-kernels are
  // used when they are available.
  const bool unbounded = (output_min == -INFINITY) && (output_max == INFINITY);
  const struct vbinary_fused_ukernels* selected_ukernels =
    (unbounded && vbinary->linear.op_ukernel != NULL) ? &vbinary->linear : &vbinary->minmax;

  union xnn_f32_minmax_params minmax_params;
  if (vbinary->init.f32_minmax != NULL) {
    vbinary->init.f32_minmax(&minmax_params, output_min, output_max);
  }

  return create_binary_elementwise_nd(
    flags,
    &minmax_params,
    sizeof(minmax_params),
    XNN_INIT_FLAG_F32,
    operator_type,
    selected_ukernels,
    binary_elementwise_op_out);
}
168*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-d addition operator for signed 8-bit quantized tensors.
//
// Validation, in order:
//   - each of input1_scale, input2_scale and output_scale must be positive and
//     normal, else xnn_status_invalid_parameter;
//   - output_min must be strictly below output_max, else
//     xnn_status_invalid_parameter;
//   - input1_scale / output_scale and input2_scale / output_scale must each lie
//     in [2**-10, 2**8), else xnn_status_unsupported_parameter.
//
// Two parameter sets are prepared when xnn_params.qs8.vadd.init.qs8_add is set:
// one with the inputs in the given order and one with the two inputs swapped.
// Both are handed to create_binary_elementwise_nd together with the QS8 vadd
// minmax micro-kernels, and that call's status is returned.
enum xnn_status xnn_create_add_nd_qs8(
    int8_t input1_zero_point,
    float input1_scale,
    int8_t input2_zero_point,
    float input2_scale,
    int8_t output_zero_point,
    float output_scale,
    int8_t output_min,
    int8_t output_max,
    uint32_t flags,
    xnn_operator_t* add_op_out)
{
  if (input1_scale <= 0.0f || !isnormal(input1_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 1 scale: scale must be finite and positive",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qs8), input1_scale);
    return xnn_status_invalid_parameter;
  }

  if (input2_scale <= 0.0f || !isnormal(input2_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 2 scale: scale must be finite and positive",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qs8), input2_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite and positive",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qs8), output_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qs8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  // Input scales relative to the output scale; each must fall in [2**-10, 2**8).
  const float scale_ratio1 = input1_scale / output_scale;
  if (scale_ratio1 < 0x1.0p-10f || scale_ratio1 >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input1-to-output scale ratio: scale ratio must be in [2**-10, 2**8) range",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qs8), scale_ratio1);
    return xnn_status_unsupported_parameter;
  }

  const float scale_ratio2 = input2_scale / output_scale;
  if (scale_ratio2 < 0x1.0p-10f || scale_ratio2 >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input2-to-output scale ratio: scale ratio must be in [2**-10, 2**8) range",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qs8), scale_ratio2);
    return xnn_status_unsupported_parameter;
  }

  // qs8_add holds parameters for (input1, input2); qs8_radd holds the same
  // parameters computed with the two inputs swapped.
  struct {
    union xnn_qs8_add_minmax_params qs8_add;
    union xnn_qs8_add_minmax_params qs8_radd;
  } add_params;
  if (xnn_params.qs8.vadd.init.qs8_add != NULL) {
    xnn_params.qs8.vadd.init.qs8_add(
      &add_params.qs8_add,
      input1_zero_point, input2_zero_point, output_zero_point,
      scale_ratio1, scale_ratio2,
      output_min, output_max);
    xnn_params.qs8.vadd.init.qs8_add(
      &add_params.qs8_radd,
      input2_zero_point, input1_zero_point, output_zero_point,
      scale_ratio2, scale_ratio1,
      output_min, output_max);
  }

  return create_binary_elementwise_nd(
    flags,
    &add_params,
    sizeof(add_params),
    XNN_INIT_FLAG_QS8,
    xnn_operator_type_add_nd_qs8,
    &xnn_params.qs8.vadd.minmax,
    add_op_out);
}
246*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-d addition operator for unsigned 8-bit quantized tensors.
//
// Validation, in order:
//   - each of input1_scale, input2_scale and output_scale must be positive and
//     normal, else xnn_status_invalid_parameter;
//   - output_min must be strictly below output_max, else
//     xnn_status_invalid_parameter;
//   - input1_scale / output_scale and input2_scale / output_scale must each lie
//     in [2**-10, 2**8), else xnn_status_unsupported_parameter.
//
// Two parameter sets are prepared when xnn_params.qu8.vadd.init.qu8_add is set:
// one with the inputs in the given order and one with the two inputs swapped.
// Both are handed to create_binary_elementwise_nd together with the QU8 vadd
// minmax micro-kernels, and that call's status is returned.
enum xnn_status xnn_create_add_nd_qu8(
    uint8_t input1_zero_point,
    float input1_scale,
    uint8_t input2_zero_point,
    float input2_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* add_op_out)
{
  if (input1_scale <= 0.0f || !isnormal(input1_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 1 scale: scale must be finite and positive",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qu8), input1_scale);
    return xnn_status_invalid_parameter;
  }

  if (input2_scale <= 0.0f || !isnormal(input2_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 2 scale: scale must be finite and positive",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qu8), input2_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite and positive",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qu8), output_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qu8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  // Input scales relative to the output scale; each must fall in [2**-10, 2**8).
  const float scale_ratio1 = input1_scale / output_scale;
  if (scale_ratio1 < 0x1.0p-10f || scale_ratio1 >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input1-to-output scale ratio: scale ratio must be in [2**-10, 2**8) range",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qu8), scale_ratio1);
    return xnn_status_unsupported_parameter;
  }

  const float scale_ratio2 = input2_scale / output_scale;
  if (scale_ratio2 < 0x1.0p-10f || scale_ratio2 >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input2-to-output scale ratio: scale ratio must be in [2**-10, 2**8) range",
      xnn_operator_type_to_string(xnn_operator_type_add_nd_qu8), scale_ratio2);
    return xnn_status_unsupported_parameter;
  }

  // qu8_add holds parameters for (input1, input2); qu8_radd holds the same
  // parameters computed with the two inputs swapped.
  struct {
    union xnn_qu8_add_minmax_params qu8_add;
    union xnn_qu8_add_minmax_params qu8_radd;
  } add_params;
  if (xnn_params.qu8.vadd.init.qu8_add != NULL) {
    xnn_params.qu8.vadd.init.qu8_add(
      &add_params.qu8_add,
      input1_zero_point, input2_zero_point, output_zero_point,
      scale_ratio1, scale_ratio2,
      output_min, output_max);
    xnn_params.qu8.vadd.init.qu8_add(
      &add_params.qu8_radd,
      input2_zero_point, input1_zero_point, output_zero_point,
      scale_ratio2, scale_ratio1,
      output_min, output_max);
  }

  return create_binary_elementwise_nd(
    flags,
    &add_params,
    sizeof(add_params),
    XNN_INIT_FLAG_QU8,
    xnn_operator_type_add_nd_qu8,
    &xnn_params.qu8.vadd.minmax,
    add_op_out);
}
324*4bdc9457SAndroid Build Coastguard Worker 
// Creates a half-precision N-d addition operator whose output is clamped to
// [output_min, output_max]. Forwards to create_binary_elementwise_nd_f16 with
// the f16 vadd parameters and returns its status.
enum xnn_status xnn_create_add_nd_f16(
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* add_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd_f16(
    output_min, output_max, flags,
    xnn_operator_type_add_nd_f16,
    &xnn_params.f16.vadd,
    add_op_out);
  return status;
}
339*4bdc9457SAndroid Build Coastguard Worker 
// Creates a single-precision N-d addition operator whose output is clamped to
// [output_min, output_max]. Forwards to create_binary_elementwise_nd_f32 with
// the f32 vadd parameters and returns its status.
enum xnn_status xnn_create_add_nd_f32(
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* add_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd_f32(
    output_min, output_max, flags,
    xnn_operator_type_add_nd_f32,
    &xnn_params.f32.vadd,
    add_op_out);
  return status;
}
354*4bdc9457SAndroid Build Coastguard Worker 
// Creates a half-precision N-d division operator whose output is clamped to
// [output_min, output_max]. Forwards to create_binary_elementwise_nd_f16 with
// the f16 vdiv parameters and returns its status.
enum xnn_status xnn_create_divide_nd_f16(
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* divide_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd_f16(
    output_min, output_max, flags,
    xnn_operator_type_divide_nd_f16,
    &xnn_params.f16.vdiv,
    divide_op_out);
  return status;
}
369*4bdc9457SAndroid Build Coastguard Worker 
// Creates a single-precision N-d division operator whose output is clamped to
// [output_min, output_max]. Forwards to create_binary_elementwise_nd_f32 with
// the f32 vdiv parameters and returns its status.
enum xnn_status xnn_create_divide_nd_f32(
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* divide_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd_f32(
    output_min, output_max, flags,
    xnn_operator_type_divide_nd_f32,
    &xnn_params.f32.vdiv,
    divide_op_out);
  return status;
}
384*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional F16 Maximum operator.
//
// No operator parameters are supplied (NULL pointer, zero size). The call is
// forwarded to create_binary_elementwise_nd with the F16 init flag, the
// maximum_nd_f16 operator type and xnn_params.f16.vmax.minmax; the helper's
// status is returned as-is.
enum xnn_status xnn_create_maximum_nd_f16(
    uint32_t flags,
    xnn_operator_t* maximum_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd(
    flags,
    NULL /* params */, 0 /* params_size */,
    XNN_INIT_FLAG_F16,
    xnn_operator_type_maximum_nd_f16,
    &xnn_params.f16.vmax.minmax,
    maximum_op_out);
  return status;
}
398*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional F32 Maximum operator.
//
// Builds the default F32 parameter block for the vmax entry (when an init
// function is registered for it) and forwards it, together with the F32 init
// flag, the maximum_nd_f32 operator type and xnn_params.f32.vmax.minmax, to
// create_binary_elementwise_nd. The helper's status is returned as-is.
enum xnn_status xnn_create_maximum_nd_f32(
    uint32_t flags,
    xnn_operator_t* maximum_op_out)
{
  // Zero-fill first: the whole object is handed to the helper by address and
  // size, so its bytes must be well defined even when no init function is
  // registered.
  union xnn_f32_default_params params = {0};
  // Use the init hook that belongs to the vmax entry this operator runs
  // (the previous code consulted the vmin entry instead).
  if (xnn_params.f32.vmax.init.f32_default != NULL) {
    xnn_params.f32.vmax.init.f32_default(&params);
  }
  return create_binary_elementwise_nd(
    flags,
    &params,
    sizeof(params),
    XNN_INIT_FLAG_F32,
    xnn_operator_type_maximum_nd_f32,
    &xnn_params.f32.vmax.minmax,
    maximum_op_out);
}
416*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional F16 Minimum operator.
//
// No operator parameters are supplied (NULL pointer, zero size). The call is
// forwarded to create_binary_elementwise_nd with the F16 init flag, the
// minimum_nd_f16 operator type and xnn_params.f16.vmin.minmax; the helper's
// status is returned as-is.
enum xnn_status xnn_create_minimum_nd_f16(
    uint32_t flags,
    xnn_operator_t* minimum_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd(
    flags,
    NULL /* params */, 0 /* params_size */,
    XNN_INIT_FLAG_F16,
    xnn_operator_type_minimum_nd_f16,
    &xnn_params.f16.vmin.minmax,
    minimum_op_out);
  return status;
}
430*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional F32 Minimum operator.
//
// Builds the default F32 parameter block for the vmin entry (when an init
// function is registered for it) and forwards it, together with the F32 init
// flag, the minimum_nd_f32 operator type and xnn_params.f32.vmin.minmax, to
// create_binary_elementwise_nd. The helper's status is returned as-is.
enum xnn_status xnn_create_minimum_nd_f32(
    uint32_t flags,
    xnn_operator_t* minimum_op_out)
{
  // Zero-fill first: the whole object is handed to the helper by address and
  // size, so its bytes must be well defined even when no init function is
  // registered.
  union xnn_f32_default_params params = {0};
  if (xnn_params.f32.vmin.init.f32_default != NULL) {
    xnn_params.f32.vmin.init.f32_default(&params);
  }
  return create_binary_elementwise_nd(
    flags,
    &params,
    sizeof(params),
    XNN_INIT_FLAG_F32,
    xnn_operator_type_minimum_nd_f32,
    &xnn_params.f32.vmin.minmax,
    minimum_op_out);
}
448*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional F16 Multiply operator.
//
// Thin wrapper: every argument is passed through unchanged to
// create_binary_elementwise_nd_f16, together with the multiply_nd_f16
// operator type and the xnn_params.f16.vmul entry. The helper's status is
// returned as-is.
enum xnn_status xnn_create_multiply_nd_f16(
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* multiply_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd_f16(
    output_min, output_max, flags,
    xnn_operator_type_multiply_nd_f16,
    &xnn_params.f16.vmul,
    multiply_op_out);
  return status;
}
463*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional F32 Multiply operator.
//
// Thin wrapper: every argument is passed through unchanged to
// create_binary_elementwise_nd_f32, together with the multiply_nd_f32
// operator type and the xnn_params.f32.vmul entry. The helper's status is
// returned as-is.
enum xnn_status xnn_create_multiply_nd_f32(
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* multiply_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd_f32(
    output_min, output_max, flags,
    xnn_operator_type_multiply_nd_f32,
    &xnn_params.f32.vmul,
    multiply_op_out);
  return status;
}
478*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional QS8 (int8_t quantized) Multiply operator.
//
// Validation, in order:
//   - each of input1_scale, input2_scale and output_scale must be positive
//     and satisfy isnormal(), otherwise xnn_status_invalid_parameter;
//   - output_min must be strictly below output_max, otherwise
//     xnn_status_invalid_parameter;
//   - (input1_scale * input2_scale) / output_scale must lie in
//     [2**-16, 2**8), otherwise xnn_status_unsupported_parameter.
// Every rejection is reported through xnn_log_error.
//
// On success two parameter sets are prepared with the registered qs8 vmul
// init function: one with the zero points in (input1, input2) order and one
// with them swapped. Both are handed to create_binary_elementwise_nd, whose
// status is returned.
enum xnn_status xnn_create_multiply_nd_qs8(
    int8_t input1_zero_point,
    float input1_scale,
    int8_t input2_zero_point,
    float input2_scale,
    int8_t output_zero_point,
    float output_scale,
    int8_t output_min,
    int8_t output_max,
    uint32_t flags,
    xnn_operator_t* multiply_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_multiply_nd_qs8;

  if (!(input1_scale > 0.0f && isnormal(input1_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 1 scale: scale must be finite and positive",
      xnn_operator_type_to_string(operator_type), input1_scale);
    return xnn_status_invalid_parameter;
  }

  if (!(input2_scale > 0.0f && isnormal(input2_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 2 scale: scale must be finite and positive",
      xnn_operator_type_to_string(operator_type), input2_scale);
    return xnn_status_invalid_parameter;
  }

  if (!(output_scale > 0.0f && isnormal(output_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite and positive",
      xnn_operator_type_to_string(operator_type), output_scale);
    return xnn_status_invalid_parameter;
  }

  if (!(output_min < output_max)) {
    xnn_log_error(
      "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(operator_type), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  // The product is stored in its own float before dividing, exactly as a
  // separate step, so the intermediate is rounded to float precision.
  const float scale_product = input1_scale * input2_scale;
  const float rescale_ratio = scale_product / output_scale;
  if (rescale_ratio < 0x1.0p-16f || rescale_ratio >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g product-to-output scale ratio: scale ratio must be in [2**-16, 2**8) range",
      xnn_operator_type_to_string(operator_type), rescale_ratio);
    return xnn_status_unsupported_parameter;
  }

  // Two parameter sets, laid out back to back: operands in given order, then
  // operands swapped.
  struct {
    union xnn_qs8_mul_minmax_params qs8_mul;
    union xnn_qs8_mul_minmax_params qs8_rmul;
  } mul_params;
  if (xnn_params.qs8.vmul.init.qs8_mul != NULL) {
    xnn_params.qs8.vmul.init.qs8_mul(
      &mul_params.qs8_mul,
      input1_zero_point, input2_zero_point, output_zero_point,
      rescale_ratio, output_min, output_max);
    xnn_params.qs8.vmul.init.qs8_mul(
      &mul_params.qs8_rmul,
      input2_zero_point, input1_zero_point, output_zero_point,
      rescale_ratio, output_min, output_max);
  }

  return create_binary_elementwise_nd(
    flags,
    &mul_params, sizeof(mul_params),
    XNN_INIT_FLAG_QS8,
    operator_type,
    &xnn_params.qs8.vmul.minmax,
    multiply_op_out);
}
549*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional QU8 (uint8_t quantized) Multiply operator.
//
// Validation, in order:
//   - each of input1_scale, input2_scale and output_scale must be positive
//     and satisfy isnormal(), otherwise xnn_status_invalid_parameter;
//   - output_min must be strictly below output_max, otherwise
//     xnn_status_invalid_parameter;
//   - (input1_scale * input2_scale) / output_scale must lie in
//     [2**-16, 2**8), otherwise xnn_status_unsupported_parameter.
// Every rejection is reported through xnn_log_error.
//
// On success two parameter sets are prepared with the registered qu8 vmul
// init function: one with the zero points in (input1, input2) order and one
// with them swapped. Both are handed to create_binary_elementwise_nd, whose
// status is returned.
enum xnn_status xnn_create_multiply_nd_qu8(
    uint8_t input1_zero_point,
    float input1_scale,
    uint8_t input2_zero_point,
    float input2_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* multiply_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_multiply_nd_qu8;

  if (!(input1_scale > 0.0f && isnormal(input1_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 1 scale: scale must be finite and positive",
      xnn_operator_type_to_string(operator_type), input1_scale);
    return xnn_status_invalid_parameter;
  }

  if (!(input2_scale > 0.0f && isnormal(input2_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 2 scale: scale must be finite and positive",
      xnn_operator_type_to_string(operator_type), input2_scale);
    return xnn_status_invalid_parameter;
  }

  if (!(output_scale > 0.0f && isnormal(output_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite and positive",
      xnn_operator_type_to_string(operator_type), output_scale);
    return xnn_status_invalid_parameter;
  }

  if (!(output_min < output_max)) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(operator_type), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  // The product is stored in its own float before dividing, exactly as a
  // separate step, so the intermediate is rounded to float precision.
  const float scale_product = input1_scale * input2_scale;
  const float rescale_ratio = scale_product / output_scale;
  if (rescale_ratio < 0x1.0p-16f || rescale_ratio >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g product-to-output scale ratio: scale ratio must be in [2**-16, 2**8) range",
      xnn_operator_type_to_string(operator_type), rescale_ratio);
    return xnn_status_unsupported_parameter;
  }

  // Two parameter sets, laid out back to back: operands in given order, then
  // operands swapped.
  struct {
    union xnn_qu8_mul_minmax_params qu8_mul;
    union xnn_qu8_mul_minmax_params qu8_rmul;
  } mul_params;
  if (xnn_params.qu8.vmul.init.qu8_mul != NULL) {
    xnn_params.qu8.vmul.init.qu8_mul(
      &mul_params.qu8_mul,
      input1_zero_point, input2_zero_point, output_zero_point,
      rescale_ratio, output_min, output_max);
    xnn_params.qu8.vmul.init.qu8_mul(
      &mul_params.qu8_rmul,
      input2_zero_point, input1_zero_point, output_zero_point,
      rescale_ratio, output_min, output_max);
  }

  return create_binary_elementwise_nd(
    flags,
    &mul_params, sizeof(mul_params),
    XNN_INIT_FLAG_QU8,
    operator_type,
    &xnn_params.qu8.vmul.minmax,
    multiply_op_out);
}
620*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional F16 Squared Difference operator.
//
// No operator parameters are supplied (NULL pointer, zero size). The call is
// forwarded to create_binary_elementwise_nd with the F16 init flag, the
// squared_difference_nd_f16 operator type and xnn_params.f16.vsqrdiff.minmax;
// the helper's status is returned as-is.
enum xnn_status xnn_create_squared_difference_nd_f16(
    uint32_t flags,
    xnn_operator_t* squared_difference_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd(
    flags,
    NULL /* params */, 0 /* params_size */,
    XNN_INIT_FLAG_F16,
    xnn_operator_type_squared_difference_nd_f16,
    &xnn_params.f16.vsqrdiff.minmax,
    squared_difference_op_out);
  return status;
}
634*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional F32 Squared Difference operator.
//
// Builds the default F32 parameter block for the vsqrdiff entry (when an init
// function is registered for it) and forwards it, together with the F32 init
// flag, the squared_difference_nd_f32 operator type and
// xnn_params.f32.vsqrdiff.minmax, to create_binary_elementwise_nd. The
// helper's status is returned as-is.
enum xnn_status xnn_create_squared_difference_nd_f32(
    uint32_t flags,
    xnn_operator_t* squared_difference_op_out)
{
  // Zero-fill first: the whole object is handed to the helper by address and
  // size, so its bytes must be well defined even when no init function is
  // registered.
  union xnn_f32_default_params params = {0};
  // Use the init hook that belongs to the vsqrdiff entry this operator runs
  // (the previous code consulted the vmin entry instead).
  if (xnn_params.f32.vsqrdiff.init.f32_default != NULL) {
    xnn_params.f32.vsqrdiff.init.f32_default(&params);
  }
  return create_binary_elementwise_nd(
    flags,
    &params,
    sizeof(params),
    XNN_INIT_FLAG_F32,
    xnn_operator_type_squared_difference_nd_f32,
    &xnn_params.f32.vsqrdiff.minmax,
    squared_difference_op_out);
}
652*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional F16 Subtract operator.
//
// Thin wrapper: every argument is passed through unchanged to
// create_binary_elementwise_nd_f16, together with the subtract_nd_f16
// operator type and the xnn_params.f16.vsub entry. The helper's status is
// returned as-is.
enum xnn_status xnn_create_subtract_nd_f16(
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* subtract_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd_f16(
    output_min, output_max, flags,
    xnn_operator_type_subtract_nd_f16,
    &xnn_params.f16.vsub,
    subtract_op_out);
  return status;
}
667*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional F32 Subtract operator.
//
// Thin wrapper: every argument is passed through unchanged to
// create_binary_elementwise_nd_f32, together with the subtract_nd_f32
// operator type and the xnn_params.f32.vsub entry. The helper's status is
// returned as-is.
enum xnn_status xnn_create_subtract_nd_f32(
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* subtract_op_out)
{
  const enum xnn_status status = create_binary_elementwise_nd_f32(
    output_min, output_max, flags,
    xnn_operator_type_subtract_nd_f32,
    &xnn_params.f32.vsub,
    subtract_op_out);
  return status;
}
682*4bdc9457SAndroid Build Coastguard Worker 
// Creates an N-dimensional QS8 (int8_t quantized) Subtract operator.
//
// Validation, in order:
//   - each of input1_scale, input2_scale and output_scale must be positive
//     and satisfy isnormal(), otherwise xnn_status_invalid_parameter;
//   - output_min must be strictly below output_max, otherwise
//     xnn_status_invalid_parameter;
//   - input1_scale / output_scale must lie in [2**-10, 2**8), otherwise
//     xnn_status_unsupported_parameter;
//   - input2_scale / output_scale must lie in [2**-10, 2**8), otherwise
//     xnn_status_unsupported_parameter.
// Every rejection is reported through xnn_log_error.
//
// The operator is built on the qs8 vadd entry: the input-2 ratio is negated
// before being given to the registered qs8 add init function. Two parameter
// sets are prepared, one in (input1, input2) order and one with both the
// zero points and the ratios swapped, and both are handed to
// create_binary_elementwise_nd, whose status is returned.
enum xnn_status xnn_create_subtract_nd_qs8(
    int8_t input1_zero_point,
    float input1_scale,
    int8_t input2_zero_point,
    float input2_scale,
    int8_t output_zero_point,
    float output_scale,
    int8_t output_min,
    int8_t output_max,
    uint32_t flags,
    xnn_operator_t* subtract_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_subtract_nd_qs8;

  if (!(input1_scale > 0.0f && isnormal(input1_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 1 scale: scale must be finite and positive",
      xnn_operator_type_to_string(operator_type), input1_scale);
    return xnn_status_invalid_parameter;
  }

  if (!(input2_scale > 0.0f && isnormal(input2_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 2 scale: scale must be finite and positive",
      xnn_operator_type_to_string(operator_type), input2_scale);
    return xnn_status_invalid_parameter;
  }

  if (!(output_scale > 0.0f && isnormal(output_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite and positive",
      xnn_operator_type_to_string(operator_type), output_scale);
    return xnn_status_invalid_parameter;
  }

  if (!(output_min < output_max)) {
    xnn_log_error(
      "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(operator_type), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  const float input1_ratio = input1_scale / output_scale;
  if (input1_ratio < 0x1.0p-10f || input1_ratio >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input1-to-output scale ratio: scale ratio must be in [2**-10, 2**8) range",
      xnn_operator_type_to_string(operator_type), input1_ratio);
    return xnn_status_unsupported_parameter;
  }

  const float input2_ratio = input2_scale / output_scale;
  if (input2_ratio < 0x1.0p-10f || input2_ratio >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input2-to-output scale ratio: scale ratio must be in [2**-10, 2**8) range",
      xnn_operator_type_to_string(operator_type), input2_ratio);
    return xnn_status_unsupported_parameter;
  }

  // Subtraction reuses the add parameters with the second operand's ratio
  // negated.
  const float negated_input2_ratio = -input2_ratio;

  // Two parameter sets, laid out back to back: operands in given order, then
  // operands swapped.
  struct {
    union xnn_qs8_add_minmax_params qs8_add;
    union xnn_qs8_add_minmax_params qs8_radd;
  } sub_params;
  if (xnn_params.qs8.vadd.init.qs8_add != NULL) {
    xnn_params.qs8.vadd.init.qs8_add(
      &sub_params.qs8_add,
      input1_zero_point, input2_zero_point, output_zero_point,
      input1_ratio, negated_input2_ratio, output_min, output_max);
    xnn_params.qs8.vadd.init.qs8_add(
      &sub_params.qs8_radd,
      input2_zero_point, input1_zero_point, output_zero_point,
      negated_input2_ratio, input1_ratio, output_min, output_max);
  }

  return create_binary_elementwise_nd(
    flags,
    &sub_params, sizeof(sub_params),
    XNN_INIT_FLAG_QS8,
    operator_type,
    &xnn_params.qs8.vadd.minmax,
    subtract_op_out);
}
760*4bdc9457SAndroid Build Coastguard Worker 
// Creates a subtraction operator for unsigned 8-bit quantized N-dimensional tensors.
//
// The three quantization scales and the output clamping range are validated first. The
// input-to-output scale ratios are then derived and passed to the QU8 add parameter initializer
// (xnn_params.qu8.vadd.init.qu8_add), with the ratio of the second input negated. Two parameter
// sets are filled in: one with the operands in their given order and one with the operands swapped.
// Both sets, together with the QU8 add kernel table, are forwarded to create_binary_elementwise_nd().
//
// Returns:
//   xnn_status_invalid_parameter     - a scale is not positive and normal, or output_min >= output_max.
//   xnn_status_unsupported_parameter - an input-to-output scale ratio is outside [2**-10, 2**8).
//   Otherwise, the status returned by create_binary_elementwise_nd().
enum xnn_status xnn_create_subtract_nd_qu8(
    uint8_t input1_zero_point,
    float input1_scale,
    uint8_t input2_zero_point,
    float input2_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* subtract_op_out)
{
  const enum xnn_operator_type op_type = xnn_operator_type_subtract_nd_qu8;

  // Each scale must be strictly positive and a normal floating-point number.
  if (!(input1_scale > 0.0f && isnormal(input1_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 1 scale: scale must be finite and positive",
      xnn_operator_type_to_string(op_type), input1_scale);
    return xnn_status_invalid_parameter;
  }
  if (!(input2_scale > 0.0f && isnormal(input2_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g input 2 scale: scale must be finite and positive",
      xnn_operator_type_to_string(op_type), input2_scale);
    return xnn_status_invalid_parameter;
  }
  if (!(output_scale > 0.0f && isnormal(output_scale))) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite and positive",
      xnn_operator_type_to_string(op_type), output_scale);
    return xnn_status_invalid_parameter;
  }

  // The clamping range must be non-empty and non-degenerate.
  if (output_max <= output_min) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(op_type), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  // Ratio of the first input's scale to the output scale; only [2**-10, 2**8) is accepted.
  const float ratio1 = input1_scale / output_scale;
  if (ratio1 < 0x1.0p-10f || ratio1 >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input1-to-output scale ratio: scale ratio must be in [2**-10, 2**8) range",
      xnn_operator_type_to_string(op_type), ratio1);
    return xnn_status_unsupported_parameter;
  }

  // Same constraint for the second input.
  const float ratio2 = input2_scale / output_scale;
  if (ratio2 < 0x1.0p-10f || ratio2 >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input2-to-output scale ratio: scale ratio must be in [2**-10, 2**8) range",
      xnn_operator_type_to_string(op_type), ratio2);
    return xnn_status_unsupported_parameter;
  }

  // Parameter pair handed to the generic constructor: direct order first, swapped order second.
  struct {
    union xnn_qu8_add_minmax_params qu8_add;
    union xnn_qu8_add_minmax_params qu8_radd;
  } subtract_params;
  if (xnn_params.qu8.vadd.init.qu8_add != NULL) {
    // Direct order: (input1, input2) with the second ratio negated.
    xnn_params.qu8.vadd.init.qu8_add(
      &subtract_params.qu8_add,
      input1_zero_point, input2_zero_point, output_zero_point,
      ratio1, -ratio2,
      output_min, output_max);
    // Swapped order: (input2, input1); the negated ratio travels with input2.
    xnn_params.qu8.vadd.init.qu8_add(
      &subtract_params.qu8_radd,
      input2_zero_point, input1_zero_point, output_zero_point,
      -ratio2, ratio1,
      output_min, output_max);
  }

  return create_binary_elementwise_nd(
    flags,
    &subtract_params, sizeof(subtract_params),
    XNN_INIT_FLAG_QU8,
    op_type,
    &xnn_params.qu8.vadd.minmax,
    subtract_op_out);
}
838*4bdc9457SAndroid Build Coastguard Worker 
// Configures a binary elementwise operator for a pair of input shapes and data pointers.
//
// Steps, in order:
//   1. Check the operator type, mark the operator invalid, and check that XNNPACK is initialized
//      and that neither shape has more than XNN_MAX_TENSOR_DIMS dimensions.
//   2. Walk both shapes from the trailing dimension and merge ("compress") adjacent dimensions
//      that share the same broadcast pattern by multiplying their extents together. Dimensions
//      where both inputs are 1 are dropped. Mismatching extents (neither is 1 and they differ)
//      produce xnn_status_invalid_parameter.
//   3. Fold the extra leading dimensions of the longer shape into a compressed dimension.
//   4. If any dimension is zero, mark the operator as xnn_run_state_skip and return success.
//   5. Fill in the elementwise_binary context: pointers, innermost byte count, parameters,
//      microkernel (ropc/opc/op variant chosen from the innermost compressed extents), and
//      byte strides for the outer compressed dimensions.
//   6. Choose a 1D..5D parallelization from the outermost non-unit compressed output dimension
//      and mark the operator ready.
//
// params/params_size are copied into the context first; when the operands get swapped
// (input1's innermost compressed extent is 1), reversed_params overwrite them.
// vbinary and num_threads are accepted but not used by this function.
static enum xnn_status setup_binary_elementwise_nd(
    xnn_operator_t binary_elementwise_op,
    enum xnn_operator_type expected_operator_type,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const void* input1,
    const void* input2,
    void* output,
    uint32_t log2_element_size,
    const void* params,
    size_t params_size,
    const void* reversed_params,
    size_t reversed_params_size,
    const struct vbinary_parameters vbinary[restrict XNN_MIN_ELEMENTS(1)],
    size_t num_threads)
{
  if (expected_operator_type != binary_elementwise_op->type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(binary_elementwise_op->type));
    return xnn_status_invalid_parameter;
  }
  // The operator stays non-runnable until setup has fully succeeded.
  binary_elementwise_op->state = xnn_run_state_invalid;

  const bool library_initialized = (xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) != 0;
  if (!library_initialized) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(binary_elementwise_op->type));
    return xnn_status_uninitialized;
  }

  if (num_input1_dims > XNN_MAX_TENSOR_DIMS || num_input2_dims > XNN_MAX_TENSOR_DIMS) {
    xnn_log_error(
      "failed to setup %s operator with %zu and %zu dimensions in input shapes: "
      "the number of input dimensions must not exceed %d",
      xnn_operator_type_to_string(binary_elementwise_op->type), num_input1_dims, num_input2_dims, XNN_MAX_TENSOR_DIMS);
    return xnn_status_unsupported_parameter;
  }

  // Compressed shapes, innermost dimension at index 0; unused slots keep the neutral extent 1.
  size_t shape1[XNN_MAX_TENSOR_DIMS];
  size_t shape2[XNN_MAX_TENSOR_DIMS];
  size_t shape_out[XNN_MAX_TENSOR_DIMS];
  for (size_t d = 0; d < XNN_MAX_TENSOR_DIMS; d++) {
    shape1[d] = 1;
    shape2[d] = 1;
    shape_out[d] = 1;
  }

  size_t num_groups = 0;             // number of compressed dimensions produced so far
  bool input1_is_broadcast = false;  // current group broadcasts input1
  bool input2_is_broadcast = false;  // current group broadcasts input2
  bool no_nonunit_seen = true;       // no non-(1,1) dimension pair processed yet
  bool has_zero_dim = false;         // some inspected extent was zero

  // Dimensions present in both shapes, aligned at the trailing end.
  const size_t num_shared_dims = num_input1_dims < num_input2_dims ? num_input1_dims : num_input2_dims;
  for (size_t offset = 1; offset <= num_shared_dims; offset++) {
    const size_t dim1 = input1_shape[num_input1_dims - offset];
    const size_t dim2 = input2_shape[num_input2_dims - offset];
    if (dim1 == 0 || dim2 == 0) {
      has_zero_dim = true;
    }
    if (dim1 == 1 && dim2 == 1) {
      // A (1, 1) pair contributes nothing to any compressed dimension.
      continue;
    }
    assert(!(input1_is_broadcast && input2_is_broadcast));

    if (dim1 != 1 && dim2 != 1 && dim1 != dim2) {
      xnn_log_error(
        "failed to setup %s operator: "
        "shape dimension #%zu of input1 (%zu) does not match shape dimension #%zu of input2 (%zu)",
        xnn_operator_type_to_string(binary_elementwise_op->type),
        num_input1_dims - offset, dim1, num_input2_dims - offset, dim2);
      return xnn_status_invalid_parameter;
    }

    if (dim1 == 1) {
      // input1 is broadcast along this dimension; open a new group unless already in one.
      if (!input1_is_broadcast) {
        input1_is_broadcast = true;
        input2_is_broadcast = false;
        num_groups++;
      }
      shape2[num_groups - 1] *= dim2;
      shape_out[num_groups - 1] *= dim2;
    } else if (dim2 == 1) {
      // input2 is broadcast along this dimension.
      if (!input2_is_broadcast) {
        input1_is_broadcast = false;
        input2_is_broadcast = true;
        num_groups++;
      }
      shape1[num_groups - 1] *= dim1;
      shape_out[num_groups - 1] *= dim1;
    } else {
      // Extents are equal and not 1: no broadcasting in this dimension.
      if (input1_is_broadcast || input2_is_broadcast || no_nonunit_seen) {
        input1_is_broadcast = false;
        input2_is_broadcast = false;
        num_groups++;
      }
      shape1[num_groups - 1] *= dim1;
      shape2[num_groups - 1] *= dim1;
      shape_out[num_groups - 1] *= dim1;
    }
    no_nonunit_seen = false;
  }

  // Leading dimensions that exist only in the longer shape: the shorter input is implicitly
  // broadcast across all of them, so they collapse into a single compressed dimension.
  if (num_input1_dims != num_input2_dims) {
    const bool input1_is_longer = num_input1_dims > num_input2_dims;
    const size_t* longer_shape = input1_is_longer ? input1_shape : input2_shape;
    size_t* longer_compressed = input1_is_longer ? shape1 : shape2;
    const size_t num_extra_dims =
      input1_is_longer ? num_input1_dims - num_input2_dims : num_input2_dims - num_input1_dims;
    const bool shorter_is_broadcast = input1_is_longer ? input2_is_broadcast : input1_is_broadcast;

    if (!shorter_is_broadcast) {
      num_groups++;
    }
    for (size_t d = 0; d < num_extra_dims; d++) {
      const size_t extent = longer_shape[d];
      if (extent == 0) {
        has_zero_dim = true;
      }
      longer_compressed[num_groups - 1] *= extent;
      shape_out[num_groups - 1] *= extent;
    }
  }
  if (num_groups == 0) {
    num_groups = 1;
  }

  // Early exit without setting up context if any shape dimension is zero.
  if (has_zero_dim) {
    binary_elementwise_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  struct elementwise_binary_context* context = &binary_elementwise_op->context.elementwise_binary;
  // The compound literal also zeroes every field not named here (strides, ukernel, params).
  *context = (struct elementwise_binary_context) {
    .a = input1,
    .b = input2,
    .y = output,
    .elements = shape_out[0] << log2_element_size,
  };
  if (params_size != 0) {
    memcpy(&context->params, params, params_size);
  }

  // Pick the microkernel variant from the innermost compressed extents.
  const size_t* shape_a = shape1;
  const size_t* shape_b = shape2;
  if (shape1[0] == 1) {
    // input1 has a single innermost element: swap the operands and use the reversed variant.
    context->ukernel = binary_elementwise_op->ukernel.vbinary.ropc_function;
    context->a = input2;
    context->b = input1;
    shape_a = shape2;
    shape_b = shape1;
    if (reversed_params_size != 0) {
      memcpy(&context->params, reversed_params, reversed_params_size);
    }
  } else if (shape2[0] == 1) {
    context->ukernel = binary_elementwise_op->ukernel.vbinary.opc_function;
  } else if (shape1[0] == shape2[0]) {
    context->ukernel = binary_elementwise_op->ukernel.vbinary.op_function;
  }

  // Byte strides for the outer compressed dimensions. An operand whose extent is 1 in a
  // dimension keeps the zero stride left by the initializer above.
  size_t a_elements = shape_a[0];
  size_t b_elements = shape_b[0];
  size_t y_elements = shape_out[0];
  for (size_t g = 1; g < num_groups; g++) {
    const size_t slot = XNN_MAX_TENSOR_DIMS - 1 - g;
    if (shape_a[g] != 1) {
      context->a_stride[slot] = a_elements << log2_element_size;
    }
    if (shape_b[g] != 1) {
      context->b_stride[slot] = b_elements << log2_element_size;
    }
    context->y_stride[slot] = y_elements << log2_element_size;
    a_elements *= shape_a[g];
    b_elements *= shape_b[g];
    y_elements *= shape_out[g];
  }

  // Parallelization rank: index of the outermost compressed output dimension (among 5..2)
  // whose extent is not 1, or 1 when all of them are 1.
  size_t rank = 5;
  while (rank > 1 && shape_out[rank] == 1) {
    rank--;
  }
  switch (rank) {
    case 1:
      binary_elementwise_op->compute.type = xnn_parallelization_type_1d;
      binary_elementwise_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_elementwise_binary_1d;
      break;
    case 2:
      binary_elementwise_op->compute.type = xnn_parallelization_type_2d;
      binary_elementwise_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_elementwise_binary_2d;
      break;
    case 3:
      binary_elementwise_op->compute.type = xnn_parallelization_type_3d;
      binary_elementwise_op->compute.task_3d = (pthreadpool_task_3d_t) xnn_compute_elementwise_binary_3d;
      break;
    case 4:
      binary_elementwise_op->compute.type = xnn_parallelization_type_4d;
      binary_elementwise_op->compute.task_4d = (pthreadpool_task_4d_t) xnn_compute_elementwise_binary_4d;
      break;
    default:
      binary_elementwise_op->compute.type = xnn_parallelization_type_5d;
      binary_elementwise_op->compute.task_5d = (pthreadpool_task_5d_t) xnn_compute_elementwise_binary_5d;
      break;
  }
  // range[0] is the outermost parallelized dimension; compressed dimension 1 comes last.
  for (size_t k = 0; k < rank; k++) {
    binary_elementwise_op->compute.range[k] = shape_out[rank - k];
  }
  binary_elementwise_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
1048*4bdc9457SAndroid Build Coastguard Worker 
// Half-precision front end for setup_binary_elementwise_nd().
//
// Fixes log2 of the element size to 1 and passes the operator's stored f16_minmax parameters
// as both the regular and the reversed-operand parameter sets. All other arguments are
// forwarded unchanged.
static enum xnn_status setup_binary_elementwise_nd_f16(
    xnn_operator_t binary_elementwise_op,
    enum xnn_operator_type expected_operator_type,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const void* input1,
    const void* input2,
    void* output,
    const struct vbinary_parameters vbinary[restrict XNN_MIN_ELEMENTS(1)],
    size_t num_threads)
{
  // The same parameter block serves both operand orders.
  const void* minmax_params = &binary_elementwise_op->params.f16_minmax;
  const size_t minmax_params_size = sizeof(binary_elementwise_op->params.f16_minmax);

  return setup_binary_elementwise_nd(
    binary_elementwise_op, expected_operator_type,
    num_input1_dims, input1_shape,
    num_input2_dims, input2_shape,
    input1, input2, output,
    1 /* log2(sizeof(half)) */,
    minmax_params, minmax_params_size,
    minmax_params, minmax_params_size,
    vbinary,
    num_threads);
}
1078*4bdc9457SAndroid Build Coastguard Worker 
// Single-precision front end for setup_binary_elementwise_nd().
//
// Fixes log2 of the element size to 2 and passes the operator's stored f32_minmax parameters
// as both the regular and the reversed-operand parameter sets. All other arguments are
// forwarded unchanged.
static enum xnn_status setup_binary_elementwise_nd_f32(
    xnn_operator_t binary_elementwise_op,
    enum xnn_operator_type expected_operator_type,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const float* input1,
    const float* input2,
    float* output,
    const struct vbinary_parameters vbinary[restrict XNN_MIN_ELEMENTS(1)],
    size_t num_threads)
{
  // The same parameter block serves both operand orders.
  const void* minmax_params = &binary_elementwise_op->params.f32_minmax;
  const size_t minmax_params_size = sizeof(binary_elementwise_op->params.f32_minmax);

  return setup_binary_elementwise_nd(
    binary_elementwise_op,
    expected_operator_type,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    2 /* log2(sizeof(float)) */,
    minmax_params,
    minmax_params_size,
    minmax_params,
    minmax_params_size,
    vbinary,
    num_threads);
}
1103*4bdc9457SAndroid Build Coastguard Worker 
// Sets up a half-precision N-dimensional add operator for the given shapes and buffers.
//
// Delegates to setup_binary_elementwise_nd_f16() with the add_nd_f16 operator type, the
// f16 vadd kernel parameters, and the thread count reported by the thread pool.
enum xnn_status xnn_setup_add_nd_f16(
    xnn_operator_t add_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const void* input1,
    const void* input2,
    void* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f16(
    add_op,
    xnn_operator_type_add_nd_f16,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f16.vadd,
    num_threads);
}
1123*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F32 Add operator for the given pair of input shapes and buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f32 unchanged, together with
// xnn_operator_type_add_nd_f32 as the operator type and the address of
// xnn_params.f32.vadd. The status produced by that helper is returned as-is.
enum xnn_status xnn_setup_add_nd_f32(
    xnn_operator_t add_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const float* input1,
    const float* input2,
    float* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f32(
    add_op,
    xnn_operator_type_add_nd_f32,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f32.vadd,
    num_threads);
}
1143*4bdc9457SAndroid Build Coastguard Worker 
// Set up a QS8 (int8_t) Add operator for the given pair of input shapes and
// buffers.
//
// Thin forwarding wrapper around setup_binary_elementwise_nd. Besides the
// shape and pointer arguments it supplies:
//   - xnn_operator_type_add_nd_qs8 as the operator type,
//   - 0 as the log2 of the element size,
//   - the operator's own qs8_add and qs8_radd parameter blocks with their sizes,
//   - the address of xnn_params.qs8.vadd,
//   - the thread count of `threadpool`.
// The status produced by the helper is returned as-is.
enum xnn_status xnn_setup_add_nd_qs8(
    xnn_operator_t add_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const int8_t* input1,
    const int8_t* input2,
    int8_t* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd(
    add_op,
    xnn_operator_type_add_nd_qs8,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    0 /* log2(sizeof(int8_t)) */,
    &add_op->params.qs8_add,
    sizeof(add_op->params.qs8_add),
    &add_op->params.qs8_radd,
    sizeof(add_op->params.qs8_radd),
    &xnn_params.qs8.vadd,
    num_threads);
}
1166*4bdc9457SAndroid Build Coastguard Worker 
// Set up a QU8 (uint8_t) Add operator for the given pair of input shapes and
// buffers.
//
// Thin forwarding wrapper around setup_binary_elementwise_nd. Besides the
// shape and pointer arguments it supplies:
//   - xnn_operator_type_add_nd_qu8 as the operator type,
//   - 0 as the log2 of the element size,
//   - the operator's own qu8_add and qu8_radd parameter blocks with their sizes,
//   - the address of xnn_params.qu8.vadd,
//   - the thread count of `threadpool`.
// The status produced by the helper is returned as-is.
enum xnn_status xnn_setup_add_nd_qu8(
    xnn_operator_t add_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const uint8_t* input1,
    const uint8_t* input2,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd(
    add_op,
    xnn_operator_type_add_nd_qu8,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    0 /* log2(sizeof(uint8_t)) */,
    &add_op->params.qu8_add,
    sizeof(add_op->params.qu8_add),
    &add_op->params.qu8_radd,
    sizeof(add_op->params.qu8_radd),
    &xnn_params.qu8.vadd,
    num_threads);
}
1189*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F16 Divide operator for the given pair of input shapes and buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f16 unchanged, together with
// xnn_operator_type_divide_nd_f16 as the operator type and the address of
// xnn_params.f16.vdiv. The status produced by that helper is returned as-is.
//
// The input and output buffers are untyped (void*) in this F16 variant.
enum xnn_status xnn_setup_divide_nd_f16(
    xnn_operator_t divide_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const void* input1,
    const void* input2,
    void* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f16(
    divide_op,
    xnn_operator_type_divide_nd_f16,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f16.vdiv,
    num_threads);
}
1209*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F32 Divide operator for the given pair of input shapes and buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f32 unchanged, together with
// xnn_operator_type_divide_nd_f32 as the operator type and the address of
// xnn_params.f32.vdiv. The status produced by that helper is returned as-is.
enum xnn_status xnn_setup_divide_nd_f32(
    xnn_operator_t divide_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const float* input1,
    const float* input2,
    float* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f32(
    divide_op,
    xnn_operator_type_divide_nd_f32,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f32.vdiv,
    num_threads);
}
1229*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F16 Maximum operator for the given pair of input shapes and
// buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f16 unchanged, together with
// xnn_operator_type_maximum_nd_f16 as the operator type and the address of
// xnn_params.f16.vmax. The status produced by that helper is returned as-is.
//
// The input and output buffers are untyped (void*) in this F16 variant.
enum xnn_status xnn_setup_maximum_nd_f16(
    xnn_operator_t maximum_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const void* input1,
    const void* input2,
    void* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f16(
    maximum_op,
    xnn_operator_type_maximum_nd_f16,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f16.vmax,
    num_threads);
}
1249*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F32 Maximum operator for the given pair of input shapes and
// buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f32 unchanged, together with
// xnn_operator_type_maximum_nd_f32 as the operator type and the address of
// xnn_params.f32.vmax. The status produced by that helper is returned as-is.
enum xnn_status xnn_setup_maximum_nd_f32(
    xnn_operator_t maximum_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const float* input1,
    const float* input2,
    float* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f32(
    maximum_op,
    xnn_operator_type_maximum_nd_f32,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f32.vmax,
    num_threads);
}
1269*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F16 Minimum operator for the given pair of input shapes and
// buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f16 unchanged, together with
// xnn_operator_type_minimum_nd_f16 as the operator type and the address of
// xnn_params.f16.vmin. The status produced by that helper is returned as-is.
//
// The input and output buffers are untyped (void*) in this F16 variant.
enum xnn_status xnn_setup_minimum_nd_f16(
    xnn_operator_t minimum_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const void* input1,
    const void* input2,
    void* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f16(
    minimum_op,
    xnn_operator_type_minimum_nd_f16,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f16.vmin,
    num_threads);
}
1289*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F32 Minimum operator for the given pair of input shapes and
// buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f32 unchanged, together with
// xnn_operator_type_minimum_nd_f32 as the operator type and the address of
// xnn_params.f32.vmin. The status produced by that helper is returned as-is.
enum xnn_status xnn_setup_minimum_nd_f32(
    xnn_operator_t minimum_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const float* input1,
    const float* input2,
    float* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f32(
    minimum_op,
    xnn_operator_type_minimum_nd_f32,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f32.vmin,
    num_threads);
}
1309*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F16 Multiply operator for the given pair of input shapes and
// buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f16 unchanged, together with
// xnn_operator_type_multiply_nd_f16 as the operator type and the address of
// xnn_params.f16.vmul. The status produced by that helper is returned as-is.
//
// The input and output buffers are untyped (void*) in this F16 variant.
enum xnn_status xnn_setup_multiply_nd_f16(
    xnn_operator_t multiply_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const void* input1,
    const void* input2,
    void* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f16(
    multiply_op,
    xnn_operator_type_multiply_nd_f16,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f16.vmul,
    num_threads);
}
1329*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F32 Multiply operator for the given pair of input shapes and
// buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f32 unchanged, together with
// xnn_operator_type_multiply_nd_f32 as the operator type and the address of
// xnn_params.f32.vmul. The status produced by that helper is returned as-is.
enum xnn_status xnn_setup_multiply_nd_f32(
    xnn_operator_t multiply_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const float* input1,
    const float* input2,
    float* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f32(
    multiply_op,
    xnn_operator_type_multiply_nd_f32,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f32.vmul,
    num_threads);
}
1349*4bdc9457SAndroid Build Coastguard Worker 
// Set up a QS8 (int8_t) Multiply operator for the given pair of input shapes
// and buffers.
//
// Thin forwarding wrapper around setup_binary_elementwise_nd. Besides the
// shape and pointer arguments it supplies:
//   - xnn_operator_type_multiply_nd_qs8 as the operator type,
//   - 0 as the log2 of the element size,
//   - the operator's own qs8_mul and qs8_rmul parameter blocks with their sizes,
//   - the address of xnn_params.qs8.vmul,
//   - the thread count of `threadpool`.
// The status produced by the helper is returned as-is.
enum xnn_status xnn_setup_multiply_nd_qs8(
    xnn_operator_t multiply_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const int8_t* input1,
    const int8_t* input2,
    int8_t* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd(
    multiply_op,
    xnn_operator_type_multiply_nd_qs8,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    0 /* log2(sizeof(int8_t)) */,
    &multiply_op->params.qs8_mul,
    sizeof(multiply_op->params.qs8_mul),
    &multiply_op->params.qs8_rmul,
    sizeof(multiply_op->params.qs8_rmul),
    &xnn_params.qs8.vmul,
    num_threads);
}
1372*4bdc9457SAndroid Build Coastguard Worker 
// Set up a QU8 (uint8_t) Multiply operator for the given pair of input shapes
// and buffers.
//
// Thin forwarding wrapper around setup_binary_elementwise_nd. Besides the
// shape and pointer arguments it supplies:
//   - xnn_operator_type_multiply_nd_qu8 as the operator type,
//   - 0 as the log2 of the element size,
//   - the operator's own qu8_mul and qu8_rmul parameter blocks with their sizes,
//   - the address of xnn_params.qu8.vmul,
//   - the thread count of `threadpool`.
// The status produced by the helper is returned as-is.
enum xnn_status xnn_setup_multiply_nd_qu8(
    xnn_operator_t multiply_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const uint8_t* input1,
    const uint8_t* input2,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd(
    multiply_op,
    xnn_operator_type_multiply_nd_qu8,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    0 /* log2(sizeof(uint8_t)) */,
    &multiply_op->params.qu8_mul,
    sizeof(multiply_op->params.qu8_mul),
    &multiply_op->params.qu8_rmul,
    sizeof(multiply_op->params.qu8_rmul),
    &xnn_params.qu8.vmul,
    num_threads);
}
1395*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F16 Squared Difference operator for the given pair of input shapes
// and buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f16 unchanged, together with
// xnn_operator_type_squared_difference_nd_f16 as the operator type and the
// address of xnn_params.f16.vsqrdiff. The status produced by that helper is
// returned as-is.
//
// The input and output buffers are untyped (void*) in this F16 variant.
enum xnn_status xnn_setup_squared_difference_nd_f16(
    xnn_operator_t squared_difference_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const void* input1,
    const void* input2,
    void* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f16(
    squared_difference_op,
    xnn_operator_type_squared_difference_nd_f16,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f16.vsqrdiff,
    num_threads);
}
1415*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F32 Squared Difference operator for the given pair of input shapes
// and buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f32 unchanged, together with
// xnn_operator_type_squared_difference_nd_f32 as the operator type and the
// address of xnn_params.f32.vsqrdiff. The status produced by that helper is
// returned as-is.
enum xnn_status xnn_setup_squared_difference_nd_f32(
    xnn_operator_t squared_difference_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const float* input1,
    const float* input2,
    float* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f32(
    squared_difference_op,
    xnn_operator_type_squared_difference_nd_f32,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f32.vsqrdiff,
    num_threads);
}
1435*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F16 Subtract operator for the given pair of input shapes and
// buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f16 unchanged, together with
// xnn_operator_type_subtract_nd_f16 as the operator type and the address of
// xnn_params.f16.vsub. The status produced by that helper is returned as-is.
//
// The input and output buffers are untyped (void*) in this F16 variant.
enum xnn_status xnn_setup_subtract_nd_f16(
    xnn_operator_t subtract_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const void* input1,
    const void* input2,
    void* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f16(
    subtract_op,
    xnn_operator_type_subtract_nd_f16,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f16.vsub,
    num_threads);
}
1455*4bdc9457SAndroid Build Coastguard Worker 
// Set up an F32 Subtract operator for the given pair of input shapes and
// buffers.
//
// Thin forwarding wrapper: the shape and pointer arguments are handed to
// setup_binary_elementwise_nd_f32 unchanged, together with
// xnn_operator_type_subtract_nd_f32 as the operator type and the address of
// xnn_params.f32.vsub. The status produced by that helper is returned as-is.
enum xnn_status xnn_setup_subtract_nd_f32(
    xnn_operator_t subtract_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const float* input1,
    const float* input2,
    float* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd_f32(
    subtract_op,
    xnn_operator_type_subtract_nd_f32,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    &xnn_params.f32.vsub,
    num_threads);
}
1475*4bdc9457SAndroid Build Coastguard Worker 
// Set up a QS8 (int8_t) Subtract operator for the given pair of input shapes
// and buffers.
//
// Thin forwarding wrapper around setup_binary_elementwise_nd. Besides the
// shape and pointer arguments it supplies:
//   - xnn_operator_type_subtract_nd_qs8 as the operator type,
//   - 0 as the log2 of the element size,
//   - the operator's own qs8_add and qs8_radd parameter blocks with their sizes,
//   - the address of xnn_params.qs8.vadd,
//   - the thread count of `threadpool`.
// The status produced by the helper is returned as-is.
//
// NOTE(review): this deliberately passes the *add* parameter blocks and the
// vadd entry rather than dedicated subtract ones. Presumably the subtraction
// is encoded in how those parameters were filled in when the operator was
// created; confirm against the matching create function.
enum xnn_status xnn_setup_subtract_nd_qs8(
    xnn_operator_t subtract_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const int8_t* input1,
    const int8_t* input2,
    int8_t* output,
    pthreadpool_t threadpool)
{
  // The helper takes a thread count rather than the pool handle itself.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd(
    subtract_op,
    xnn_operator_type_subtract_nd_qs8,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    0 /* log2(sizeof(int8_t)) */,
    &subtract_op->params.qs8_add,
    sizeof(subtract_op->params.qs8_add),
    &subtract_op->params.qs8_radd,
    sizeof(subtract_op->params.qs8_radd),
    &xnn_params.qs8.vadd,
    num_threads);
}
1498*4bdc9457SAndroid Build Coastguard Worker 
// Sets up the uint8_t (QU8) variant of the N-dimensional Subtract operator.
//
// Thin wrapper: every argument is forwarded unchanged to
// setup_binary_elementwise_nd(), the call is tagged with
// xnn_operator_type_subtract_nd_qu8, and the helper's status is returned as-is.
//
// NOTE(review): the parameter blocks (params.qu8_add / params.qu8_radd) and the
// micro-kernel table (xnn_params.qu8.vadd) passed below are the addition ones;
// presumably the subtraction is encoded in those parameters when the operator
// is created -- confirm against the matching create function.
enum xnn_status xnn_setup_subtract_nd_qu8(
    xnn_operator_t subtract_op,
    size_t num_input1_dims,
    const size_t* input1_shape,
    size_t num_input2_dims,
    const size_t* input2_shape,
    const uint8_t* input1,
    const uint8_t* input2,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  // log2(sizeof(uint8_t)): elements are a single byte wide.
  const uint32_t log2_element_size = 0;
  // Query the pool size up front and hand it to the shared setup helper.
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_binary_elementwise_nd(
    subtract_op,
    xnn_operator_type_subtract_nd_qu8,
    num_input1_dims,
    input1_shape,
    num_input2_dims,
    input2_shape,
    input1,
    input2,
    output,
    log2_element_size,
    &subtract_op->params.qu8_add,
    sizeof(subtract_op->params.qu8_add),
    &subtract_op->params.qu8_radd,
    sizeof(subtract_op->params.qu8_radd),
    &xnn_params.qu8.vadd,
    num_threads);
}
1521