xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/xla/service/cpu/runtime_conv2d_acl.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_CONV2D_ACL_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_CONV2D_ACL_H_
18 
#include <cstdint>
#include <cstdlib>
#include <iostream>

#include "tensorflow/core/platform/types.h"
20 
21 #ifdef XLA_CPU_USE_ACL
22 #include "arm_compute/runtime/NEON/NEFunctions.h"
23 #include "arm_compute/runtime/NEON/NEScheduler.h"
24 #include "utils/Utils.h"
25 
26 extern "C" {
27 struct acl_depthwise_conv_obj_t {
28   arm_compute::NEDepthwiseConvolutionLayer depthwise_conv;
29   arm_compute::NEArithmeticAddition add;
30   arm_compute::NEActivationLayer act;
31   arm_compute::Tensor input_tensor;
32   arm_compute::Tensor kernel_tensor;
33   arm_compute::Tensor bia_tensor;
34   arm_compute::Tensor output_tensor;
35   arm_compute::Tensor output_acc_tensor;
36 };
37 
38 struct acl_gemm_conv_obj_t {
39   arm_compute::NEGEMMConvolutionLayer gemm_conv;
40   arm_compute::NEArithmeticAddition add;
41   arm_compute::NEActivationLayer act;
42   arm_compute::Tensor input_tensor;
43   arm_compute::Tensor kernel_tensor;
44   arm_compute::Tensor bia_tensor;
45   arm_compute::Tensor output_tensor;
46   arm_compute::Tensor output_acc_tensor;
47 };
48 
49 struct acl_conv_conf_t {
50   bool with_bias;
51   bool is_int8;
52   bool sum_with_eltwise;
53   bool fast_math;
54   arm_compute::TensorInfo input_info;
55   arm_compute::TensorInfo kernel_info;
56   arm_compute::TensorInfo bia_info;
57   arm_compute::TensorInfo output_info;
58   arm_compute::PadStrideInfo padstride_info;
59   arm_compute::Size2D dilation_info;
60   arm_compute::WeightsInfo kernel_wei_info;
61   arm_compute::ActivationLayerInfo act_info;
62 };
63 
// Declaration of the ACL-backed float32 2D convolution runtime entry point.
// Only declared here; the definition lives elsewhere.
// `run_options_ptr` is an opaque pointer to xla::ExecutableRunOptions.
// NOTE(review): buffer layouts for `out`/`lhs`/`rhs` are not visible from this
// header; consult the implementation.
void __xla_cpu_runtime_ACLConv2DF32(
    const void* run_options_ptr,  // xla::ExecutableRunOptions*
    float* out, float* lhs, float* rhs,
    // Input extents.
    int64_t input_batch, int64_t input_rows, int64_t input_cols,
    int64_t input_channels,
    // Kernel extents.
    int64_t kernel_rows, int64_t kernel_cols, int64_t kernel_channels,
    int64_t kernel_filters,
    // Output extents.
    int64_t output_rows, int64_t output_cols,
    // Strides.
    int64_t row_stride, int64_t col_stride,
    // Padding.
    int64_t padding_top, int64_t padding_bottom, int64_t padding_left,
    int64_t padding_right,
    // Dilations.
    int64_t lhs_row_dilation, int64_t lhs_col_dilation,
    int64_t rhs_row_dilation, int64_t rhs_col_dilation,
    // Grouping.
    int64_t feature_group_count);
74 }
75 #else
76 extern "C" {
// Fallback used when XLA_CPU_USE_ACL is not defined. Calling it is always an
// error: it prints a diagnostic to stderr and terminates the process with
// exit status 1. None of the arguments are inspected.
//
// Defined `inline` because this definition lives in a header: without it,
// every translation unit that includes the header would emit its own
// external definition and the program would fail to link (ODR violation).
inline void __xla_cpu_runtime_ACLConv2DF32(
    const void* /* xla::ExecutableRunOptions* */ run_options_ptr, float* out,
    float* lhs, float* rhs, int64_t input_batch, int64_t input_rows,
    int64_t input_cols, int64_t input_channels, int64_t kernel_rows,
    int64_t kernel_cols, int64_t kernel_channels, int64_t kernel_filters,
    int64_t output_rows, int64_t output_cols, int64_t row_stride,
    int64_t col_stride, int64_t padding_top, int64_t padding_bottom,
    int64_t padding_left, int64_t padding_right, int64_t lhs_row_dilation,
    int64_t lhs_col_dilation, int64_t rhs_row_dilation,
    int64_t rhs_col_dilation, int64_t feature_group_count) {
  // Terminate the message with a newline so it does not run into whatever is
  // printed next on the terminal or in the log.
  std::cerr
      << "Attempt to call ACL Conv2D runtime library without defining "
         "XLA_CPU_USE_ACL. Add --define=build_with_acl=true to build with ACL."
      << '\n';
  std::exit(1);
}
92 }
93 #endif  // XLA_CPU_USE_ACL
94 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_CONV2D_ACL_H_
95