// Copyright 2020 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #pragma once #include #include #include #include #include #include #if defined(EMSCRIPTEN) #include #elif XNN_PLATFORM_WINDOWS #include #else #include #endif #define XNN_MAX_INPUTS 4 #define XNN_MAX_OUTPUTS 4 #define XNN_MAX_RUNTIME_INPUTS 4 #define XNN_MAX_RUNTIME_OUTPUTS 4 #define XNN_INVALID_NODE_ID UINT32_MAX #define XNN_MAX_OPERATOR_OBJECTS 4 /// Disable fusion of nodes in subgraph. Fusion is enabled by default, set this flag to turn it off. #define XNN_FLAG_NO_OPERATOR_FUSION 0x80000000 #ifdef __cplusplus extern "C" { #endif struct xnn_shape { size_t num_dims; size_t dim[XNN_MAX_TENSOR_DIMS]; }; enum xnn_value_type { xnn_value_type_invalid = 0, xnn_value_type_dense_tensor = 1, }; enum xnn_layout_type { xnn_layout_type_nhwc = 0, xnn_layout_type_nchw = 1, }; /// Abstraction for a collections of elements produced and consumed by nodes. struct xnn_value { /// Unique ID for the value. uint32_t id; /// Type of the collection of elements. /// /// Currently only dense tensors are supported. /// Other types (e.g. sparse tensors) might be supported in the future. enum xnn_value_type type; /// Type of elements in the collection. enum xnn_datatype datatype; /// Per-value quantization parameters. struct { /// Offset from zero of the quantized elements. int32_t zero_point; union { /// Multiplication factor to convert quantized elements to real representation. float scale; struct { /// Per-channel multiplication factor to convert quantized elements to real representation. const float* channelwise_scale; /// Index of the channel dimension with per-channel quantization parameters. size_t channel_dimension; }; }; } quantization; /// Tensor shape. struct xnn_shape shape; /// Binary features of the tensor. 
Supported values are any combination of: /// - XNN_VALUE_FLAG_EXTERNAL_INPUT /// - XNN_VALUE_FLAG_EXTERNAL_OUTPUT uint32_t flags; /// Static initialization data. Must be null for non-static values. const void* data; /// Index of the Subgraph node that produced the value, or XNN_INVALID_NODE_ID is the Value is an external input. uint32_t producer; /// Index of the first Node that consume the value, or XNN_INVALID_NODE_ID if the Value has no consumers within the /// graph (e.g. Value is an external output). uint32_t first_consumer; /// Number of Nodes that consume the value. /// If multiple inputs in a Node refer to this Value as input, the Node is counted as consumer multiple times. /// If the Value is an external output, it counts as having an extra consumer. uint32_t num_consumers; uint32_t num_nchw_compatible_consumers; enum xnn_layout_type layout; /// Set during analysis in xnn_subgraph_rewrite_for_fp16. /// Indicates that this value should be converted to FP16. bool fp16_compatible; /// Set during analysis in xnn_subgraph_rewrite_for_fp16. /// Indicates Value ID of the FP16 variant of this Value. uint32_t fp16_id; /// Set during analysis in xnn_subgraph_rewrite_for_fp16. /// Indicates Value ID of the FP32 variant of this Value. uint32_t fp32_id; }; XNN_INLINE bool xnn_value_is_external(const struct xnn_value* value) { return (value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) != 0; } XNN_INLINE bool xnn_value_is_external_output(const struct xnn_value* value) { return (value->flags & XNN_VALUE_FLAG_EXTERNAL_OUTPUT) != 0; } XNN_INLINE bool xnn_value_is_external_input(const struct xnn_value* value) { return (value->flags & XNN_VALUE_FLAG_EXTERNAL_INPUT) != 0; } enum xnn_allocation_type { xnn_allocation_type_invalid = 0, /// Static data that is provided by caller, needs to outlive the xnn_runtime. xnn_allocation_type_static, /// Lives in XNNPACK-managed internal workspace. 
xnn_allocation_type_workspace, /// Non-static data that is external to the runtime, provided by caller, specified in xnn_setup_runtime. xnn_allocation_type_external, }; struct xnn_blob { /// Size in bytes. size_t size; /// Data pointer. void* data; enum xnn_allocation_type allocation_type; }; struct xnn_node; struct xnn_operator_data; typedef enum xnn_status (*xnn_create_operator_fn)( const struct xnn_node* node, const struct xnn_value* values, size_t num_values, struct xnn_operator_data* opdata, const struct xnn_caches* caches); typedef enum xnn_status (*xnn_setup_operator_fn)( const struct xnn_operator_data* opdata, const struct xnn_blob* blobs, size_t num_blobs, pthreadpool_t threadpool); enum xnn_compute_type { xnn_compute_type_invalid = 0, xnn_compute_type_fp32, xnn_compute_type_fp16, xnn_compute_type_qc8, xnn_compute_type_qs8, xnn_compute_type_qu8, xnn_compute_type_fp32_to_fp16, xnn_compute_type_fp32_to_qs8, xnn_compute_type_fp32_to_qu8, xnn_compute_type_fp16_to_fp32, xnn_compute_type_qs8_to_fp32, xnn_compute_type_qu8_to_fp32, }; struct xnn_node { enum xnn_node_type type; uint32_t id; enum xnn_compute_type compute_type; /// Static parameters of the operator node. 
union { struct { uint32_t input_padding_top; uint32_t input_padding_right; uint32_t input_padding_bottom; uint32_t input_padding_left; uint32_t kernel_height; uint32_t kernel_width; uint32_t subsampling_height; uint32_t subsampling_width; uint32_t dilation_height; uint32_t dilation_width; uint32_t groups; size_t group_input_channels; size_t group_output_channels; } convolution_2d; struct { uint32_t padding_top; uint32_t padding_right; uint32_t padding_bottom; uint32_t padding_left; uint32_t adjustment_height; uint32_t adjustment_width; uint32_t kernel_height; uint32_t kernel_width; uint32_t upsampling_height; uint32_t upsampling_width; uint32_t dilation_height; uint32_t dilation_width; uint32_t groups; size_t group_input_channels; size_t group_output_channels; } deconvolution_2d; struct { uint32_t input_padding_top; uint32_t input_padding_right; uint32_t input_padding_bottom; uint32_t input_padding_left; uint32_t kernel_height; uint32_t kernel_width; uint32_t subsampling_height; uint32_t subsampling_width; uint32_t dilation_height; uint32_t dilation_width; uint32_t depth_multiplier; size_t input_channels; } depthwise_convolution_2d; struct { uint32_t block_size; } depth_to_space; struct { uint32_t padding_top; uint32_t padding_right; uint32_t padding_bottom; uint32_t padding_left; uint32_t pooling_height; uint32_t pooling_width; uint32_t stride_height; uint32_t stride_width; uint32_t dilation_height; uint32_t dilation_width; } pooling_2d; struct { float alpha; } elu; struct { float negative_slope; } leaky_relu; struct { size_t pre_paddings[XNN_MAX_TENSOR_DIMS]; size_t post_paddings[XNN_MAX_TENSOR_DIMS]; uint32_t padding_value; } static_pad; struct { struct xnn_shape new_shape; } static_reshape; struct { size_t new_height; size_t new_width; } static_resize; struct { size_t axis; } concatenate; struct { size_t axis; } even_split; struct { size_t perm[XNN_MAX_TENSOR_DIMS]; size_t num_dims; } transpose; } params; struct { float output_min; float output_max; } 
activation; /// Value IDs for node inputs. uint32_t inputs[XNN_MAX_INPUTS]; uint32_t num_inputs; /// Value IDs for node outputs. uint32_t outputs[XNN_MAX_OUTPUTS]; uint32_t num_outputs; uint32_t flags; uint32_t layout_flags; uint32_t cluster_leader; // Number of filter parameters in all 1x1 Convolutions of the sparse cluster. // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions. size_t num_params; // Number of zero filter parameters in all 1x1 Convolutions of the sparse cluster. // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions. size_t num_zeroes; // Factory function to create an operator object from the node. xnn_create_operator_fn create; // Function to setup an operator using opdata. xnn_setup_operator_fn setup; }; #ifdef __MACH__ typedef uint64_t xnn_timestamp; #elif __EMSCRIPTEN__ typedef double xnn_timestamp; #elif XNN_PLATFORM_WINDOWS typedef LARGE_INTEGER xnn_timestamp; #else typedef struct timespec xnn_timestamp; #endif struct xnn_operator_data { xnn_operator_t operator_objects[XNN_MAX_OPERATOR_OBJECTS]; xnn_setup_operator_fn setup; size_t batch_size; size_t input_height; size_t input_width; size_t output_height; size_t output_width; struct xnn_shape shape1; struct xnn_shape shape2; size_t pre_paddings[XNN_MAX_TENSOR_DIMS]; size_t post_paddings[XNN_MAX_TENSOR_DIMS]; uint32_t adjustment_height; uint32_t adjustment_width; uint32_t inputs[XNN_MAX_RUNTIME_INPUTS]; uint32_t outputs[XNN_MAX_RUNTIME_OUTPUTS]; xnn_timestamp end_ts[XNN_MAX_OPERATOR_OBJECTS]; }; struct xnn_subgraph { /// Number of Value IDs reserved for communication with external graph representation. /// Values created during subgraph transformation avoid using IDs in [0, reserved_value_ids-1] range. 
uint32_t external_value_ids; uint32_t num_reserved_values; uint32_t num_values; struct xnn_value* values; uint32_t num_reserved_nodes; uint32_t num_nodes; struct xnn_node* nodes; }; /// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values. struct xnn_runtime { uint32_t num_external_values; /// List of operators in the execution plan, in execution order. struct xnn_operator_data* opdata; /// Number of operators in the execution plan. size_t num_ops; struct xnn_blob* blobs; size_t num_blobs; struct xnn_workspace* workspace; struct xnn_runtime* next_workspace_user; #if XNN_PLATFORM_JIT struct xnn_code_cache code_cache; #endif // XNN_PLATFORM_JIT pthreadpool_t threadpool; bool profiling; // The start timestamp of the first operator in the subgraph. This is set when profiling is true. xnn_timestamp start_ts; }; struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph); struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph); void xnn_subgraph_add_nodes(xnn_subgraph_t subgraph, size_t num_nodes); size_t xnn_tensor_get_size( xnn_subgraph_t subgraph, uint32_t value_id); // Product of all shape dimensions size_t xnn_shape_multiply_all_dims( const struct xnn_shape shape[1]); // Product of all shape dimensions, except for the specified number of the last dimensions size_t xnn_shape_multiply_batch_dims( const struct xnn_shape shape[1], size_t num_nonbatch_dims); // Product of all shape dimensions, except for the last (channel) one size_t xnn_shape_multiply_non_channel_dims( const struct xnn_shape shape[1]); enum xnn_status xnn_subgraph_optimize(xnn_subgraph_t subgraph, uint32_t flags); void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph); // Rewrites subgraph for FP16, returns true if success, false if rewrite failed. 
bool xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph);

// NOTE(review): the definitions of the helpers below are not visible from this header; going by their names they
// reset a node, reset a value, and copy one value into another -- confirm against the implementation.
void xnn_node_clear(struct xnn_node* node);
void xnn_value_clear(struct xnn_value* value);

void xnn_value_copy(struct xnn_value* dst_value, const struct xnn_value* src_value);

// Initializes a node for the given conversion compute type with a single input Value ID and a single output Value ID.
// NOTE(review): summary inferred from the parameter list only -- confirm against the implementation.
void xnn_init_convert_node(
  struct xnn_node* node,
  enum xnn_compute_type compute_type,
  uint32_t input_id,
  uint32_t output_id,
  uint32_t flags);

struct xnn_workspace {
  void* data;
  size_t size;
  struct xnn_runtime* first_user;
  // Workspace will be destroyed in xnn_delete_runtime or xnn_delete_workspace if ref_count reaches 0.
  size_t ref_count;
};

#ifdef __cplusplus
}  // extern "C"
#endif