// xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/mlir/lite/quantization/ir/QuantOps.td
// (revision b6fb3261f9314811a0f4371741dbb8839866f948)
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
//
//===----------------------------------------------------------------------===//
//
// This is the operation definition file for Quantization.
//
//===----------------------------------------------------------------------===//

#ifndef QUANT_FORK_OPS
#define QUANT_FORK_OPS

include "tensorflow/compiler/mlir/lite/quantization/ir/QuantOpsBase.td"
include "mlir/Dialect/Quant/QuantOpsBase.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"

//===----------------------------------------------------------------------===//
// Base classes
//===----------------------------------------------------------------------===//

// Common base for every op in this file: registers the op under the given
// mnemonic on QuantizationFork_Dialect with the given traits.
class quantfork_Op<string mnemonic, list<Trait> traits> :
    Op<QuantizationFork_Dialect, mnemonic, traits>;

//===----------------------------------------------------------------------===//
// Quantization casts
//===----------------------------------------------------------------------===//
// A QuantizeCast (qcast) represents a potential type shift from a quantizable
// type to a quantized type.
//
// At runtime, a qcast will apply the transformation expressed by its
// operand and result type. For flexibility during transformation, it is also
// possible to have a qcast that performs no transformation (both its
// operand and result type are quantizable).
//
// A qcast will typically originate from either:
//   a) An expressed or implied constraint in the source dialect which signals
//      that a certain level of quantization is possible or required.
//   b) An inference made by a quantization algorithm indicating that a
//      quantized representation may be acceptable.
//
// Especially early in transformation, it is common to have pairs of
// qcast/dcast at points where a transition to a quantized type is
// required. In addition, it is also common to have an identity qcast
// (where the operand and result type are not quantized) at all points where
// it is legal to use a quantized representation (but is not known to be
// acceptable).
def quantfork_QuantizeCastOp : quantfork_Op<"qcast", [NoSideEffect]> {
  let arguments = (ins quant_RealValueType:$arg);
  let results = (outs quant_RealValueType);
}

// A DequantizeCast op (dcast) represents the inverse of a qcast,
// converting back from a quantized to quantizable (expressed) type.
//
// Like qcasts, a dcast is allowed to have both its operand and result
// as non quantized types. This facilitates transformations and marks edges
// where the computation must be carried out in the expressed type.
//
// Especially early in transformation, it is common to have dcasts on
// all operands to ops that must operate with the expressed type (typically
// math ops prior to lowering to target-specific, quantized kernels).
def quantfork_DequantizeCastOp : quantfork_Op<"dcast", [NoSideEffect]> {
  let arguments = (ins quant_RealValueType:$arg);
  let results = (outs quant_RealValueType);
}

// A StorageCast (scast) represents a cast from or to a type based on the
// storage type and a type based on a corresponding quantized type.
//
// This op exists to ensure type coherency between parts of the computation
// which are operating directly on an underlying storage type and those which
// operate on quantized values.
//
// Examples from storage to quantized type:
//   i8 -> !quant<"uniform[i8:f32]{1.0}">
//   tensor<4xi8> -> tensor<4x!quant<"uniform[i8:f32]{1.0}">>
//   vector<4xi8> -> vector<4x!quant<"uniform[i8:f32]{1.0}">>
def quantfork_StorageCastOp : quantfork_Op<"scast", [NoSideEffect]> {
  let arguments = (ins quant_RealOrStorageValueType:$arg);
  let results = (outs quant_RealOrStorageValueType);
  // scast folds away when it cancels with an adjacent cast (see the op's
  // fold() implementation in the dialect's C++ sources).
  let hasFolder = 1;
}

// A QuantizeRegion (region) represents a quantization unit which wraps
// high-precision ops with quantization specifications for all the inputs
// and outputs. Some quantization specifications can be undetermined and
// derived from other ports by the target specification of the kernel.
def quantfork_QuantizeRegionOp : quantfork_Op<"region", [
    NoSideEffect,
    IsolatedFromAbove,
    // The single-block body is implicitly terminated by quantfork.return.
    SingleBlockImplicitTerminator<"ReturnOp">]> {
  let summary = [{
    The `region` operation wraps high-precision ops as a logical low-precision
    quantized kernel.
  }];

  let arguments = (ins Variadic<AnyType>:$inputs,
                    // Per-input / per-output quantization specs, stored as
                    // type attributes aligned with $inputs / $outputs.
                    TypeArrayAttr:$input_specs,
                    TypeArrayAttr:$output_specs,
                    StrAttr:$logical_kernel);
  let results = (outs Variadic<AnyType>:$outputs);
  let regions = (region SizedRegion<1>:$body);
  let hasVerifier = 1;
}

// Terminator for the body region of a `quantfork.region` op.
def quantfork_ReturnOp : quantfork_Op<"return", [Terminator]> {
  let summary = [{
    The `return` operation terminates a quantize region and returns values.
  }];

  let arguments = (ins Variadic<AnyTensor>:$results);
}

//===----------------------------------------------------------------------===//
// Training integration and instrumentation ops
//===----------------------------------------------------------------------===//

def quantfork_ConstFakeQuant : quantfork_Op<"const_fake_quant",
                                    [SameOperandsAndResultType, NoSideEffect]> {
  let summary = [{
    Simulates the effect of uniform quantization with const range.
  }];

  let description = [{
    Given a const min, max, num_bits and narrow_range attribute, applies the
    same uniform quantization simulation as is done by the TensorFlow
    fake_quant_with_min_max_args op. See the fakeQuantAttrsToType() utility
    method and the quant-convert-simulated-quantization pass for further details.
  }];

  let arguments = (ins
    F32Tensor:$inputs,
    F32Attr:$min,
    F32Attr:$max,
    // The bitwidth of the quantization; between 2 and 16, inclusive.
    I64Attr:$num_bits,
    // Quantization range starts from 0 or 1; starts from 1 if true.
    DefaultValuedAttr<BoolAttr, "false">:$narrow_range,
    // The sign of the quantization.
    DefaultValuedAttr<BoolAttr, "false">:$is_signed
  );

  let results = (outs
    F32Tensor:$outputs
  );
}

def quantfork_ConstFakeQuantPerAxis : quantfork_Op<"const_fake_quant_per_axis",
                                    [SameOperandsAndResultType, NoSideEffect]> {
  let summary = [{
    Simulates the effect of per axis uniform quantization with const range.
  }];

  let description = [{
    Given a const min, max, num_bits and narrow_range attribute, applies the
    same per axis uniform quantization simulation as is done by the TensorFlow
    fake_quant_with_min_max_vars_per_channel op. See the fakeQuantAttrsToType()
    utility method and the quant-convert-simulated-quantization pass for further
    details.
  }];

  let arguments = (ins
    F32Tensor:$inputs,
    // Per-axis ranges: one min/max pair per slice along $axis.
    F32ArrayAttr:$min,
    F32ArrayAttr:$max,
    // The quantized dimension of the inputs tensor.
    I64Attr:$axis,
    // The bitwidth of the quantization; between 2 and 16, inclusive.
    I64Attr:$num_bits,
    // Quantization range starts from 0 or 1; starts from 1 if true.
    DefaultValuedAttr<BoolAttr, "false">:$narrow_range,
    // The sign of the quantization.
    DefaultValuedAttr<BoolAttr, "false">:$is_signed
  );

  let results = (outs
    F32Tensor:$outputs
  );
}

def quantfork_StatisticsRefOp : quantfork_Op<"stats_ref", [SameOperandsAndResultType]> {
  let summary = "Indicates that statistics are resolved by reference.";

  let description = [{
    This op acts as an identity that, when encountered at runtime, should result
    in statistics being collected about the value of its operand/result.
    Such statistics will be stored with the provided key, allowing this node
    to later be converted to a 'stats' op if statistics with that key have been
    encountered.
  }];

  let arguments = (ins
    quant_RealValueType:$arg,
    StrAttr:$statsKey
  );
  let results = (outs quant_RealValueType);
}

def quantfork_StatisticsOp : quantfork_Op<"stats", [SameOperandsAndResultType]> {
  let summary = "Identity op which associates statistics with the value.";

  let description = [{
    Associates statistics about the runtime ranges of values observed for
    evaluations of this node.

    Statistics about the entire type are reported in the 'layerStats' attribute
    and those for each axis, in the (optional) `axisStats` attribute. The
    interpretation of each is determined by the last dimension of its shape.
    Currently, only dim=2 is supported, which is interpreted as [min, max].

    `layerStats` must be a rank 1 tensor: [2]
    `axisStats` must be a rank 2 tensor: [N, 2], where N = the slice size
      split along the `axis` dimension. For example:

    ```
    <?x?x3x2>, axis=3 => N=2
    <?x?x3x2>, axis=2 => N=6
    ```
  }];

  let arguments = (ins
    quant_RealValueType:$arg,
    ElementsAttr:$layerStats,
    OptionalAttr<ElementsAttr>:$axisStats,
    OptionalAttr<I64Attr>:$axis);
  let results = (outs quant_RealValueType);
  let hasVerifier = 1;
}

def quantfork_CoupledRefOp : quantfork_Op<"coupled_ref", [SameOperandsAndResultType]> {
  let summary = [{
    Indicates that one point of the computation is coupled to another.
  }];

  let description = [{
    Ordinarily, relationships between ops for the purposes of determining
    compatible quantized types is explicit based on the use-def chain. However,
    in some situations, a use may be separated from its def by arbitrary
    external connections. In such a case, during analysis, all coupled_ref
    nodes in a module which share a coupledKey will be considered to be
    directly connected as via an identity op for the purpose of type inference.
  }];

  let arguments = (ins
    quant_RealValueType:$arg,
    StrAttr:$coupledKey);
  let results = (outs quant_RealValueType);
}

#endif // QUANT_FORK_OPS