/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
//
//===----------------------------------------------------------------------===//
//
// This is the operation definition file for Quantization.
//
//===----------------------------------------------------------------------===//

#ifndef QUANT_FORK_OPS
#define QUANT_FORK_OPS

include "tensorflow/compiler/mlir/lite/quantization/ir/QuantOpsBase.td"
include "mlir/Dialect/Quant/QuantOpsBase.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"

//===----------------------------------------------------------------------===//
// Base classes
//===----------------------------------------------------------------------===//

// Base class for all ops in the quantization-fork dialect.
class quantfork_Op<string mnemonic, list<Trait> traits> :
    Op<QuantizationFork_Dialect, mnemonic, traits>;

//===----------------------------------------------------------------------===//
// Quantization casts
//===----------------------------------------------------------------------===//
// A QuantizeCast (qcast) represents a potential type shift from a quantizable
// type to a quantized type.
//
// At runtime, a qcast will apply the transformation expressed by its
// operand and result type. For flexibility during transformation, it is also
// possible to have a qcast that performs no transformation (both its
// operand and result type are quantizable).
//
// A qcast will typically originate from either:
//   a) An expressed or implied constraint in the source dialect which signals
//      that a certain level of quantization is possible or required.
//   b) An inference made by a quantization algorithm indicating that a
//      quantized representation may be acceptable.
//
// Especially early in transformation, it is common to have pairs of
// qcast/dcast at points where a transition to a quantized type is
// required. In addition, it is also common to have an identity qcast
// (where the operand and result type are not quantized) at all points where
// it is legal to use a quantized representation (but is not known to be
// acceptable).
def quantfork_QuantizeCastOp : quantfork_Op<"qcast", [NoSideEffect]> {
  let arguments = (ins quant_RealValueType:$arg);
  let results = (outs quant_RealValueType);
}

// A DequantizeCast op (dcast) represents the inverse of a qcast,
// converting back from a quantized to quantizable (expressed) type.
//
// Like qcasts, a dcast is allowed to have both its operand and result
// as non quantized types. This facilitates transformations and marks edges
// where the computation must be carried out in the expressed type.
//
// Especially early in transformation, it is common to have dcasts on
// all operands to ops that must operate with the expressed type (typically
// math ops prior to lowering to target-specific, quantized kernels).
def quantfork_DequantizeCastOp : quantfork_Op<"dcast", [NoSideEffect]> {
  let arguments = (ins quant_RealValueType:$arg);
  let results = (outs quant_RealValueType);
}

// A StorageCast (scast) represents a cast from or to a type based on the
// storage type and a type based on a corresponding quantized type.
//
// This op exists to ensure type coherency between parts of the computation
// which are operating directly on an underlying storage type and those which
// operate on quantized values.
//
// Examples from storage to quantized type:
//   i8 -> !quant<"uniform[i8:f32]{1.0}">
//   tensor<4xi8> -> tensor<4x!quant<"uniform[i8:f32]{1.0}">>
//   vector<4xi8> -> vector<4x!quant<"uniform[i8:f32]{1.0}">>
def quantfork_StorageCastOp : quantfork_Op<"scast", [NoSideEffect]> {
  let arguments = (ins quant_RealOrStorageValueType:$arg);
  let results = (outs quant_RealOrStorageValueType);
  let hasFolder = 1;
}

// A QuantizeRegion (region) represents a quantization unit which wraps
// high-precision ops with quantization specifications for all the inputs
// and outputs. Some quantization specifications can be undetermined and
// derived from other ports by the target specification of the kernel.
def quantfork_QuantizeRegionOp : quantfork_Op<"region", [
    NoSideEffect,
    IsolatedFromAbove,
    SingleBlockImplicitTerminator<"ReturnOp">]> {
  let summary = [{
    The `region` operation wraps high-precision ops as a logical low-precision
    quantized kernel.
  }];

  let arguments = (ins Variadic<AnyType>:$inputs,
                       TypeArrayAttr:$input_specs,
                       TypeArrayAttr:$output_specs,
                       StrAttr:$logical_kernel);
  let results = (outs Variadic<AnyType>:$outputs);
  let regions = (region SizedRegion<1>:$body);
  let hasVerifier = 1;
}

def quantfork_ReturnOp : quantfork_Op<"return", [Terminator]> {
  let summary = [{
    The `return` operation terminates a quantize region and returns values.
  }];

  let arguments = (ins Variadic<AnyTensor>:$results);
}

//===----------------------------------------------------------------------===//
// Training integration and instrumentation ops
//===----------------------------------------------------------------------===//

def quantfork_ConstFakeQuant : quantfork_Op<"const_fake_quant",
    [SameOperandsAndResultType, NoSideEffect]> {
  let summary = [{
    Simulates the effect of uniform quantization with const range.
  }];

  let description = [{
    Given a const min, max, num_bits and narrow_range attribute, applies the
    same uniform quantization simulation as is done by the TensorFlow
    fake_quant_with_min_max_args op. See the fakeQuantAttrsToType() utility
    method and the quant-convert-simulated-quantization pass for further details.
  }];

  let arguments = (ins
    F32Tensor:$inputs,
    F32Attr:$min,
    F32Attr:$max,
    // The bitwidth of the quantization; between 2 and 16, inclusive.
    I64Attr:$num_bits,
    // Quantization range starts from 0 or 1; starts from 1 if true.
    DefaultValuedAttr<BoolAttr, "false">:$narrow_range,
    // The sign of the quantization.
    DefaultValuedAttr<BoolAttr, "false">:$is_signed
  );

  let results = (outs
    F32Tensor:$outputs
  );
}

def quantfork_ConstFakeQuantPerAxis : quantfork_Op<"const_fake_quant_per_axis",
    [SameOperandsAndResultType, NoSideEffect]> {
  let summary = [{
    Simulates the effect of per axis uniform quantization with const range.
  }];

  let description = [{
    Given a const min, max, num_bits and narrow_range attribute, applies the
    same per axis uniform quantization simulation as is done by the TensorFlow
    fake_quant_with_min_max_vars_per_channel op. See the fakeQuantAttrsToType()
    utility method and the quant-convert-simulated-quantization pass for further
    details.
  }];

  let arguments = (ins
    F32Tensor:$inputs,
    F32ArrayAttr:$min,
    F32ArrayAttr:$max,
    // The quantized dimension of the inputs tensor.
    I64Attr:$axis,
    // The bitwidth of the quantization; between 2 and 16, inclusive.
    I64Attr:$num_bits,
    // Quantization range starts from 0 or 1; starts from 1 if true.
    DefaultValuedAttr<BoolAttr, "false">:$narrow_range,
    // The sign of the quantization.
    DefaultValuedAttr<BoolAttr, "false">:$is_signed
  );

  let results = (outs
    F32Tensor:$outputs
  );
}

def quantfork_StatisticsRefOp : quantfork_Op<"stats_ref", [SameOperandsAndResultType]> {
  let summary = "Indicates that statistics are resolved by reference.";

  let description = [{
    This op acts as an identity that, when encountered at runtime, should result
    in statistics being collected about the value of its operand/result.
    Such statistics will be stored with the provided key, allowing this node
    to later be converted to a 'stats' op if statistics with that key have been
    encountered.
  }];

  let arguments = (ins
    quant_RealValueType:$arg,
    StrAttr:$statsKey
  );
  let results = (outs quant_RealValueType);
}

def quantfork_StatisticsOp : quantfork_Op<"stats", [SameOperandsAndResultType]> {
  let summary = "Identity op which associates statistics with the value.";

  let description = [{
    Associates statistics about the runtime ranges of values observed for
    evaluations of this node.

    Statistics about the entire type are reported in the 'layerStats' attribute
    and those for each axis, in the (optional) `axisStats` attribute. The
    interpretation of each is determined by the last dimension of its shape.
    Currently, only dim=2 is supported, which is interpreted as [min, max].

    `layerStats` must be a rank 1 tensor: [2]
    `axisStats` must be a rank 2 tensor: [N, 2], where N=the slice size
    split by the `axis` dimension. For example:

    ```
    <?x?x3x2>, axis=3 => N=2
    <?x?x3x2>, axis=2 => N=6
    ```
  }];

  let arguments = (ins
    quant_RealValueType:$arg,
    ElementsAttr:$layerStats,
    OptionalAttr<ElementsAttr>:$axisStats,
    OptionalAttr<I64Attr>:$axis);
  let results = (outs quant_RealValueType);
  let hasVerifier = 1;
}

def quantfork_CoupledRefOp : quantfork_Op<"coupled_ref", [SameOperandsAndResultType]> {
  let summary = [{
    Indicates that one point of the computation is coupled to another.
  }];

  let description = [{
    Ordinarily, relationships between ops for the purposes of determining
    compatible quantized types is explicit based on the use-def chain. However,
    in some situations, a use may be separated from its def by arbitrary
    external connections. In such a case, during analysis, all coupled_ref
    nodes in a module which share a coupledKey will be considered to be
    directly connected as via an identity op for the purpose of type inference.
  }];

  let arguments = (ins
    quant_RealValueType:$arg,
    StrAttr:$coupledKey);
  let results = (outs quant_RealValueType);
}

#endif // QUANT_FORK_OPS