/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
//
// This file defines support utilities for interoperating with FakeQuant* based
// QAT (Quantized Aware Training) computations, as implemented by TFLite. Note
// that FakeQuant* operators mix multiple concerns specific to how TFLite
// originally implemented quantization. As such, utilities here enforce
// opinions taken by that codebase (vs providing any amount of genericity).
//
// Specifically, it combines the following concerns, each of which would be
// independent variables in a more generic setup:
//   - numBits and isSigned imply storage data type (uint8, int8, int16)
//   - numBits < 8 is promoted to uint8 or int8
//   - "narrow_range" narrows the lower bound of the storage type's range by 1
//   - the specified min/max values are "nudged" so that the result has a zero
//     that can be exactly expressed
//   - min=max=0 implies scale=0 and zero_point=0
//
// With the above assumptions applied, every conforming specified FakeQuant op
// can be represented by a UniformQuantizedType. This scheme is not expected to
// be generalized further in the future and should be considered to be a legacy
// set of rules.
36 // 37 // As canonically used in TensorFlow graphs, the presence of a FakeQuant node 38 // is a hint that the specific math represented here has been simulated at 39 // training time. As such, it is usually not advised to arbitrarily change 40 // quantization parameters derived from FakeQuant. 41 // 42 //===----------------------------------------------------------------------===// 43 44 #ifndef TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_IR_FAKEQUANTSUPPORT_H_ 45 #define TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_IR_FAKEQUANTSUPPORT_H_ 46 47 #include "mlir/Dialect/Quant/QuantTypes.h" // from @llvm-project 48 49 namespace mlir { 50 namespace quantfork { 51 52 /// Converts per-layer FakeQuant attributes to the corresponding type. 53 /// In the event that the parameters cannot be converted, returns a nullptr 54 /// convertible Type and issues an appropriate error. 55 /// Note that there are multiple variants of a per-layer FakeQuant op, so 56 /// this function takes the attributes discretely vs taking a reference to the 57 /// originating op. 58 quant::UniformQuantizedType fakeQuantAttrsToType(Location loc, unsigned numBits, 59 double rmin, double rmax, 60 bool narrowRange, 61 Type expressedType, 62 bool isSigned = false); 63 64 /// Converts per-channel FakeQuant attributes to the corresponding type. 65 /// In the event that the parameters cannot be converted, returns a nullptr 66 /// convertible Type and issues an appropriate error. 67 quant::UniformQuantizedPerAxisType fakeQuantAttrsToType( 68 Location loc, unsigned numBits, int32_t quantizedDimension, 69 ArrayRef<double> rmins, ArrayRef<double> rmax, bool narrowRange, 70 Type expressedType, bool isSigned = false); 71 } // namespace quantfork 72 } // namespace mlir 73 74 #endif // TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_IR_FAKEQUANTSUPPORT_H_ 75