#pragma once

#include <c10/macros/Export.h>
#include <string>

namespace torch::jit::mobile {
class Module;
namespace quantization {
/*
 * Device-side PTQ (post-training quantization) API.
 *
 * Once the model has been prepared for quantization on the server side, the
 * model is sent to the device. On the device the model is further trained.
 * At the end of that training, before the model is readied for inference,
 * this helper is used to quantize the model.
 *
 * Usage:
 *   PTQQuanizationHelper ptq_helper;
 *   ptq_helper.quantize_dynamic(m, "forward");
 *
 * Args:
 *   m: An instance of mobile::Module, captured by reference. This module is
 *      mutated in place, replacing its <method_name> method with a quantized
 *      equivalent.
 *   method_name: Name of the method to be quantized. AOT preparation for
 *      quantization must also have been done for this method.
 *
 * Returns:
 *   Nothing; `m` is mutated in place. Its size should be smaller due to
 *   weight quantization, and its <method_name> method should use quantized
 *   ops.
 *
 * NOTE(review): the class name is missing a 't' ("Quanization" vs
 * "Quantization"); it is kept as-is because it is part of the exported
 * public API — renaming would break existing callers.
 */
class TORCH_API PTQQuanizationHelper {
 public:
  PTQQuanizationHelper() = default;

  // Quantizes <method_name> of `m` in place using dynamic quantization.
  void quantize_dynamic(
      torch::jit::mobile::Module& m,
      const std::string& method_name);
};
} // namespace quantization
} // namespace torch::jit::mobile