"use strict";(self.webpackChunkrspress_doc_template=self.webpackChunkrspress_doc_template||[]).push([["78301"],{62044:function(e,n,s){s.r(n);var i=s(85893),a=s(50065);function r(e){let n=Object.assign({h1:"h1",a:"a",p:"p",h2:"h2",pre:"pre",code:"code",span:"span",div:"div",ol:"ol",li:"li",h3:"h3"},(0,a.ah)(),e.components);return(0,i.jsxs)(i.Fragment,{children:[(0,i.jsxs)(n.h1,{id:"quantized-awareness-training-guide",children:[(0,i.jsx)(n.a,{className:"header-anchor","aria-hidden":"true",href:"#quantized-awareness-training-guide",children:"#"}),"Quantized Awareness Training Guide"]}),"\n",(0,i.jsx)(n.p,{children:"The quantized awareness training is performed by inserting some pseudo-quantized nodes into the model,\nso as to minimize the loss of accuracy when the model obtained through quantized awareness training is converted into a fixed-point model.\nThe quantized awareness training is no different from traditional model training in that one can start from scratch, build a pseudo-quantized model, and then train on that pseudo-quantized model.\nDue to the limitations of the deployed hardware platform, it is challenging to understand these limitations and build a pseudo-quantization model based on them.\nThe quantized awareness training tool reduces the challenges of developing quantized models by automatically inserting pseudo-quantization operators into the provided floating-point model based on the limitations of the deployment platform."}),"\n",(0,i.jsx)(n.p,{children:"The quantized awareness training is generally more difficult than the training of pure floating-point models due to the various restrictions imposed.\nThe goal of the quantized awareness training tool is to reduce the difficulty of quantized awareness training and to reduce the engineering difficulty of quantized model deployment."}),"\n",(0,i.jsxs)(n.h2,{id:"process-and-example",children:[(0,i.jsx)(n.a,{className:"header-anchor","aria-hidden":"true",href:"#process-and-example",children:"#"}),"Process and Example"]}),"\n",(0,i.jsx)(n.p,{children:"Although it is not mandatory for a quantized awareness training tool to start with a pre-trained floating-point model,\nexperience has shown that typically starting quantized awareness training with a pre-trained high-precision floating-point model can greatly reduce the difficulty of quantized awareness training."}),"\n",(0,i.jsx)(n.pre,{className:"code",children:(0,i.jsx)(n.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(n.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"from"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"quantization "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"import"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" get_default_qconfig"})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-comment)"},children:"# convert the model to QAT state"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"default_qat_8bit_fake_quant_qconfig "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"get_default_qconfig"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    activation_fake_quant"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"fake_quant"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight_fake_quant"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"fake_quant"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    activation_observer"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"min_max"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight_observer"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"min_max"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    activation_qkwargs"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"None"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight_qkwargs"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"{"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"qscheme"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:": torch.per_channel_symmetric,"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"ch_axis"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:": "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"0"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    },"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"default_qat_8bit_weight_32bit_out_fake_quant_qconfig "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"get_default_qconfig"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    activation_fake_quant"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"None"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight_fake_quant"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"fake_quant"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    activation_observer"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"None"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight_observer"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"min_max"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    activation_qkwargs"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"None"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    weight_qkwargs"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"{"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"qscheme"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:": torch.per_channel_symmetric,"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"ch_axis"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:": "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"0"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    },"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"qat_model "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"prepare"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    float_model,"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    example_input,"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    qconfig_setter "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:" horizon.quantization.qconfig_template.default_qat_qconfig_setter,"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:")."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"to"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(device)"})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-comment)"},children:"# load the quantization parameters in the Calibration model"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"qat_model"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"load_state_dict"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(calib_model."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"state_dict"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"())"})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-comment)"},children:"# perform quantized awareness training"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-comment)"},children:"# as a filetune process, quantized awareness training generally requires setting a small learning rate"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"optimizer "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" torch"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"optim"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"SGD"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    qat_model."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"parameters"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(), lr"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"0.0001"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:", weight_decay"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"2e-4"})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number"}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"for"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" nepoch "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"in"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"range"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(epoch_num):"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-comment)"},children:"# note the method of controlling the training state of the QAT model here"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    qat_model"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"train"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"set_fake_quantize"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(qat_model, FakeQuantState.QAT)"})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number"}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"train_one_epoch"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        qat_model,"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        nn."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"CrossEntropyLoss"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(),"})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        optimizer,"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"None"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        train_data_loader,"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        device,"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    )"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number"}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-comment)"},children:"# note the method of controlling the eval state of the QAT model here"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    qat_model"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"eval"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"set_fake_quantize"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(qat_model, FakeQuantState.VALIDATION)"})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number"}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-comment)"},children:"# test qat model accuracy"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    top1"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" top5 "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"evaluate"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        qat_model,"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        eval_data_loader,"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        device,"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    )"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"print"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"QAT model: evaluation Acc@1 '}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"{"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:":.3f"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"}"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:" Acc@5 "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"{"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:":.3f"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"}"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"format"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"            top1.avg, top5.avg"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        )"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    )"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number"}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-comment)"},children:"# test quantized model accuracy"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"qat_hbir_model "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" horizon_plugin_pytorch"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"quantization"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"hbdk4"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"export"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    qat_model. example_input"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"quantized_hbir_model "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" hbdk4"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"compiler"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"convert"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(qat_hbir_model)"})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number"}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"top1"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" top5 "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"evaluate"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    quantized_hbir_model,"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    eval_data_loader,"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"print"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"Quantized model: evaluation Acc@1 '}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"{"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:":.3f"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"}"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:" Acc@5 "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"{"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:":.3f"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"}"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-string-expression)"},children:'"'}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"format"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"("})]}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"        top1.avg, top5.avg"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"    )"})}),"\n",(0,i.jsx)(n.span,{className:"line line-number",children:(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:")"})}),"\n"]})})}),"\n",(0,i.jsxs)(n.div,{className:"rspress-directive warning",children:[(0,i.jsx)(n.div,{className:"rspress-directive-title",children:"Attention"}),(0,i.jsx)(n.div,{className:"rspress-directive-content",children:(0,i.jsx)(n.p,{children:"Due to the underlying limitations of the deployment platform, the QAT model cannot fully represent the final on-board accuracy,\nplease make sure to monitor the quantized model accuracy to ensure that the quantized model accuracy is normal, otherwise the model on-board dropout problem may occur."})})]}),"\n",(0,i.jsx)(n.p,{children:"As can be seen from the above sample code, there are two additional steps in quantized awareness training compared to traditional pure floating-point model training:"}),"\n",(0,i.jsxs)(n.ol,{children:["\n",(0,i.jsx)(n.li,{children:"prepare."}),"\n",(0,i.jsx)(n.li,{children:"Load the Calibration model parameters."}),"\n"]}),"\n",(0,i.jsxs)(n.h3,{id:"prepare",children:[(0,i.jsx)(n.a,{className:"header-anchor","aria-hidden":"true",href:"#prepare",children:"#"}),"prepare"]}),"\n",(0,i.jsx)(n.p,{children:"The goal of this step is to transform the floating-point network and insert pseudo-quantized nodes."}),"\n",(0,i.jsxs)(n.h3,{id:"load-the-calibration-model-parameters",children:[(0,i.jsx)(n.a,{className:"header-anchor","aria-hidden":"true",href:"#load-the-calibration-model-parameters",children:"#"}),"Load the Calibration Model Parameters"]}),"\n",(0,i.jsx)(n.p,{children:"A better initialization is obtained by loading the pseudo-quantization parameters obtained from Calibration."}),"\n",(0,i.jsxs)(n.h3,{id:"iterative-training",children:[(0,i.jsx)(n.a,{className:"header-anchor","aria-hidden":"true",href:"#iterative-training",children:"#"}),"Iterative Training"]}),"\n",(0,i.jsx)(n.p,{children:"At this point, the construction of the pseudo-quantized model and the initialization of the parameters are completed,\nand then the regular training iterations and model parameter updates can be performed, and the quantized model accuracy can be monitored."}),"\n",(0,i.jsxs)(n.h2,{id:"pseudo-quantized-operator",children:[(0,i.jsx)(n.a,{className:"header-anchor","aria-hidden":"true",href:"#pseudo-quantized-operator",children:"#"}),"Pseudo-quantized Operator"]}),"\n",(0,i.jsx)(n.p,{children:"The main difference between the quantized awareness training and the traditional floating-point model's training is the insertion of pseudo-quantization operators,\nand, as different quantized awareness training algorithms are also represented by pseudo-quantization operators, here we introduce the pseudo-quantization operators."}),"\n",(0,i.jsxs)(n.div,{className:"rspress-directive info",children:[(0,i.jsx)(n.div,{className:"rspress-directive-title",children:"Note"}),(0,i.jsx)(n.div,{className:"rspress-directive-content",children:(0,i.jsx)(n.p,{children:"\nSince the BPU only supports symmetric quantization, here we take the symmetric quantization as an example."})})]}),"\n",(0,i.jsxs)(n.h3,{id:"pseudo-quantization-process",children:[(0,i.jsx)(n.a,{className:"header-anchor","aria-hidden":"true",href:"#pseudo-quantization-process",children:"#"}),"Pseudo-quantization Process"]}),"\n",(0,i.jsx)(n.p,{children:"Take the int8 quantized awareness training as an example, in general, the pseudo-quantization operator is computed as follows:"}),"\n",(0,i.jsx)(n.p,{children:(0,i.jsx)(n.code,{children:"fake_quant_x = clip(round(x / scale)\uFF0C-128, 127) * scale"})}),"\n",(0,i.jsx)(n.p,{children:"Similar to Conv2d, which optimizes the weight and bias parameters through training, the pseudo-quantization operator needs to be trained to optimize the scale parameter.\nHowever, the gradient of round as a step function is 0, which makes it impossible to train the pseudo-quantization operator by backpropagation of the gradient directly.\nTo solve this problem, there are usually two solutions: a statistical-based approach and a learning-based approach."}),"\n",(0,i.jsxs)(n.h3,{id:"statistical-based-approach",children:[(0,i.jsx)(n.a,{className:"header-anchor","aria-hidden":"true",href:"#statistical-based-approach",children:"#"}),"Statistical-based Approach"]}),"\n",(0,i.jsx)(n.p,{children:"The goal of quantization is to uniformly map the floating point numbers in Tensor to the range [-128, 127] represented by int8 via the scale parameter.\nSince the mapping is uniform, it is easy to see how scale is calculated:"}),"\n",(0,i.jsx)(n.pre,{className:"code",children:(0,i.jsx)(n.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(n.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"def"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"compute_scale"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"("}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-parameter)"},children:"x"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:":"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" Tensor):"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    xmin"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:","}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" xmax "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" x"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"max"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(),"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" maxv "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"="}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" x"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"min"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"return"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"max"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(xmin."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"abs"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"(), xmax."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"abs"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"())"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"/"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-constant)"},children:"256.0"})]}),"\n"]})})}),"\n",(0,i.jsxs)(n.p,{children:["Due to the uneven distribution of data in Tensor and the outlier problem, different methods for calculating xmin and xmax have been developed. See ",(0,i.jsx)(n.code,{children:"MovingAverageMinMaxObserver"})," and so on."]}),"\n",(0,i.jsxs)(n.p,{children:["Please refer to ",(0,i.jsx)(n.code,{children:"default_qat_8bit_fake_quant_qconfig"})," and its related interfaces for the usage in the tool."]}),"\n",(0,i.jsxs)(n.h3,{id:"learning-based-approach",children:[(0,i.jsx)(n.a,{className:"header-anchor","aria-hidden":"true",href:"#learning-based-approach",children:"#"}),"Learning-based Approach"]}),"\n",(0,i.jsx)(n.p,{children:"Although the gradient of round is 0, the researcher found experimentally that in this scenario,\nif the gradient is directly set to 1, the model can also be made to converge to the expected accuracy."}),"\n",(0,i.jsx)(n.pre,{className:"code",children:(0,i.jsx)(n.pre,{className:"shiki css-variables has-line-number",style:{backgroundColor:"var(--shiki-color-background)"},tabIndex:"0",children:(0,i.jsxs)(n.code,{className:"language-python",meta:"",children:[(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"def"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"round_ste"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"("}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-parameter)"},children:"x"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:":"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" Tensor):"})]}),"\n",(0,i.jsxs)(n.span,{className:"line line-number",children:[(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:"    "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"return"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" (x"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"round"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"-"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" x)"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"."}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-function)"},children:"detach"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-punctuation)"},children:"()"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" "}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-token-keyword)"},children:"+"}),(0,i.jsx)(n.span,{style:{color:"var(--shiki-color-text)"},children:" x"})]}),"\n"]})})}),"\n",(0,i.jsxs)(n.p,{children:["Please refer to ",(0,i.jsx)(n.code,{children:"default_qat_8bit_lsq_quant_qconfig"})," and its related interfaces for the usage in the tool."]}),"\n",(0,i.jsxs)(n.p,{children:["If you are interested in learning more, you can refer to the paper ",(0,i.jsx)(n.a,{href:"https://arxiv.org/abs/1902.08153",target:"_blank",rel:"noopener noreferrer",children:"Learned Step Size Quantization"}),"."]})]})}function o(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},{wrapper:n}=Object.assign({},(0,a.ah)(),e.components);return n?(0,i.jsx)(n,Object.assign({},e,{children:(0,i.jsx)(r,e)})):r(e)}n.default=o,o.__RSPRESS_PAGE_META={},o.__RSPRESS_PAGE_META["3.0.17%2Fen%2Fguide%2Fplugin%2Fuser_guide%2Fqat_guide.mdx"]={toc:[{id:"process-and-example",text:"Process and Example",depth:2},{id:"prepare",text:"prepare",depth:3},{id:"load-the-calibration-model-parameters",text:"Load the Calibration Model Parameters",depth:3},{id:"iterative-training",text:"Iterative Training",depth:3},{id:"pseudo-quantized-operator",text:"Pseudo-quantized Operator",depth:2},{id:"pseudo-quantization-process",text:"Pseudo-quantization Process",depth:3},{id:"statistical-based-approach",text:"Statistical-based Approach",depth:3},{id:"learning-based-approach",text:"Learning-based Approach",depth:3}],title:"Quantized Awareness Training Guide",frontmatter:{}}}}]);