
     Ngd#                        d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ	 d dl
Z
d dlmZmZmZmZ d dlmZ ddlmZ ddlmZ  ej        e          Z G d d	          Zd
 Zedk    r e            Zej        re                    ej                   ej        Z ej!        Z"ej#        $                    e"          r(e%                    de" d            e&de" d           e
j'        e           Z( ee(ej)        ej*        ej+                  Z,e,-                                 e,j(        .                    e"d           dS dS )    N)ListTuple)
GraphProto
ModelProto	NodeProtoTensorProto)quantize_matmul_bnb4   )	ONNXModel)attribute_to_kwargc                       e Zd ZdZdZdZddededefdZe	d	e
e         d
eeef         fd            Zdej        d
ej        fdZdede
e         d
efdZde
e         fdZd ZdS )MatMulBnb4QuantizerzMPerform 4b quantization of constant MatMul weights using FP4 or NF4 data typer   r
   Nmodel
quant_type
block_sizec                     |pg }|t           j        t           j        fv sJ t          |          | _        || _        || _        t          |          | _        d S N)	r   FP4NF4r   r   r   r   setnodes_to_exclude)selfr   r   r   r   s        j/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/quantization/matmul_bnb4_quantizer.py__init__zMatMulBnb4Quantizer.__init__&   s_    +1r157J7NOOOOOu%%
$$ #$4 5 5    
graph_pathreturnc                     t          t          |          dz
  dd          D ]'}||         }|j        D ]}|j        | k    r||fc c S (dS )Nr
   )NN)rangeleninitializername)r#   r   gidgraphtensors        r   __get_initializerz%MatMulBnb4Quantizer.__get_initializer.   sw    Z1,b"55 	) 	)CsOE+ ) );$&&!5=((((( ') zr   fpweightc           	         t          |j                  dk    rt          d          |                                                                }|j        \  }}||z  }| j        }||z   dz
  |z  }|dz   dz  }t          j        |d          }	t          j        ||j                  }
t          |	||
|| j
        ||           |	|
fS )z4b quantize fp32/fp16 weight   z9Current bnb4 block quantization only supports 2D tensors!r
   uint8)dtype)r!   shape
ValueError	transposecopyr   npzerosr,   r	   r   )r   r(   
fpweight_trowscolsnumelr   
num_blocksquantized_numelpackedabsmaxs              r   bnb4_block_quantz$MatMulBnb4Quantizer.bnb4_block_quant7   s     x~!##XYYY ''))..00
^
dt_
j(1,;
 19*/999*HN;;;VZT_VZ\`aaar   nodegraph_stackc                    |j         dk    r|S t                              d|j         d           |j        | j        v r%t                              d|j         d           |S |j        d         }t                              ||          \  }}|t                              d           |S t          j	        
                    |          }t          |j                  d	k    rt                              d
           |S |                     |          \  }}t          j	                            |          }	|j        dz   |	_        |j        D ])}
|
j        |k    r|j                            |
            n*t          j	                            |          }|j        dz   |_        |j                            |	|g           i }|j        \  }}||d<   ||d<   | j        |d<   | j        |d<   t          j        j        	 d|j        d         |	j        |j        g|j        d         g|j        r
|j        dz   nddd|}t                              d|j         d           |S )zdIf the node is MatMul with fp32 const weight, quantize the weight with int4, and return the new nodeMatMulzstart to quantize z ...zexclude to quantize z$ as specified by nodes_to_exclude...r
   Nz2MatMul doesn't have const weight. Skip to quantizer*   z)MatMul weight is not 2D. Skip to quantize_Bnb4_absmaxKNr   r   
MatMulBnb4r    com.microsoft)inputsoutputsr#   domainzcomplete quantization of )rD   )op_typeloggerdebugr#   r   inputr   %_MatMulBnb4Quantizer__get_initializeronnxnumpy_helperto_arrayr!   r-   r;   
from_arrayremover"   extendr   r   helper	make_nodeoutput)r   r<   r=   inputBBBs_graphB_arrayr9   r:   B_quantrM   absmax_tensorkwargsr4   r5   matmul_bnb4_nodes                   r   _bnb4_matmul_node_weightz,MatMulBnb4Quantizer._bnb4_matmul_node_weightM   ss    <8##K9$)999:::9---LL_	___```KA);;FKPP89LLMNNNK#,,Q//w}""LLDEEEK..w77#..v66v'^ 	 	EzV##%%e,,, $ )44V<<Vi/##Wm$<===]
dss#|#|;0
JqM7<1CD[^$(,	9W$$r"
 
 
 
 	@@@@AAAr   c                    g }|d         }|j         D ]k}d |j        D             }t          |          ri }|j        D ]}|j        t          j        j        k    r7|                    |j                   |j	        | 
                    |          i}n}|j        t          j        j        k    rTg }	|j        D ]@}
|                    |
           |	                    | 
                    |          g           A|j	        |	i}nt          |          }|                    |           t	          j        j        |j        |j        |j        fd|j	        i|}|                    |                     ||                     m|                    d           |j                             |           |                                 |S )Nr   c                 z    g | ]8}|j         t          j        j        k    s|j         t          j        j        k    6|9S  )typerO   AttributeProtoGRAPHGRAPHS).0attrs     r   
<listcomp>z9MatMulBnb4Quantizer._process_subgraph.<locals>.<listcomp>   sG       9 3 999TY$J]Jd=d=d =d=d=dr   r#   r<   )r<   	attributer!   rd   rO   re   rf   appendgr#   _process_subgraphrg   graphsrT   r   updaterU   rV   rJ   rM   rW   r`   
ClearFieldpop)r   r=   	new_nodesr%   r<   graph_attrsr^   ri   kvvaluesubgraphs              r   rn   z%MatMulBnb4Quantizer._process_subgraph   s   	BJ 	O 	OD  N  K
 ;  N & &DyD$7$===#**46222"i)?)?)L)LMd&9&@@@ "(, P PH'..x888!LL$*@*@*M*M)NOOOO"i//55MM"%%%%{,L$*dk @D	MS  T::4MMNNNN   
)$$$r   c                 \   | j                                         g}| j                                         }d}|D ]}|j        dk    rd}|s4|                    t
          j                            dd          g           |                     |           | j         	                                 d S )NFrF   Tr
   )
r   r%   opset_importrI   rT   rO   rU   make_opsetidrn   clean_initializers)r   r=   ry   has_ms_domainopsets        r   processzMatMulBnb4Quantizer.process   s    z''))*z..00! 	% 	%E|.. $ 	P!9!9/1!M!M NOOO{+++
%%'''''r   r   )__name__
__module____qualname____doc__r   r   r   intr   staticmethodr   r   r   r   rN   npt	ArrayLiker1   ndarrayr;   r   r`   rn   r~   rc   r   r   r   r      s       WW C C6 6j 6c 6s 6 6 6 6 D,< {T^G^A_    \   2:        ,5 Y 5 T*EU 5 Zc 5  5  5  5 n$T*-= $ $ $ $L( ( ( ( (r   r   c                     t          j        d          } |                     ddd           |                     ddd           |                     d	d
dt          j        t          j        gd           |                     dd
dd           |                     ddd
d           |                     d
           |                     ddt          d
g d           |                                 S )Na  Blockwise FP4/NF4 quantization for MatMul 2D weight matrices.

A weight matrix is partitioned into blocks, where each block is a contiguous
subset inside the flattened transposed weight matrix. Each block is quantized
into a set of 4b integers with an absolute value scaling factor.
)descriptionz--input_modelTzPath to the input model file)requiredhelpz--output_modelzPath to the output model filez--quant_typeFr
   z&Quantization data type. 0: FP4, 1: NF4)r   defaultchoicesr   z--block_size@   zVBlock size for blockwise quantization. Note: bnb.nn.Linear4bit only uses block_size=64)r   r   r   z-vz	--verbose
store_true)r   action)verbosez--nodes_to_exclude+zBSpecify the nodes to be excluded from quantization with node names)nargsrd   r   r   r   )	argparseArgumentParseradd_argumentr   r   r   set_defaultsstr
parse_args)parsers    r   r   r      s5   $  F $=[\\\
(4>]^^^
$(*=*AB5     e	     kE,OOO
&&&
Q     r   __main__zfile z already exists)r   T)/r   loggingostypingr   r   numpyr1   numpy.typingr   rO   onnx.onnx_pbr   r   r   r   onnxruntime.capi._pybind_stater	   
onnx_modelr   quant_utilsr   	getLoggerr   rK   r   r   argsr   setLevelDEBUGinput_modelinput_model_pathoutput_modeloutput_model_pathpathexistserror	Exceptionloadr   r   r   r   quantr~   save_model_to_filerc   r   r   <module>r      s     				                    G G G G G G G G G G G G ? ? ? ? ? ? ! ! ! ! ! ! + + + + + +		8	$	$^( ^( ^( ^( ^( ^( ^( ^(B$ $ $N z:<<D| '&&&')	w~~'(( D?.???@@@iB 1BBBCCCDI&''EtZ^ZopppE	MMOOO	K""#4d;;;;; r   