
    gxu                        d dl Z d dlZd dlZd dlmZ d dlmZ ddlmZ  ej	        e
          Z G d dej                  Z G d d	ej                  Z G d
 dej                  Z G d dej                  Z G d dej                  Z G d dej                  Zd dZd dZd dZ G d de          Z G d de          Z G d de          Zd!dZ G d de          ZdS )"    N)nn)Function   )loggingc                   >     e Zd ZdZ	 	 	 	 	 	 	 	 	 d	 fd	Zd
dZ xZS )QuantEmbeddinga  
    Quantized version of `torch.nn.Embedding`. Adds quantization-specific arguments on top of `torch.nn.Embedding`.

    Args:
        weight_bit (`int`, *optional*, defaults to `8`):
            Bitwidth for the quantized weight.
        momentum (`float`, *optional*, defaults to `0.95`):
            Momentum for updating the activation quantization range.
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
    N       @F   ffffff?c                    t                                                       || _        || _        || _        || _        || _        || _        || _        t          j
        t          j        ||g                    | _        |                     dt          j        d                     |                     dt          j        | j                             |	| _        |
| _        || _        d| _        t(          j        | _        d S )Nweight_scaling_factor   weight_integerF)super__init__num_dimpadding_idxmax_norm	norm_typescale_grad_by_freqsparser   	Parametertorchzerosweightregister_buffer
zeros_like
weight_bitmomentum
quant_modepercentile_modeSymmetricQuantFunctionapplyweight_function)selfnum_embeddingsembedding_dimr   r   r   r   r   _weightr   r    r!   	__class__s               c/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/ibert/quant_modules.pyr   zQuantEmbedding.__init__,   s     	"	 & ""4l5;/N#O#OPP4ek!nnEEE-u/?/L/LMMM$ $$5;    c           	         | j         sEt          j                            || j        | j        | j        | j        | j        | j	                  d fS | j        }|j
                                        }|                                                    d          }|                                                    d          }t          | j        ||d          | _        |                     | j        | j        | j        | j                  | _        t          j                            || j        | j        | j        | j        | j        | j	                  }|| j        z  | j        fS )Nr   F)r!   r   
functional	embeddingr   r   r   r   r   r   datadetachminexpandmax$symmetric_linear_quantization_paramsr   r   r%   r"   r   )	r&   x	positionsincremental_stateww_transformw_minw_maxemb_ints	            r+   forwardzQuantEmbedding.forwardM   s@    	''K$MN+K    Kfmmoo!!((++!!((++%I$/[`bgin%o%o""22K$*>@Z
 
 -))MN#K
 
 33T5OOOr,   )	NNr	   FFNr
   r   FNN)__name__
__module____qualname____doc__r   r>   __classcell__r*   s   @r+   r   r      s        
 
   < < < < < <B"P "P "P "P "P "P "P "Pr,   r   c                   <     e Zd ZdZd fd	Zd Z	 	 	 	 	 d	dZ xZS )
QuantActap  
    Quantizes the given activation.

    Args:
        activation_bit (`int`):
            Bitwidth for the quantized activation.
        act_range_momentum (`float`, *optional*, defaults to `0.95`):
            Momentum for updating the activation quantization range.
        per_channel (`bool`, *optional*, defaults to `False`):
            Whether to or not use channel-wise quantization.
        channel_len (`int`, *optional*):
            Specify the channel length when set the *per_channel* True.
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
    r   FNc                    t                                                       || _        || _        || _        || _        d| _        t          j        | _	        | j        s| 
                    dt          j        d                     | 
                    dt          j        d                     | 
                    dt          j        d                     | xj        dz  c_        | xj        dz  c_        d S t          d          )NFx_minr   x_maxact_scaling_factorgh㈵>;per-channel mode is not currently supported for activation.)r   r   activation_bitact_range_momentumr!   per_channel
percentiler#   r$   act_functionr   r   r   rI   rJ   NotImplementedError)r&   rM   rN   rO   channel_lenr!   r*   s         r+   r   zQuantAct.__init__   s    ,"4$&28 	e  %+a..999  %+a..999  !5u{1~~FFFJJ$JJJJ$JJJJ%&cdddr,   c           
          | j         j         d| j         d| j         d| j                                        dd| j                                        dd
S )Nz(activation_bit=z, quant_mode: z, Act_min: z.2fz, Act_max: ))r*   r@   rM   r!   rI   itemrJ   )r&   s    r+   __repr__zQuantAct.__repr__   st    ~& 1 18K 1 1?1 17;z7H7HP1 1
))01 1 1	
r,   c                    ||n||z   }| j         r| j        r
J d            | j        r
J d            |j                                        }|j                                        }	|	                                                                dk    r*|                                                                dk    s
J d            | j                                        dk    r<| j	                                        dk     r| j        |z   | _        | j	        |	z   | _	        n| j
        dk    r?t          j        | j        |          | _        t          j        | j	        |	          | _	        nD| j        | j
        z  |d| j
        z
  z  z   | _        | j	        | j
        z  |	d| j
        z
  z  z   | _	        | j        s|d fS || j        n|}|| j	        n|}	t          | j        ||	| j        	          | _        |(|                     || j        | j        | j                  }
n)t"                              ||| j        | j        ||          }
| j                            d          }|
|z  | j        fS )
Nz:percentile mode is not currently supported for activation.rL   r   z5NaN detected when computing min/max of the activationg&|g&|>r   )rO   )trainingrP   rO   r0   r2   r4   isnansumrI   rJ   rN   r   r!   r5   rM   rK   rQ   FixedPointMulr$   view)r&   r6   pre_act_scaling_factoridentityidentity_scaling_factorspecified_minspecified_maxx_actrI   rJ   quant_act_intcorrect_output_scales               r+   r>   zQuantAct.forward   st    %8a<= 	jdd(dddd'ff)ffffJNN$$EJNN$$E !!##q((U[[]]->->-@-@A-E-E-EF .F-E-E z~~'))djnn.>.>.G.G!Z%/
!Z%/

 (B.."Ytz599
"Ytz599

!Z$*AAEQQUQhMhDii
!Z$*AAEQQUQhMhDii
 	$;+3

+3

"F4;K#
 #
 #
 ") --a1DdoW[WnooMM)//&#'' M  $6;;B??33T5LLLr,   )r   FNF)NNNNNr@   rA   rB   rC   r   rW   r>   rD   rE   s   @r+   rG   rG   r   s          e e e e e e&
 
 
  $ $<M <M <M <M <M <M <M <Mr,   rG   c                   8     e Zd ZdZ	 d
 fd	Z fdZdd	Z xZS )QuantLineara8  
    Quantized version of `torch.nn.Linear`. Adds quantization-specific arguments on top of `torch.nn.Linear`.

    Args:
        weight_bit (`int`, *optional*, defaults to `8`):
            Bitwidth for the quantized weight.
        bias_bit (`int`, *optional*, defaults to `32`):
            Bitwidth for the quantized bias.
        per_channel (`bool`, *optional*, defaults to `False`):
            Whether or not to use channel-wise quantization.
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
    Tr
       Fc                    t                                                       || _        || _        t	          j        t          j        ||g                    | _        | 	                    dt          j
        | j                             | 	                    dt          j        | j                             |rXt	          j        t          j        |                    | _        | 	                    dt          j
        | j                             || _        || _        || _        || _        || _        d| _        t"          j        | _        d S )Nr   fc_scaling_factorbias_integerF)r   r   in_featuresout_featuresr   r   r   r   r   r   r   biasr   r!   rO   bias_bitr"   r#   r$   r%   )	r&   rn   ro   rp   r   rq   rO   r!   r*   s	           r+   r   zQuantLinear.__init__   s    	&(l5;k/J#K#KLL-u/?/L/LMMM0%+d>O2P2PQQQ 	NU[%>%>??DI  1A$)1L1LMMM$$& $$5;r,   c                 t    t                                                      }d| d| j         d| j         d}|S )N(z weight_bit=z, quant_mode=rU   )r   rW   r   r!   )r&   sr*   s     r+   rW   zQuantLinear.__repr__  sA    GGOOOtOOT_OOOr,   Nc                    | j         s.t          j                            || j        | j                  d fS ||j        dk    s
J d            | j        }|j                                        }| j	        r5t          j        |dd           \  }}t          j        |dd           \  }}nN|                                                    d          }|                                                    d          }t          | j        ||| j	                  | _        |                     | j        | j        | j        | j                  | _        | j        |z  }| j        '|                     | j        | j        d|          | _        |                    dd          }||z  }	t          j                            |	| j        | j                  |z  |fS )N)r   rp   )r   zInput activation to the QuantLinear layer should be globally (non-channel-wise) quantized. Please add a QuantAct layer with `per_channel = True` before this QuantAct layerr   )r   outFrY   )r!   r   r.   linearr   rp   shaper0   r1   rO   r   r2   r4   r3   r5   r   rl   r%   r"   r   rq   rm   r^   )
r&   r6   prev_act_scaling_factorr9   r:   r;   _r<   bias_scaling_factorx_ints
             r+   r>   zQuantLinear.forward  s    	U=''$+DI'NNPTTT '27N7TX\7\7\7\_ 8]7\7\
 Kfmmoo 	0y!>>>HE1y!>>>HE11OO%%,,Q//EOO%%,,Q//E!EdoW\^ceieu!v!v"22K$*>@V
 
 #47NN9  $ 4 4TYuVi j jD"9">">q""E"E++ M  t/BIZ [[^qq
 	
r,   )Tr
   rj   FFNrg   rE   s   @r+   ri   ri      s{          ns< < < < < <,    
#
 #
 #
 #
 #
 #
 #
 #
r,   ri   c                   2     e Zd ZdZd fd	Zd Zd	dZ xZS )
IntGELUa}  
    Quantized version of `torch.nn.GELU`. Adds quantization-specific arguments on top of `torch.nn.GELU`.

    Args:
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
        force_dequant (`str`, *optional*, defaults to `"none"`):
            Force dequantize the layer if either "gelu" or "nonlinear" is given.
    Tnonec                 L   t                                                       || _        |dv r!t                              d           d| _        | j        st          j                    | _        d| _        d| _	        g d| _
        | j
        dxx         | j
        d         z  cc<   d S )	N)	nonlineargeluzForce dequantize geluFg-?   )g]m{ҿgMr      r   )r   r   r!   loggerinfor   GELUactivation_fnkconstcoeff)r&   r!   force_dequantr*   s      r+   r   zIntGELU.__init__7  s    $111KK/000#DO 	+!#D
)))

1A&r,   c                    t          j        | j        d         |z            }t          j        | j        d         |dz  z            }t          j        |          }t          j        t          j        |          |           }|||z   dz  |z   z  }|dz  | j        d         z  }t                              |d| j        z  z            }|d| j        z  z  }||fS Nr   r   r   )	r   floorr   signr2   abs	floor_ster$   r   )r&   r|   scaling_factorb_intc_intr   abs_inty_ints           r+   int_erfzIntGELU.int_erfG  s    DJqMN:;;DJqMNA,==>>z%  )EIe,,uf555Q.67'*TZ]: 4: 566'!TZ-7n$$r,   Nc                     | j         s|                     |          d fS ||z  }|                     ||| j        z            \  }}d|z  }|||z   z  }||z  dz  }||z  |fS )N      ?r   )r!   r   r   r   )r&   r6   r   r|   sigmoid_intsigmoid_scaling_factor	shift_ints          r+   r>   zIntGELU.forwardV  s     	/%%a(($..N".2ll5.SWSYBY.Z.Z++11	y01'*@@1D~%~55r,   )Tr   r}   )r@   rA   rB   rC   r   r   r>   rD   rE   s   @r+   r   r   ,  sj         ' ' ' ' ' ' % % %6 6 6 6 6 6 6 6r,   r   c                   6     e Zd ZdZd fd	Zd Zd Zd Z xZS )	
IntSoftmaxa  
    Quantized version of `torch.nn.Softmax`. Adds quantization-specific arguments on top of `torch.nn.Softmax`.

    Args:
        output_bit (`int`):
            Bitwidth for the layer output activation.
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
        force_dequant (`str`, *optional*, defaults to `"none"`):
            Force dequantize the layer if either "softmax" or "nonlinear" is given.
    Fr   c                    t                                                       || _        d| _        || _        |dv r!t
                              d           d| _        t          d| j                  | _        d| _	        d| _
        g d	| _        | j        d
xx         | j        d         z  cc<   | j        dxx         | j        d         z  cc<   d S )Nrj   )r   softmaxzForce dequantize softmaxF   r!   gvq-   )gN$?g'|:?r   r   r   r   )r   r   
output_bitmax_bitr!   r   r   rG   actx0r   coef)r&   r   r!   r   r*   s       r+   r   zIntSoftmax.__init__r  s    $$444KK2333#DOB4?;;;
111		!	!$	!	!$r,   c                 *   t          j                    5  t          j        | j        d         |z            }t          j        | j        d         |dz  z            }d d d            n# 1 swxY w Y   ||z   |z  |z   }| j        d         |dz  z  }||fS r   )r   no_gradr   r   )r&   r|   r   r   r   zs         r+   int_polynomialzIntSoftmax.int_polynomial  s    ]__ 	B 	BK	!~ =>>EK	!~q/@ @AAE	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B U]e#e+1(99.  s   AA((A,/A,c                    t          j                    5  t          j        | j        |z            }d d d            n# 1 swxY w Y   t          j        || j        |z            }t                              ||z            }|||z  z
  }|                     ||          \  }}t          j	        t                              |d| j        |z
  z  z            d          }|d| j        z  z  }||fS )Nr   r   r2   )
r   r   r   r   r4   r   r   r$   r   clamp)r&   r|   r   x0_intqrexp_intexp_scaling_factors           r+   int_expzIntSoftmax.int_exp  s   ]__ 	; 	;[>!9::F	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	;	%f!455OOEFN++FQJ&*&9&9!^&L&L##+ioogdj1n8M.MNNTUVVV+am;&&s   =AAc                    | j         s#t          j                            |d          d fS ||z  }|                    dd          \  }}||z
  }|                     ||          \  }}|                     ||          \  }}||z  }|                    dd          }	t          	                    d| j
        z  |	z            }
t          	                    ||
z  d| j
        | j        z
  z  z            }dd| j        z  z  }||z  |fS )NrY   r   T)r   keepdimr   r   )r!   r   r.   r   r4   r   r   r\   r   r$   r   r   )r&   r6   r   r|   	x_int_maxrz   r   r   expexp_int_sumfactors              r+   r>   zIntSoftmax.forward  s    	:=(((33T99N"yyRy66	1	!&*ll5.&I&I## #'((74F"G"G**kkb$k77DL;!>??//'F"2Q4<$/;Y5Z"Z[[Q//'77r,   )Fr   )	r@   rA   rB   rC   r   r   r   r>   rD   rE   s   @r+   r   r   e  st        
 
% % % % % %"! ! !
' 
' 
'8 8 8 8 8 8 8r,   r   c                   8     e Zd ZdZd
 fd	Zd Zd Zdd	Z xZS )IntLayerNorma  
    Quantized version of `torch.nn.LayerNorm`. Adds quantization-specific arguments on top of `torch.nn.LayerNorm`.

    Args:
        output_bit (`int`, *optional*, defaults to `8`):
            Bitwidth for the layer output activation.
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
        force_dequant (`str`, *optional*, defaults to `"none"`):
            Force dequantize the layer if either "layernorm" or "nonlinear" is given.
    r
   Fr   c                 "   t                                                       || _        || _        t	          j        t          j        |                    | _        t	          j        t          j        |                    | _	        || _
        |dv r!t                              d           d| _
        |                     dt          j        d                     || _        d| _        d | _        t#          | j        | j
                  | _        d S )N)r   	layernormzForce dequantize layernormFshiftr   rj   r   )r   r   normalized_shapeepsr   r   r   r   r   rp   r!   r   r   r   r   r   dim_sqrtrG   
activation)r&   r   r   r   r!   r   r*   s         r+   r   zIntLayerNorm.__init__  s     0l5;/?#@#@AAL-=!>!>??	$666KK4555#DOWek!nn555$"4?tOOOr,   c           	      
   t          j                    5  |dz  }t          j        |dd          }t          j        t          j        |d| j        z  z                                                                                      }| j        }t          j        | j        |          | _        t          
                    dt          |           dt          | j                              d d d            d S # 1 swxY w Y   d S )Nr   Taxisr   zDynamic shift adjustment: z -> )r   r   r\   log2sqrtr   ceilr4   r   r   r   int)r&   r   y_sq_intvar_intr   	shift_olds         r+   	set_shiftzIntLayerNorm.set_shift  s.   ]__ 	\ 	\axHiq$???GZ
7Q_+D E EFFKKMMRRTTE
I4:u55DJKKZS^^ZZTZZZ[[[	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\ 	\s   CC88C<?C<c                     |                      |           t                              |d| j        z  z            }|dz  }t	          j        |dd          }|S )z
        This fallback function is called when overflow is detected during training time, and adjusts the `self.shift`
        to avoid overflow in the subsequent runs.
        r   Tr   )r   r   r$   r   r   r\   )r&   r   y_int_shiftedr   r   s        r+   overflow_fallbackzIntLayerNorm.overflow_fallback  sW    
 	u!4:(=>> !#)H1d;;;r,   Nc                 R   | j         sk|                    dd          }||z
  }t          j        |dz  dd          }|t          j        | j        |z             z  }|| j        z  | j        z   }|d fS | j        \t          j        |j	        d         t          j
                  }t          j        |                              |j                  | _        ||z  }t                              |                    dd                    }||z
  }	t                              |	d| j        z  z            }
|
dz  }t          j        |dd          }| j        rb|                                d| j        z  k    rB|                     |	          }|                                d| j        z  dz   k     s
J d            t                              t          j        |                    d| j        z  z  }t                              d|z            }t                              |	|z  dz            }	| j        dz  }| j        j                                        | j        j                                        z  }t                              ||z            }|	|z   }	|| j        z  }|	|z  }||fS )	Nr   Tr   )dtypeg?zfError detected in overflow handling: `var_int` exceeds `self.max_bit` (the maximum possible bit width)l        i   @)r!   meanr   r   r   r   rp   r   tensorrx   floattodevice	round_ster$   r   r   r\   rZ   r4   r   r   r0   r1   )r&   r6   r   r   yvarnr|   mean_intr   r   r   r   std_intr   rp   bias_ints                    r+   r>   zIntLayerNorm.forward  sl    	66q$6//DDA*QT4888CEJtx#~...ADK$)+Ad7N = QWQZu{;;;A!JqMM,,QX66DM N"??5::1d:#C#CDD !4:(=>> !#)H1d;;; = 	{{}}4<//0077{{}}q$,'<<<<X =<< //%*W"5"566DJF11 233. y~$$&&$+*:*A*A*C*CD??4.#899 '$+5N".  r,   )r
   Fr   r}   )	r@   rA   rB   rC   r   r   r   r>   rD   rE   s   @r+   r   r     s        
 
P P P P P P&\ \ \	 	 	.! .! .! .! .! .! .! .!r,   r   Fc                 Z   | j         d         }t          |d|dz  z
  z            }t          ||z  dz            }t          j        | |          j        }|dk    r|dz  }nt          j        |  |          j         }|s(|                                }|                                }||fS )a  
    Calculate the percentile max and min values in a given tensor

    Args:
        input (`torch.Tensor`):
            The target tensor to calculate percentile max and min.
        lower_percentile (`float`):
            If 0.1, means we return the value of the smallest 0.1% value in the tensor as percentile min.
        upper_percentile (`float`):
            If 99.9, means we return the value of the largest 0.1% value in the tensor as percentile max.
        output_tensor (`bool`, *optional*, defaults to `False`):
            If True, this function returns tensors, otherwise it returns values.

    Returns:
        `Tuple(torch.Tensor, torch.Tensor)`: Percentile min and max value of *input*
    r   r   g{Gz?)r   )rx   roundr   kthvaluevaluesrV   )	inputlower_percentileupper_percentileoutput_tensorinput_lengthlower_indexupper_indexupper_boundlower_bounds	            r+   get_percentile_min_maxr     s    " ;q>L,<t,C(CDEEK'77$>??K.+666=K1!Ao ~uf<<<CC )!&&((!&&((##r,   c                 .   t          | j                  dk    r1|                    dddd          }|                    dddd          }not          | j                  dk    r-|                    dd          }|                    dd          }n*|                    d          }|                    d          }|r?|                     d|z                                |                                           | S t          j        d|z  | z  |z             S )a?  
    Quantize single-precision input tensor to integers with the given scaling factor and zeropoint.

    Args:
        input (`torch.Tensor`):
            Single-precision input tensor to be quantized.
        scale (`torch.Tensor`):
            Scaling factor for quantization.
        zero_pint (`torch.Tensor`):
            Shift for quantization.
        inplace (`bool`, *optional*, defaults to `False`):
            Whether to compute inplace or not.

    Returns:
        `torch.Tensor`: Linearly quantized value of *input* according to *scale* and *zero_point*.
       rY   r   r   r   )lenrx   r^   mul_add_round_r   r   )r   scale
zero_pointinplaces       r+   linear_quantizer   5  s   $ 5;1

2q!Q''__RAq11

	U[		Q		

2q!!__R++



2__R((
 

3;$$Z0077999;sU{U*Z7888r,   c                    t          j                    5  d| dz
  z  dz
  }|rmt          j        t          j        |                                |                                gd          d          \  }}t          j        |d          |z  }nMt          |                                |                                          }t          j        |d          |z  }ddd           n# 1 swxY w Y   |S )a/  
    Compute the scaling factor with the given quantization range for symmetric quantization.

    Args:
        saturation_min (`torch.Tensor`):
            Lower bound for quantization range.
        saturation_max (`torch.Tensor`):
            Upper bound for quantization range.
        per_channel (`bool`, *optional*, defaults to `False`):
            Whether to or not use channel-wise quantization.

    Returns:
        `torch.Tensor`: Scaling factor that linearly quantizes the given range between *saturation_min* and
        *saturation_max*.
    r   r   r   g:0yE>r   N)r   r   r4   stackr   r   )num_bitssaturation_minsaturation_maxrO   r   r   rz   s          r+   r5   r5   X  s*   $ 
 	5 	5(Q,!# 	5yn.@.@.B.BNDVDVDXDX-Y_`!a!a!aghiiiHE1K400014EE **,,n.@.@.B.BCCEK400014E	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 	5 Ls   CC((C,/C,c                   >    e Zd ZdZed             Zed             ZdS )r#   zw
    Class to quantize the given floating-point values using symmetric quantization with given range and bitwidth.
    c                     t          j        d                              |j                  }d|dz
  z  dz
  }t	          |||d          }t          j        || |dz
            }|| _        |S )a6  
        Args:
            x (`torch.Tensor`):
                Floating point tensor to be quantized.
            k (`int`):
                Quantization bitwidth.
            percentile_mode (`bool`):
                Whether or not to use percentile calibration.
            scale (`torch.Tensor`):
                Pre-calculated scaling factor for *x*. Note that the current implementation of SymmetricQuantFunction
                requires pre-calculated scaling factor.

        Returns:
            `torch.Tensor`: Symmetric-quantized value of *input*.
        g        r   r   F)r   )r   r   r   r   r   r   r   )ctxr6   r   r"   r   r   r   new_quant_xs           r+   r>   zSymmetricQuantFunction.forward}  sq    " \#&&))%,77
!a%L1%a
EJJJk+r1q599	r,   c                 2   | j         }t          |j                  dk    r|                    dddd          }nDt          |j                  dk    r|                    dd          }n|                    d          }|                                |z  d d d d fS )Nr   rY   r   r   )r   r   rx   r^   clone)r  grad_outputr   s      r+   backwardzSymmetricQuantFunction.backward  s    	{ !!Q&&JJr1a++EE"##q((JJr1%%EEJJrNNE  ""U*D$dBBr,   Nr@   rA   rB   rC   staticmethodr>   r   r,   r+   r#   r#   x  sY            \2 
C 
C \
C 
C 
Cr,   r#   c                   >    e Zd ZdZed             Zed             ZdS )r   z;
    Straight-through Estimator(STE) for torch.floor()
    c                 *    t          j        |          S r}   )r   r   r  r6   s     r+   r>   zfloor_ste.forward      {1~~r,   c                 *    |                                 S r}   r  r  r  s     r+   r  zfloor_ste.backward        """r,   Nr	  r  r,   r+   r   r     T            \ # # \# # #r,   r   c                   >    e Zd ZdZed             Zed             ZdS )r   z;
    Straight-through Estimator(STE) for torch.round()
    c                 *    t          j        |          S r}   )r   r   r  s     r+   r>   zround_ste.forward  r  r,   c                 *    |                                 S r}   r  r  s     r+   r  zround_ste.backward  r  r,   Nr	  r  r,   r+   r   r     r  r,   r      c                    |                                  }|                     d          } t          j        |                                                                           \  }}g }|D ]o}t          t          j        |d|z  z            	                    t          j        d          t          j
                            }|                    |           pt          j        |          }t          |          |z
  }t          j        |                              | j                                      |          t          j        |                              | j                                      |          fS )z
    Decompose the scaling factor into mantissa and twos exponent.

    Args:
        scaling_factor (`torch.Tensor`):
            Target scaling factor to decompose.

    Returns:
        ``Tuple(torch.Tensor, torch.Tensor)`: mantisa and exponent
    rY   r   1)rounding)sizer^   npfrexpcpunumpyr   decimalDecimalquantizeROUND_HALF_UPappendarrayr   r   
from_numpyr   r   )inputsr   shape_of_inputoutput_moutput_etmp_mmint_m_shifteds           r+   batch_frexpr/    s<    [[]]N [[__F&**,,"4"4"6"677HhE $ $OAG,--66ws7K7KV]Vk6ll
 
 	]####xHW~~(H 	""%%fm4499.II""%%fm4499.II r,   c                   D    e Zd ZdZe	 	 dd            Zed             ZdS )r]   aQ  
    Function to perform fixed-point arithmetic that can match integer arithmetic on hardware.

    Args:
        pre_act (`torch.Tensor`):
            Input tensor.
        pre_act_scaling_factor (`torch.Tensor`):
            Scaling factor of the input tensor *pre_act*.
        bit_num (`int`):
            Quantization bitwidth.
        z_scaling_factor (`torch.Tensor`):
            Scaling factor of the output tensor.
        identity (`torch.Tensor`, *optional*):
            Identity tensor, if exists.
        identity_scaling_factor (`torch.Tensor`, *optional*):
            Scaling factor of the identity tensor *identity*, if exists.

    Returns:
        `torch.Tensor`: Output tensor(*pre_act* if *identity* is not given, otherwise the addition of *pre_act* and
        *identity*), whose scale is rescaled to *z_scaling_factor*.
    Nc                 ,   t          |j                  dk    rd }nd }|| _        d|dz
  z  dz
  }t          j                    5   ||          }| ||          }|| _        t          j        ||z            }	|                    t          j                  }
|                    t          j	                                      t          j                  }|
|z  } ||          }t          |          \  }}|	                    t          j                  |                    t          j                  z  }t          j        |d|z  z            }|t          j        ||z            }|                    t          j                  }
|                    t          j	                                      t          j                  }|
|z  } ||          }t          |          \  }}|                    t          j                  |                    t          j                  z  }t          j        |d|z  z            }||z   }t          j        |                    t          j	                  | dz
  |          cd d d            S # 1 swxY w Y   d S )Nr   c                     | S r}   r  r6   s    r+   <lambda>z'FixedPointMul.forward.<locals>.<lambda>  s     r,   c                 0    |                      ddd          S )Nr   rY   )r^   r3  s    r+   r4  z'FixedPointMul.forward.<locals>.<lambda>  s    q!R 0 0 r,   r   r   r	   )r   rx   r`   r   r   z_scaling_factorr   typedoubler   r/  r   )r  pre_actr_   bit_numr6  r`   ra   reshaper   z_int_A_B	new_scaler-  eoutputwx_intm1e1output1s                       r+   r>   zFixedPointMul.forward  s    %+,,11!kGG00G'A+"]__ !	D !	D%,W-C%D%D"#*1'2I*J*J'#3C K*@ @AAE',,U\::B"''44::5<HHBRI	**Iy))DAqZZ--u|0D0DDF[36!233F#X0G%GHH,11%,??&++EK88>>u|LLG	#GI..	$Y//B ++el33bggel6K6KK+gb&9:: 6);v{{5;77!aCCC!	D !	D !	D !	D !	D !	D !	D !	D !	D !	D !	D !	D !	D !	D !	D !	D !	D !	Ds   H7J		JJc                     d }| j         |                                | j        z  }|                                | j        z  d d d d |d fS r}   )r`   r  r6  )r  r  identity_grads      r+   r  zFixedPointMul.backward/  sS    <#'--//#2FFM  ""S%994tTS`bfffr,   r?   r	  r  r,   r+   r]   r]     sk         ,   $2D 2D 2D \2Dh g g \g g gr,   r]   )F)r  )r!  r   r  r   r   torch.autogradr   utilsr   
get_loggerr@   r   Moduler   rG   ri   r   r   r   r   r   r5   r#   r   r   r/  r]   r  r,   r+   <module>rL     s  $             # # # # # #       
	H	%	%PP PP PP PP PPRY PP PP PPfgM gM gM gM gMry gM gM gMTM
 M
 M
 M
 M
") M
 M
 M
`66 66 66 66 66bi 66 66 66rD8 D8 D8 D8 D8 D8 D8 D8Nb! b! b! b! b!29 b! b! b!J!$ !$ !$ !$H 9  9  9  9F   @*C *C *C *C *CX *C *C *CZ# # # # # # # ## # # # # # # #   DQg Qg Qg Qg QgH Qg Qg Qg Qg Qgr,   