
    Ng                        d Z ddlmZmZ ddlZddlmZ 	 ddlm	Z	 dZ
n# e$ r dZ
Y nw xY w	 ddlmZmZ dZn# e$ r dZY nw xY wdad	 Zdd
Z	 	 	 ddej        dedeej                 deej                 dedej        fdZ	 	 	 ddej        dee         deej                 deej                 dedej        fdZ	 	 ddej        dee         deej                 defdZ	 	 ddej        dee         deej                 dedej        f
dZdS )a   'Fast' Normalization Functions

For GroupNorm and LayerNorm these functions bypass typical AMP upcast to float32.

Additionally, for LayerNorm, the APEX fused LN is used if available (which also does not upcast)

Hacked together by / Copyright 2022 Ross Wightman
    )ListOptionalN)
functional)fused_layer_norm_affineTF)fused_rms_norm_affinefused_rms_normc                      t           S N_USE_FAST_NORM     Q/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/layers/fast_norm.pyis_fast_normr      s    r   c                 
    | a d S r
   r   )enables    r   set_fast_normr   #   s    NNNr   h㈵>x
num_groupsweightbiasepsreturnc                    t           j                                        rt          j        | ||||          S t          j                    rVt          j                    }|                     |          |                    |          ||                    |          nd }}} t           j        j	        
                    d          5  t          j        | ||||          cd d d            S # 1 swxY w Y   d S NF)enabled)torchjitis_scriptingF
group_normis_autocast_enabledget_autocast_gpu_dtypetocudaampautocast)r   r   r   r   r   dts         r   fast_group_normr*   (   s    y >|Az64=== "" ] )++$$r((FIIbMM$BR4772;;;X\46		 	 	 	/	/ > >|Az64==> > > > > > > > > > > > > > > > > >s   C**C.1C.normalized_shapec                 "   t           j                                        rt          j        | ||||          S t
          rt          | ||||          S t          j                    rVt          j                    }| 	                    |          |	                    |          ||	                    |          nd }}} t           j
        j                            d          5  t          j        | ||||          cd d d            S # 1 swxY w Y   d S r   )r   r   r    r!   
layer_normhas_apexr   r#   r$   r%   r&   r'   r(   )r   r+   r   r   r   r)   s         r   fast_layer_normr/   >   sT    y D|A/sCCC O&q&$8H#NNN "" ] )++$$r((FIIbMM$BR4772;;;X\46		 	 	 	/	/ D D|A/sCCD D D D D D D D D D D D D D D D D Ds   DDDc                 x   t          |          }t          j                                        r2|dk    sJ t          j        | d                              d          }n9t          t          d| dz
  d                    }t          j        | |d          }| t          j        ||z             z  } || |z  } | S )N   )dimT)r3   keepdim)	lenr   r   r    var	unsqueezetuplerangersqrt)r   r+   r   r   	norm_ndimvdimss          r   rms_normr>   W   s     $%%Iy 1 A~~~~IaR   **2..U2	zA~r2233IaT4000	EKC   AJHr   c                     t           j                                        rt          | |||          S t          r%|t          | ||          S t          | |||          S t          | |||          S r
   )r   r   r    r>   has_apex_rmsnormr   r   )r   r+   r   r   s       r   fast_rms_normrA   m   s|     y :+VS999 K>!!%5s;;;(F4DcJJJ A'555r   )T)NNr   )Nr   )__doc__typingr   r   r   torch.nnr   r!   #apex.normalization.fused_layer_normr   r.   ImportErrorr   r   r@   r   r   r   Tensorintfloatr*   r/   r>   rA   r   r   r   <module>rJ      s    " ! ! ! ! ! ! !  $ $ $ $ $ $KKKKKKHH   HHHYYYYYYYY   
       &*#'> >|>> U\"> 5<
 	>
 
> \> > > >2 &*#'D D|D3iD U\"D 5<
 	D
 
D \D D D D8 &*	 |3i U\" 
	   2 &*	6 6|63i6 U\"6 
	6
 \6 6 6 6 6 6s    ))
8 AA