
    Ng                         d Z ddlZddlmZ ddlmc mZ ddlmZm	Z	m
Z
  G d dej                  Z G d dej                  Z G d	 d
ej                  Z G d dej                  ZdS )a   Convolution with Weight Standardization (StdConv and ScaledStdConv)

StdConv:
@article{weightstandardization,
  author    = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Yuille},
  title     = {Weight Standardization},
  journal   = {arXiv preprint arXiv:1903.10520},
  year      = {2019},
}
Code: https://github.com/joe-siyuan-qiao/WeightStandardization

ScaledStdConv:
Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets`
    - https://arxiv.org/abs/2101.08692
Official Deepmind JAX code: https://github.com/deepmind/deepmind-research/tree/master/nfnets

Hacked together by / copyright Ross Wightman, 2021.
    N   )get_paddingget_padding_valuepad_samec                   .     e Zd ZdZ	 	 d fd	Zd Z xZS )		StdConv2dzConv2d with Weight Standardization. Used for BiT ResNet-V2 models.

    Paper: `Micro-Batch Training with Batch-Channel Normalization and Weight Standardization` -
        https://arxiv.org/abs/1903.10520v2
    r   NFư>c
           
          |t          |||          }t                                          ||||||||           |	| _        d S )Nstridepaddingdilationgroupsbias)r   super__init__eps)self
in_channelout_channelskernel_sizer   r   r   r   r   r   	__class__s             P/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/layers/std_conv.pyr   zStdConv2d.__init__    s^     ?!+vx@@Gk&hvD 	 	J 	J 	J     c           	         t          j        | j                            d| j        d          d d dd| j                                      | j                  }t          j        ||| j        | j	        | j
        | j        | j                  }|S Nr   T        )trainingmomentumr   )F
batch_normweightreshaper   r   
reshape_asconv2dr   r   r   r   r   r   xr#   s      r   forwardzStdConv2d.forward*   s    K4#4b994BDH6 6 66@j6M6M 	 HQ	4;dmUYU`aar   )r   Nr   r   Fr	   __name__
__module____qualname____doc__r   r)   __classcell__r   s   @r   r   r      s`          LP26           r   r   c                   .     e Zd ZdZ	 	 d fd	Zd Z xZS )	StdConv2dSamezConv2d with Weight Standardization. TF compatible SAME padding. Used for ViT Hybrid model.

    Paper: `Micro-Batch Training with Batch-Channel Normalization and Weight Standardization` -
        https://arxiv.org/abs/1903.10520v2
    r   SAMEFr	   c
           
          t          ||||          \  }}
t                                          ||||||||           |
| _        |	| _        d S )Nr   r   r   )r   r   r   same_padr   )r   r   r   r   r   r   r   r   r   r   
is_dynamicr   s              r   r   zStdConv2dSame.__init__8   si     0V^fgggk&'\d 	 	& 	& 	& #r   c           	      h   | j         r!t          || j        | j        | j                  }t          j        | j                            d| j	        d          d d dd| j
                                      | j                  }t          j        ||| j        | j        | j        | j        | j                  }|S r   )r6   r   r   r   r   r!   r"   r#   r$   r   r   r%   r&   r   r   r   r'   s      r   r)   zStdConv2dSame.forwardB   s    = 	JD,dk4=IIAK4#4b994BDH6 6 66@j6M6M 	 HQ	4;dmUYU`aar   )r   r3   r   r   Fr	   r*   r0   s   @r   r2   r2   2   s`          LR26           r   r2   c                   .     e Zd ZdZ	 	 d	 fd	Zd Z xZS )
ScaledStdConv2da4  Conv2d layer with Scaled Weight Standardization.

    Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets` -
        https://arxiv.org/abs/2101.08692

    NOTE: the operations used in this impl differ slightly from the DeepMind Haiku impl. The impact is minor.
    r   NT      ?r	   c           
      L   |t          |||          }t                                          ||||||||           t          j        t          j        | j        dddf|                    | _        |	| j	        d         
                                dz  z  | _        |
| _        d S )Nr   r   r         )r   r   r   nn	Parametertorchfullr   gainr#   numelscaler   )r   in_channelsr   r   r   r   r   r   r   gammar   	gain_initr   s               r   r   zScaledStdConv2d.__init__U   s     ?!+vx@@G{67]e 	 	& 	& 	& LT->1a,H)!T!TUU	T[^1133t;;
r   c           	      V   t          j        | j                            d| j        d          d d | j        | j        z                      d          dd| j                  	                    | j                  }t          j
        ||| j        | j        | j        | j        | j                  S Nr   r   Tr   )r#   r   r    r   )r!   r"   r#   r$   r   rB   rD   viewr   r%   r&   r   r   r   r   r   r'   s      r   r)   zScaledStdConv2d.forwarda   s    K4#4b994I
*0044BDH6 6 6 7Aj6M6M 	 x649dk4<X\Xcdddr   )r   Nr   r   Tr;   r	   r;   r*   r0   s   @r   r:   r:   L   sg          MQLO
 
 
 
 
 
e e e e e e er   r:   c                   .     e Zd ZdZ	 	 d	 fd	Zd Z xZS )
ScaledStdConv2dSamea\  Conv2d layer with Scaled Weight Standardization and Tensorflow-like SAME padding support

    Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets` -
        https://arxiv.org/abs/2101.08692

    NOTE: the operations used in this impl differ slightly from the DeepMind Haiku impl. The impact is minor.
    r   r3   Tr;   r	   c           
      `   t          ||||          \  }}t                                          ||||||||           t          j        t          j        | j        dddf|                    | _        |	| j	        d         
                                dz  z  | _        || _        |
| _        d S )Nr5   r   r   r   r=   )r   r   r   r>   r?   r@   rA   r   rB   r#   rC   rD   r6   r   )r   rE   r   r   r   r   r   r   r   rF   r   rG   r7   r   s                r   r   zScaledStdConv2dSame.__init__r   s     0V^fggg{67]e 	 	& 	& 	& LT->1a,H)!T!TUU	T[^1133t;;
"r   c           	         | j         r!t          || j        | j        | j                  }t          j        | j                            d| j	        d          d d | j
        | j        z                      d          dd| j                                      | j                  }t          j        ||| j        | j        | j        | j        | j                  S rI   )r6   r   r   r   r   r!   r"   r#   r$   r   rB   rD   rJ   r   r%   r&   r   r   r   r'   s      r   r)   zScaledStdConv2dSame.forward~   s    = 	JD,dk4=IIAK4#4b994I
*0044BDH6 6 6 7Aj6M6M 	 x649dk4<X\Xcdddr   )r   r3   r   r   Tr;   r	   r;   r*   r0   s   @r   rL   rL   i   sg          MSLO
 
 
 
 
 
e e e e e e er   rL   )r.   r@   torch.nnr>   torch.nn.functional
functionalr!   r   r   r   r   Conv2dr   r2   r:   rL    r   r   <module>rT      s1   $                 = = = = = = = = = =    	   0    BI   4e e e e ebi e e e:e e e e e") e e e e er   