
    ڧg5}                        d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlZd dlZd dlmc mZ d dlmZmZ d dlmZmZmZ d dlmZ d dlmZmZ d d	lmZmZ d d
l m!Z! d dl"m#Z#m$Z$ d dl%m&Z& g dZ'dee(e(f         de(de(de(dee(e(f         f
dZ)dee(e(f         de(deee(e(f                  fdZ*de(de(dej        fdZ+ G d dej,                  Z- G d dej,                  Z. G d dej,                  Z/ G d d ej,                  Z0 G d! d"ej,                  Z1 G d# d$ej,                  Z2 G d% d&ej,                  Z3 G d' d(ej,                  Z4 G d) d*ej,                  Z5	 	 d=d,e(d-ee(         d.ee(         d/e6d0e(d1e(d2e	e         d3e7d4ede5fd5Z8 G d6 d7e          Z9 e             ed8e9j:        f9          dd:d;d2e	e9         d3e7d4ede5fd<                        Z;dS )>    N)OrderedDict)partial)AnyCallableListOptionalSequenceTuple)nnTensor)register_modelWeightsWeightsEnum)_IMAGENET_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)Conv2dNormActivationSqueezeExcitation)StochasticDepth)ImageClassificationInterpolationMode)_log_api_usage_once)MaxVitMaxVit_T_Weightsmaxvit_t
input_sizekernel_sizestridepaddingreturnc                 ^    | d         |z
  d|z  z   |z  dz   | d         |z
  d|z  z   |z  dz   fS )Nr          )r   r   r   r   s       U/var/www/html/ai-engine/env/lib/python3.11/site-packages/torchvision/models/maxvit.py_get_conv_output_shaper&      sJ    	A	$q7{	2v=A	A	$q7{	2v=A     n_blocksc                     g }t          | ddd          }t          |          D ])}t          |ddd          }|                    |           *|S )zQUtil function to check that the input size is correct for a MaxVit configuration.   r"   r#   )r&   rangeappend)r   r(   shapesblock_input_shape_s        r%   _make_block_input_shapesr0       s`    F.z1aCC8__ ) )23DaANN'((((Mr'   heightwidthc                     t          j        t          j        t          j        |           t          j        |          g                    }t          j        |d          }|d d d d d f         |d d d d d f         z
  }|                    ddd                                          }|d d d d dfxx         | dz
  z  cc<   |d d d d dfxx         |dz
  z  cc<   |d d d d dfxx         d|z  dz
  z  cc<   |                    d          S )Nr#   r"   r   )torchstackmeshgridarangeflattenpermute
contiguoussum)r1   r2   coordscoords_flatrelative_coordss        r%   _get_relative_position_indexr@   *   s:   [f)=)=u|E?R?R(STTUUF-**K!!!!QQQ*-AAAtQQQJ0GGO%--aA66AACCOAAAqqq!G
*AAAqqq!G	)AAAqqq!GE	A-r"""r'   c                        e Zd ZdZ	 ddedededededed	ej        f         d
ed	ej        f         deddf fdZ	de
de
fdZ xZS )MBConva=  MBConv: Mobile Inverted Residual Bottleneck.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        expansion_ratio (float): Expansion ratio in the bottleneck.
        squeeze_ratio (float): Squeeze ratio in the SE Layer.
        stride (int): Stride of the depthwise convolution.
        activation_layer (Callable[..., nn.Module]): Activation function.
        norm_layer (Callable[..., nn.Module]): Normalization function.
        p_stochastic_dropout (float): Probability of stochastic depth.
            in_channelsout_channelsexpansion_ratiosqueeze_ratior   activation_layer.
norm_layerp_stochastic_dropoutr    Nc	                 (   t                                                       |  |dk    p||k    }	|	rOt          j        ||ddd          g}
|dk    rt          j        d|d          g|
z   }
t          j        |
 | _        nt          j                    | _        t          ||z            }t          ||z            }|rt          |d          | _
        nt          j                    | _
        t                      } ||          |d	<   t          ||ddd
||d           |d<   t          ||d|d|||d 	  	        |d<   t          ||t          j                  |d<   t          j        ||dd          |d<   t          j        |          | _        d S )Nr#   T)r   r   biasr"   r*   r   r   r   rowmodepre_normr   )r   r   r   rH   rI   inplaceconv_a)r   r   r   rH   rI   groupsrR   conv_b)
activationsqueeze_excitation)rD   rE   r   rL   conv_c)super__init__r   Conv2d	AvgPool2d
SequentialprojIdentityintr   stochastic_depthr   r   r   SiLUlayers)selfrD   rE   rF   rG   r   rH   rI   rJ   should_projr^   mid_channelssqz_channels_layers	__class__s                 r%   rZ   zMBConv.__init__C   s    	 	k@[L%@ 	&Ik<QqW[\\\]D{{61MMMNQUUt,DIIDI</9::<-788 	2$34Hu$U$U$UD!!$&KMMD!--(j55
0-!	
 	
 	
 1-!

 

 

 ):,acah(i(i(i$%I,\ghostttmG,,r'   xc                     |                      |          }|                     |                     |                    }||z   S )z
        Args:
            x (Tensor): Input tensor with expected layout of [B, C, H, W].
        Returns:
            Tensor: Output tensor with expected layout of [B, C, H / stride, W / stride].
        )r^   ra   rc   rd   rj   ress      r%   forwardzMBConv.forward   s9     iill!!$++a..11Qwr'   )rC   )__name__
__module____qualname____doc__r`   floatr   r   ModulerZ   r   rn   __classcell__ri   s   @r%   rB   rB   5   s         , '*;- ;-;- ;- 	;-
 ;- ;- #3	>2;- S")^,;- $;- 
;- ;- ;- ;- ;- ;-z	 	F 	 	 	 	 	 	 	 	r'   rB   c                   Z     e Zd ZdZdedededdf fdZdej        fdZd	edefd
Z	 xZ
S )$RelativePositionalMultiHeadAttentionzRelative Positional Multi-Head Attention.

    Args:
        feat_dim (int): Number of input features.
        head_dim (int): Number of features per head.
        max_seq_len (int): Maximum sequence length.
    feat_dimhead_dimmax_seq_lenr    Nc                 ,   t                                                       ||z  dk    rt          d| d|           ||z  | _        || _        t          t          j        |                    | _        || _	        t          j        || j        | j        z  dz            | _        |dz  | _        t          j        | j        | j        z  |          | _        t          j                            t#          j        d| j        z  dz
  d| j        z  dz
  z  | j        ft"          j                            | _        |                     d	t-          | j        | j                             t"          j
        j                            | j        d
           d S )Nr   z
feat_dim: z  must be divisible by head_dim: r*   g      r"   r#   )dtyperelative_position_index{Gz?std)rY   rZ   
ValueErrorn_headsrz   r`   mathsqrtsizer{   r   Linearto_qkvscale_factormerge	parameter	Parameterr5   emptyfloat32relative_position_bias_tableregister_bufferr@   inittrunc_normal_)rd   ry   rz   r{   ri   s       r%   rZ   z-RelativePositionalMultiHeadAttention.__init__   sl    	h!##^(^^T\^^___8+ 	+..//	&i$,*F*JKK$dNYt}t|;XFF
,.L,B,BK!di-!+DI0ABDLQY^Yfggg-
 -
) 	68TUYU^`d`i8j8jkkk##D$E4#PPPPPr'   c                 
   | j                             d          }| j        |                             | j        | j        d          }|                    ddd                                          }|                    d          S )Nr4   r"   r   r#   )r~   viewr   r{   r:   r;   	unsqueeze)rd   
bias_indexrelative_biass      r%   get_relative_positional_biaszARelativePositionalMultiHeadAttention.get_relative_positional_bias   sv    166r::
9*EJJ4K[]a]moqrr%--aA66AACC&&q)))r'   rj   c                    |j         \  }}}}| j        | j        }}|                     |          }t	          j        |dd          \  }	}
}|	                    |||||                              ddddd          }	|
                    |||||                              ddddd          }
|                    |||||                              ddddd          }|
| j        z  }
t	          j	        d|	|
          }| 
                                }t          j        ||z   d          }t	          j	        d	||          }|                    ddddd                              ||||          }|                     |          }|S )
z
        Args:
            x (Tensor): Input tensor with expected layout of [B, G, P, D].
        Returns:
            Tensor: Output tensor with expected layout of [B, G, P, D].
        r*   r4   )dimr   r#   r"      z!B G H I D, B G H J D -> B G H I Jz!B G H I J, B G H J D -> B G H I D)shaper   rz   r   r5   chunkreshaper:   r   einsumr   Fsoftmaxr   )rd   rj   BGPDHDHqkvqkvdot_prodpos_biasouts                  r%   rn   z,RelativePositionalMultiHeadAttention.forward   sx    W
1adm2kk!nn+c1"---1aIIaAq"%%--aAq!<<IIaAq"%%--aAq!<<IIaAq"%%--aAq!<<!!< CQJJ44669X0b999l>!LLkk!Q1a((00Aq!<<jjoo
r'   )ro   rp   rq   rr   r`   rZ   r5   r   r   rn   ru   rv   s   @r%   rx   rx      s         QQ Q 	Q
 
Q Q Q Q Q Q8*el * * * * F        r'   rx   c                   T     e Zd ZdZdededdf fdZdej        dej        fdZ xZ	S )	SwapAxeszPermute the axes of a tensor.abr    Nc                 d    t                                                       || _        || _        d S N)rY   rZ   r   r   )rd   r   r   ri   s      r%   rZ   zSwapAxes.__init__   s+    r'   rj   c                 F    t          j        || j        | j                  }|S r   )r5   swapaxesr   r   rl   s      r%   rn   zSwapAxes.forward   s    nQ//
r'   )
ro   rp   rq   rr   r`   rZ   r5   r   rn   ru   rv   s   @r%   r   r      s~        ''# # $      
 %,        r'   r   c                   8     e Zd ZdZd fdZdededefdZ xZS )	WindowPartitionzB
    Partition the input tensor into non-overlapping windows.
    r    Nc                 H    t                                                       d S r   rY   rZ   rd   ri   s    r%   rZ   zWindowPartition.__init__       r'   rj   pc                     |j         \  }}}}|}|                    ||||z  |||z  |          }|                    dddddd          }|                    |||z  ||z  z  ||z  |          }|S )z
        Args:
            x (Tensor): Input tensor with expected layout of [B, C, H, W].
            p (int): Number of partitions.
        Returns:
            Tensor: Output tensor with expected layout of [B, H/P, W/P, P*P, C].
        r   r"   r   r*      r#   r   r   r:   )rd   rj   r   r   Cr   Wr   s           r%   rn   zWindowPartition.forward   s     W
1aIIaAFAqAvq11IIaAq!Q''IIa!q&Q!V,a!eQ77r'   r    N	ro   rp   rq   rr   rZ   r   r`   rn   ru   rv   s   @r%   r   r      sp               C F        r'   r   c            
       @     e Zd ZdZd
 fdZdededededef
d	Z xZS )WindowDepartitionzo
    Departition the input tensor of non-overlapping windows into a feature volume of layout [B, C, H, W].
    r    Nc                 H    t                                                       d S r   r   r   s    r%   rZ   zWindowDepartition.__init__  r   r'   rj   r   h_partitionsw_partitionsc                     |j         \  }}}}|}	||}}
|                    ||
||	|	|          }|                    dddddd          }|                    |||
|	z  ||	z            }|S )ar  
        Args:
            x (Tensor): Input tensor with expected layout of [B, (H/P * W/P), P*P, C].
            p (int): Number of partitions.
            h_partitions (int): Number of vertical partitions.
            w_partitions (int): Number of horizontal partitions.
        Returns:
            Tensor: Output tensor with expected layout of [B, C, H, W].
        r   r   r#   r*   r"   r   r   )rd   rj   r   r   r   r   r   PPr   r   HPWPs               r%   rn   zWindowDepartition.forward  s{     g1b!|BIIaRAq))IIaAq!Q''IIaBFBF++r'   r   r   rv   s   @r%   r   r      s               C s # RX        r'   r   c                        e Zd ZdZdededededeeef         deded	ej	        f         d
ed	ej	        f         de
de
de
ddf fdZdedefdZ xZS )PartitionAttentionLayera  
    Layer for partitioning the input tensor into non-overlapping windows and applying attention to each window.

    Args:
        in_channels (int): Number of input channels.
        head_dim (int): Dimension of each attention head.
        partition_size (int): Size of the partitions.
        partition_type (str): Type of partitioning to use. Can be either "grid" or "window".
        grid_size (Tuple[int, int]): Size of the grid to partition the input tensor into.
        mlp_ratio (int): Ratio of the  feature size expansion in the MLP layer.
        activation_layer (Callable[..., nn.Module]): Activation function to use.
        norm_layer (Callable[..., nn.Module]): Normalization function to use.
        attention_dropout (float): Dropout probability for the attention layer.
        mlp_dropout (float): Dropout probability for the MLP layer.
        p_stochastic_dropout (float): Probability of dropping out a partition.
    rD   rz   partition_sizepartition_type	grid_size	mlp_ratiorH   .rI   attention_dropoutmlp_dropoutrJ   r    Nc           	         t                                                       ||z  | _        || _        |d         |z  | _        || _        || _        |dvrt          d          |dk    r|| j        c| _        | _	        n| j        |c| _        | _	        t                      | _        t                      | _        |dk    rt          dd          nt          j                    | _        |dk    rt          dd          nt          j                    | _        t          j         ||          t)          |||dz            t          j        |	                    | _        t          j        t          j        |          t          j        |||z             |            t          j        ||z  |          t          j        |
                    | _        t5          |d	
          | _        d S )Nr   )gridwindowz0partition_type must be either 'grid' or 'window'r   r   r"   rN   rO   )rY   rZ   r   rz   n_partitionsr   r   r   r   gr   partition_opr   departition_opr   r   r_   partition_swapdepartition_swapr]   rx   Dropout
attn_layer	LayerNormr   	mlp_layerr   stochastic_dropout)rd   rD   rz   r   r   r   r   rH   rI   r   r   rJ   ri   s               r%   rZ   z PartitionAttentionLayer.__init__,  s   " 	"h. %aLN:,"!333OPPPX%%+T->NDFDFF!.NDFDF+--/112@F2J2Jhr2...PRP[P]P]4Bf4L4LR 0 0 0RTR]R_R_-J{## 1hXYHYZZJ())
 
 L%%Ik;#:;;IkI-{;;J{##
 
 #22FU"S"S"Sr'   rj   c                    | j         d         | j        z  | j         d         | j        z  }}t          j        | j         d         | j        z  dk    o| j         d         | j        z  dk    d                    | j         | j                             |                     || j                  }|                     |          }||                     |                     |                    z   }||                     | 	                    |                    z   }| 
                    |          }|                     || j        ||          }|S )z
        Args:
            x (Tensor): Input tensor with expected layout of [B, C, H, W].
        Returns:
            Tensor: Output tensor with expected layout of [B, C, H, W].
        r   r#   z[Grid size must be divisible by partition size. Got grid size of {} and partition size of {})r   r   r5   _assertformatr   r   r   r   r   r   r   )rd   rj   ghgws       r%   rn   zPartitionAttentionLayer.forwardf  s.    "df,dnQ.?46.IBN1&!+Oq0ADF0Ja0Oipp 	
 	
 	
 a((""''(:(:;;;''q(9(9:::!!!$$462r22r'   )ro   rp   rq   rr   r`   strr
   r   r   rt   rs   rZ   r   rn   ru   rv   s   @r%   r   r     s        "8T8T 8T
 8T 8T c?8T 8T #3	>28T S")^,8T !8T 8T $8T  
!8T 8T 8T 8T 8T 8Tt F        r'   r   c                        e Zd ZdZdededededededej        f         d	edej        f         d
edededededede	eef         ddf fdZ
dedefdZ xZS )MaxVitLayera  
    MaxVit layer consisting of a MBConv layer followed by a PartitionAttentionLayer with `window` and a PartitionAttentionLayer with `grid`.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        expansion_ratio (float): Expansion ratio in the bottleneck.
        squeeze_ratio (float): Squeeze ratio in the SE Layer.
        stride (int): Stride of the depthwise convolution.
        activation_layer (Callable[..., nn.Module]): Activation function.
        norm_layer (Callable[..., nn.Module]): Normalization function.
        head_dim (int): Dimension of the attention heads.
        mlp_ratio (int): Ratio of the MLP layer.
        mlp_dropout (float): Dropout probability for the MLP layer.
        attention_dropout (float): Dropout probability for the attention layer.
        p_stochastic_dropout (float): Probability of stochastic depth.
        partition_size (int): Size of the partitions.
        grid_size (Tuple[int, int]): Size of the input feature grid.
    rD   rE   rG   rF   r   rI   .rH   rz   r   r   r   rJ   r   r   r    Nc                 f   t                                                       t                      }t          ||||||||          |d<   t	          |||d||	|t
          j        ||
|          |d<   t	          |||d||	|t
          j        ||
|          |d<   t          j        |          | _        d S )N)rD   rE   rF   rG   r   rH   rI   rJ   MBconvr   )rD   rz   r   r   r   r   rH   rI   r   r   rJ   window_attentionr   grid_attention)	rY   rZ   r   rB   r   r   r   r]   rc   )rd   rD   rE   rG   rF   r   rI   rH   rz   r   r   r   rJ   r   r   rc   ri   s                   r%   rZ   zMaxVitLayer.__init__  s    * 	)mm "#%+'-!!5	
 	
 	
x &=$)#-|/#!5&
 &
 &
!" $;$)!-|/#!5$
 $
 $
  mF++r'   rj   c                 0    |                      |          }|S z
        Args:
            x (Tensor): Input tensor of shape (B, C, H, W).
        Returns:
            Tensor: Output tensor of shape (B, C, H, W).
        rc   )rd   rj   s     r%   rn   zMaxVitLayer.forward  s     KKNNr'   )ro   rp   rq   rr   r`   rs   r   r   rt   r
   rZ   r   rn   ru   rv   s   @r%   r   r     s        (?, ?, 	?,
 ?, ?, ?, S")^,?, #3	>2?, ?, ?, ?, !?,  $!?,$ %?,& c?'?,( 
)?, ?, ?, ?, ?, ?,B F        r'   r   c                        e Zd ZdZdedededededej        f         dedej        f         d	ed
edededede	eef         dede
e         ddf fdZdedefdZ xZS )MaxVitBlocka(  
    A MaxVit block consisting of `n_layers` MaxVit layers.

     Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        expansion_ratio (float): Expansion ratio in the bottleneck.
        squeeze_ratio (float): Squeeze ratio in the SE Layer.
        activation_layer (Callable[..., nn.Module]): Activation function.
        norm_layer (Callable[..., nn.Module]): Normalization function.
        head_dim (int): Dimension of the attention heads.
        mlp_ratio (int): Ratio of the MLP layer.
        mlp_dropout (float): Dropout probability for the MLP layer.
        attention_dropout (float): Dropout probability for the attention layer.
        p_stochastic_dropout (float): Probability of stochastic depth.
        partition_size (int): Size of the partitions.
        input_grid_size (Tuple[int, int]): Size of the input feature grid.
        n_layers (int): Number of layers in the block.
        p_stochastic (List[float]): List of probabilities for stochastic depth for each layer.
    rD   rE   rG   rF   rI   .rH   rz   r   r   r   r   input_grid_sizen_layersp_stochasticr    Nc                    t                                                       t          |          |k    st          d| d| d          t	          j                    | _        t          |ddd          | _        t          |          D ]H\  }}|dk    rdnd}| xj        t          |dk    r|n||||||||||	|
|| j        |	          gz  c_        Id S )
Nz'p_stochastic must have length n_layers=z, got p_stochastic=.r*   r"   r#   rM   r   )rD   rE   rG   rF   r   rI   rH   rz   r   r   r   r   r   rJ   )rY   rZ   lenr   r   
ModuleListrc   r&   r   	enumerater   )rd   rD   rE   rG   rF   rI   rH   rz   r   r   r   r   r   r   r   idxr   r   ri   s                     r%   rZ   zMaxVitBlock.__init__  s   , 	<  H,,sxssdpssstttmoo/QWXbcddd-- 	 	FC((QQFKK/2axx\!-"/$3!)%5%' +&7#1"n)*   KKK	 	r'   rj   c                 0    | j         D ]} ||          }|S r   r   )rd   rj   layers      r%   rn   zMaxVitBlock.forward,  s)     [ 	 	EaAAr'   )ro   rp   rq   rr   r`   rs   r   r   rt   r
   r   rZ   r   rn   ru   rv   s   @r%   r   r     s"        *1 1 	1
 1 1 S")^,1 #3	>21 1 1 1 !1  !1" sCx#1& '1( 5k)1* 
+1 1 1 1 1 1f	 	F 	 	 	 	 	 	 	 	r'   r   c            !            e Zd ZdZdej        ddddddfdeeef         ded	ed
ee         dee         dede	de
edej        f                  dedej        f         de	de	dede	de	deddf  fdZdedefdZd Z xZS )r   ay  
    Implements MaxVit Transformer from the `MaxViT: Multi-Axis Vision Transformer <https://arxiv.org/abs/2204.01697>`_ paper.
    Args:
        input_size (Tuple[int, int]): Size of the input image.
        stem_channels (int): Number of channels in the stem.
        partition_size (int): Size of the partitions.
        block_channels (List[int]): Number of channels in each block.
        block_layers (List[int]): Number of layers in each block.
        stochastic_depth_prob (float): Probability of stochastic depth. Expands to a list of probabilities for each layer that scales linearly to the specified value.
        squeeze_ratio (float): Squeeze ratio in the SE Layer. Default: 0.25.
        expansion_ratio (float): Expansion ratio in the MBConv bottleneck. Default: 4.
        norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.01)`).
        activation_layer (Callable[..., nn.Module]): Activation function Default: nn.GELU.
        head_dim (int): Dimension of the attention heads.
        mlp_ratio (int): Expansion ratio of the MLP layer. Default: 4.
        mlp_dropout (float): Dropout probability for the MLP layer. Default: 0.0.
        attention_dropout (float): Dropout probability for the attention layer. Default: 0.0.
        num_classes (int): Number of classes. Default: 1000.
    Ng      ?r   rC   i  r   stem_channelsr   block_channelsblock_layersrz   stochastic_depth_probrI   .rH   rG   rF   r   r   r   num_classesr    c                 D   t                                                       t          |            d}|t          t          j        dd          }t          |t          |                    }t          |          D ]?\  }}|d         |z  dk    s|d         |z  dk    rt          d| d| d	| d
| d	          @t	          j
        t          ||dd||	dd           t          ||ddd d d                    | _        t          |ddd          }|| _        t	          j                    | _        |g|d d         z   }|}t#          j        d|t'          |                                                    }d}t+          |||          D ]]\  }}}| j                            t/          |||
|||	|||||||||||z                                 | j        d         j        }||z  }^t	          j
        t	          j        d          t	          j                    t	          j        |d                   t	          j        |d         |d                   t	          j                    t	          j        |d         |d                    | _        |                                  d S )Nr*   gMbP?g{Gz?)epsmomentumr   r#   zInput size z
 of block z$ is not divisible by partition size zx. Consider changing the partition size or the input size.
Current configuration yields the following block input sizes: r   r"   F)r   rI   rH   rL   rR   T)r   rI   rH   rL   rM   r4   )rD   rE   rG   rF   rI   rH   rz   r   r   r   r   r   r   r   )rL   ) rY   rZ   r   r   r   BatchNorm2dr0   r   r   r   r]   r   stemr&   r   r   blocksnplinspacer<   tolistzipr,   r   r   AdaptiveAvgPool2dFlattenr   r   Tanh
classifier_init_weights)rd   r   r  r   r  r  rz   r  rI   rH   rG   rF   r   r   r   r  input_channelsblock_input_sizesr   block_input_sizerD   rE   r   p_idx
in_channelout_channel
num_layersri   s                              r%   rZ   zMaxVit.__init__M  s#   : 	D!!!  TDIIIJ
 5Z^ATATUU%./@%A%A 	 	!C!"^3q88<LQ<OR`<`de<e<e j"2 j jc j jgu j jUfj j j   =f M %!1	 	 	 !}ad]ahl  
 
	" ,JAaYZ[[[
, moo$oss(;;%
 {1&;S=N=NOOVVXX36{LR^3_3_ 	  	 /JZK *!,"/$3)%5%' +&7#1$.'!-eej6H.H!I    $ R2JZEE - ##JLLL+,,InR(.*<==GIIInR(+EBBB
 
 	r'   rj   c                     |                      |          }| j        D ]} ||          }|                     |          }|S r   )r  r  r  )rd   rj   blocks      r%   rn   zMaxVit.forward  sG    IIaLL[ 	 	EaAAOOAr'   c                    |                                  D ]?}t          |t          j                  rRt          j                            |j        d           |j        $t          j                            |j                   ot          |t          j	                  rKt          j        
                    |j        d           t          j        
                    |j        d           t          |t          j                  rQt          j                            |j        d           |j        $t          j                            |j                   Ad S )Nr   r   r#   r   )modules
isinstancer   r[   r   normal_weightrL   zeros_r  	constant_r   )rd   ms     r%   r  zMaxVit._init_weights  s    	+ 	+A!RY'' 
+d3336%GNN16***Ar~.. +!!!(A...!!!&!,,,,Ary)) +d3336%GNN16***	+ 	+r'   )ro   rp   rq   rr   r   GELUr
   r`   r   rs   r   r   rt   rZ   r   rn   r  ru   rv   s   @r%   r   r   8  s}        J :>57W#!" #&7t t #s(Ot
 t t S	t 3it t  %t" Xc29n56#t$ #3	>2%t( )t* +t. /t0 1t2 !3t6 7t8 
9t t t t t tl F    + + + + + + +r'   r   Fr  r  r  r  r   rz   weightsprogresskwargsc                    |ot          |dt          |j        d                              |j        d         d         |j        d         d         k    sJ t          |d|j        d                    |                    dd          }	t	          d| ||||||	d|}
|*|
                    |                    |d	
                     |
S )Nr  
categoriesmin_sizer   r#   r      r0  )r  r  r  r  rz   r   r   T)r*  
check_hashr$   )r   r   metapopr   load_state_dictget_state_dict)r  r  r  r  r   rz   r)  r*  r+  r   models              r%   _maxvitr7    s    $ fmSl9S5T5TUUU|J'*gl:.Fq.IIIIIflGL4LMMML*55J 	#%!3%	 	 	 	E g44hSW4XXYYYLr'   c                   n    e Zd Z ed eeddej                  edddddd	d
idddd          Z	e	Z
dS )r   z9https://download.pytorch.org/models/maxvit_t-bc5ab103.pthr0  )	crop_sizeresize_sizeinterpolationir/  zLhttps://github.com/pytorch/vision/tree/main/references/classification#maxvitzImageNet-1KgT@g|?5.X@)zacc@1zacc@5gZd;@gK7]@zThese weights reproduce closely the results of the paper using a similar training recipe.
            They were trained with a BatchNorm2D momentum of 0.99 instead of the more correct 0.01.)r-  
num_paramsr.  recipe_metrics_ops
_file_size_docs)url
transformsr2  N)ro   rp   rq   r   r   r   r   BICUBICr   IMAGENET1K_V1DEFAULTr$   r'   r%   r   r     s        GG73CO`Oh
 
 
 /""d##    !g
 
  M. GGGr'   r   
pretrained)r)  T)r)  r*  c                 h    t                               |           } t          ddg dg dddd| |d|S )	a  
    Constructs a maxvit_t architecture from
    `MaxViT: Multi-Axis Vision Transformer <https://arxiv.org/abs/2204.01697>`_.

    Args:
        weights (:class:`~torchvision.models.MaxVit_T_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.MaxVit_T_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.maxvit.MaxVit``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/maxvit.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.MaxVit_T_Weights
        :members:
    @   )rI        i   )r"   r"   r   r"       g?   )r  r  r  rz   r  r   r)  r*  r$   )r   verifyr7  )r)  r*  r+  s      r%   r   r     s_    . %%g..G 
***!\\!
 
 
 
 
r'   )NF)<r   collectionsr   	functoolsr   typingr   r   r   r   r	   r
   numpyr  r5   torch.nn.functionalr   
functionalr   r   torchvision.models._apir   r   r   torchvision.models._metar   torchvision.models._utilsr   r   torchvision.ops.miscr   r    torchvision.ops.stochastic_depthr   torchvision.transforms._presetsr   r   torchvision.utilsr   __all__r`   r&   r0   r@   rt   rB   rx   r   r   r   r   r   r   r   rs   boolr7  r   rE  r   r$   r'   r%   <module>r^     s    # # # # # #       A A A A A A A A A A A A A A A A                       H H H H H H H H H H 9 9 9 9 9 9 T T T T T T T T H H H H H H H H < < < < < < R R R R R R R R 1 1 1 1 1 1  uS#X S RU `c hmnqsvnvhw    sCx C DQVWZ\_W_Q`La    # #S #U\ # # # #T T T T TRY T T TnF F F F F29 F F FR
 
 
 
 
ry 
 
 
    bi   4    	   <e e e e ebi e e eP^ ^ ^ ^ ^") ^ ^ ^BR R R R R") R R Rj^+ ^+ ^+ ^+ ^+RY ^+ ^+ ^+Z &*' '' I	'
 s)' !' ' ' k"' ' '  !' ' ' 'T    {   6 ,0@0N!OPPP6:T ! ! !"23 !d !]` !ek ! ! ! QP ! ! !r'   