
    Ngd                        d Z ddlZddlmZmZmZ ddlZddlmc m	Z
 ddlmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZmZmZmZmZm Z  ddl!m"Z# g Z$didZ%djdZ&dkdZ'dldZ( e) e e%dddd           e%dddd           e&ddddddd           e&ddddddd           e&ddddddd          fddd d d!d"#           e e%dd$d%           e%ddd%           e&dddddd&           e&dddd'dd&           e&dddd(dd&          fddd d d!d)#           e e%dd$d%           e%ddd%           e&dddd(dd&           e&dd*dd+dd&           e&dd,dd-dd&          fddd d d!d.#           e e%dd$d%           e%ddd%           e&dddd(dd&           e&dd*dd+dd&           e&dd,dd-dd&          fddd d d/ e)d01          d.2           e(d           e(d3           e(d4           e(d           e(d5           e(d6           e(d          7          Z*e G d8 d9ej+                              Z, G d: d;ej+                  Z- G d< d=ej+                  Z.e G d> d?ej+                              Z/ ed@e,            edAe/           dmdCZ0dmdDZ1dndEZ2 ei dF e2dGH          dI e2dGH          dJ e2dGH          dK e2dGdLM          dN e2dGdLM          dO e2dGdLM          dP e2dGdLM          dQ e2dGdLM          dR e2dGdLM          dS e2dGdLM          dT e2dGdLM          dU e2dGdLM          dV e2dGdLM          dW e2dGdXdYdZ          d[ e2dGdXdYdZ          d\ e2dGdXdYdZ                    Z3edod]efd^            Z4edod]efd_            Z5edod]efd`            Z6edod]efda            Z7edod]efdb            Z8edod]efdc            Z9edod]efdd            Z:edod]efde            Z;edod]efdf            Z<edod]efdg            Z= ee>dTdUdVdWd[d\dh           dS )pa   MobileViT

Paper:
V1: `MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer` - https://arxiv.org/abs/2110.02178
V2: `Separable Self-attention for Mobile Vision Transformers` - https://arxiv.org/abs/2206.02680

MobileVitBlock and checkpoints adapted from https://github.com/apple/ml-cvnets (original copyright below)
License: https://github.com/apple/ml-cvnets/blob/main/LICENSE (Apple open source)

Rest of code, ByobNet, and Transformer block hacked together by / Copyright 2022, Ross Wightman
    N)CallableTupleOptional)nn)	to_2tuplemake_divisible
GroupNorm1ConvMlpDropPathis_exportable   )build_model_with_cfg)register_notrace_module)register_modelgenerate_default_cfgsregister_model_deprecations)register_blockByoBlockCfgByoModelCfgByobNetLayerFn
num_groups)Block      @c                 L    t          d| ||d|t          dd                    S )Nbottler   T)	bottle_in
linear_out)typedcsgsbrblock_kwargs)r   dictr    r!   r"   r$   s       Q/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/mobilevit.py_inverted_residual_blockr)   !   s6    a1rDT:::< < < <       c                 p    t          | |||          t          dd|dt          |||                    fS )Nr'   	mobilevitr   )transformer_dimtransformer_depth
patch_size)r   r    r!   r"   r%   r)   r   r&   r    r!   r"   r.   r/   r0   r$   s          r(   _mobilevit_blockr3   (   sX     	!1Q2666Q! /"3%' ' '	
 	
 	
	 	r*             @      ?c                 r    t          | |||          t          dd|d|dt          ||                    fS )Nr'   
mobilevit2r   )r/   r0   )r   r    r!   r"   r$   r#   r%   r1   )r    r!   r"   r/   r0   r$   transformer_brs          r(   _mobilevitv2_blockr:   6   sY     	!1Q2666a1A"3%' ' '	
 	
 	
 r*         ?c                     d} dk    rt           fd|D                       }t          t          d|d         dd          t          d|d         dd          t          d|d         dd	          t          d|d
         dd	          t          d|d         dd
	          ft	          d z            dddd          }|S )N)@           i   r;   c                 4    g | ]}t          |z            S  )int).0r!   
multipliers     r(   
<listcomp>z$_mobilevitv2_cfg.<locals>.<listcomp>F   s%    666QSZ((666r*   r   r   r5   r'   r4   )r    r!   r"   r/      r+       3x3 silu)blocksstem_chs	stem_type	stem_pool
downsample	act_layer)tupler   r)   r:   rC   )rE   chscfgs   `  r(   _mobilevitv2_cfgrU   C   s    
"CS6666#66677
$qCFaC@@@$qCFaC@@@c!fQGGGc!fQGGGc!fQGGG
 R*_%%  C Jr*      r'   rG      0   r=   r2   P   `   rI   rJ   rK   i@  )rL   rM   rN   rO   rP   rQ   num_featuresrH   )r    r!   r"   )r    r!   r"   r.   r/   r0   x      r@   r>            i  seg      ?)rd_ratio)rL   rM   rN   rO   rP   
attn_layerattn_kwargsr[   g      ?g      ?g      ?g      ?)mobilevit_xxsmobilevit_xsmobilevit_ssemobilevit_smobilevitv2_050mobilevitv2_075mobilevitv2_125mobilevitv2_100mobilevitv2_150mobilevitv2_175mobilevitv2_200c            %            e Zd ZdZdddddddddd	d
dddddej        fdedee         dedededee         de	eef         dedee         dededededede
dededef$ fdZd ej        d!ej        fd"Z xZS )#MobileVitBlockzS MobileViT block
        Paper: https://arxiv.org/abs/2110.02178?context=cs.LG
    NrG   r   r;   r   r   r5   r4      r+           Fin_chsout_chskernel_sizestridebottle_ratio
group_sizedilation	mlp_ratior.   r/   r0   	num_heads	attn_dropdrop	no_fusiondrop_path_ratelayerstransformer_norm_layerc           
        	 t          t          |                                            pt                      t	          ||          }|p|}	pt          ||z            	                    ||||||d                   | _        t          j	        |	dd          | _
        t          j        	fdt          |
          D              | _         	          | _                            	|dd          | _        |rd | _        n!                    ||z   ||d          | _        t#          |          | _        | j        d         | j        d         z  | _        d S )Nr   rw   rx   groupsr{   r   Frw   biasc                 J    g | ]}t          d j        		  	         S )T)r|   r}   qkv_biasr~   	proj_drop	drop_pathrQ   
norm_layer)TransformerBlockact)
rD   _r~   r   r   r   r|   r}   r.   r   s
     r(   rF   z+MobileVitBlock.__init__.<locals>.<listcomp>   sW     +
 +
 +
  ###( *1
 
 
+
 +
 +
r*   rw   rx   )superrq   __init__r   r   r   conv_norm_actconv_kxkr   Conv2dconv_1x1
Sequentialrangetransformernorm	conv_projconv_fusionr   r0   
patch_area)selfru   rv   rw   rx   ry   rz   r{   r|   r.   r/   r0   r}   r~   r   r   r   r   r   kwargsr   	__class__s           ``  ``` ```  r(   r   zMobileVitBlock.__init__   s   , 	nd##,,...$799J//#V)R^L6<Q-R-R,,F&8A; - @ @ 	&/quUUU= +
 +
 +
 +
 +
 +
 +
 +
 +
 +
 +
 ,--+
 +
 +
  +*?;;	--owTU^_-`` 	r#D%33FW4Dg[fop3qqD#J///!,tq/AAr*   xreturnc                    |}|                      |          }|                     |          }| j        \  }}|j        \  }}}}t	          j        ||z            |z  t	          j        ||z            |z  }
}	|	|z  |
|z  }}||z  }d}|	|k    s|
|k    rt          j        ||	|
fdd          }d}|                    ||z  |z  |||          	                    dd          }|                    |||| j
                  	                    dd                              || j
        z  |d          }|                     |          }|                     |          }|                                                    || j
        |d          }|	                    dd                              ||z  |z  |||          }|	                    dd                              ||||z  ||z            }|rt          j        |||fdd          }|                     |          }| j        +|                     t#          j        ||fd	                    }|S )
NFbilinearsizemodealign_cornersTr   r4   rG   dim)r   r   r0   shapemathceilFinterpolatereshape	transposer   r   r   
contiguousviewr   r   torchcat)r   r   shortcutpatch_hpatch_wBCHWnew_hnew_wnum_patch_hnum_patch_wnum_patchesr   s                  r(   forwardzMobileVitBlock.forward   s^    MM!MM!  ?W
1ayW--71w;9O9ORY9Yu#(G#3Ug5E[!K/A::!auen:UZ[[[AK IIa!ek)7KIISSTUWXYYIIaK99CCAqIIQQRSVZVeRegrtvww QIIaLL LLNN4?KDDKK1%%a!ek&9;QXYYKK1%%aK',A;QXCXYY 	Taq!f:USSSANN1'  Ha=a!@!@!@AAAr*   )__name__
__module____qualname____doc__r   	LayerNormrC   r   floatr   boolr   r   r   r   Tensorr   __classcell__r   s   @r(   rq   rq      s         &* "%(,(."-1%&!#$&"/1|':B :B:B c]:B 	:B
 :B  :B !:B CHo:B :B &c]:B  #:B :B :B :B :B  !:B" "#:B$ %:B& %-':B :B :B :B :B :Bx( (%, ( ( ( ( ( ( ( (r*   rq   c                   "    e Zd ZdZ	 	 	 ddededededd	f
 fd
Zdej	        dej	        fdZ
ej                                        ddej	        deej	                 dej	        fd            Zddej	        deej	                 dej	        fdZ xZS )LinearSelfAttentiona  
    This layer applies a self-attention with linear complexity, as described in `https://arxiv.org/abs/2206.02680`
    This layer can be used for self- as well as cross-attention.
    Args:
        embed_dim (int): :math:`C` from an expected input of size :math:`(N, C, H, W)`
        attn_drop (float): Dropout value for context scores. Default: 0.0
        bias (bool): Use bias in learnable layers. Default: True
    Shape:
        - Input: :math:`(N, C, P, N)` where :math:`N` is the batch size, :math:`C` is the input channels,
        :math:`P` is the number of pixels in the patch, and :math:`N` is the number of patches
        - Output: same as the input
    .. note::
        For MobileViTv2, we unfold the feature map [B, C, H, W] into [B, C, P, N] where P is the number of pixels
        in a patch and N is the number of patches. Because channel is the first dimension in this unfolded tensor,
        we use point-wise convolution (instead of a linear layer). This avoids a transpose operation (which may be
        expensive on resource-constrained devices) that may be required to convert the unfolded tensor from
        channel-first to channel-last format in case of a linear layer.
    rt   T	embed_dimr~   r   r   r   Nc                 :   t                                                       || _        t          j        |dd|z  z   |d          | _        t          j        |          | _        t          j        |||d          | _        t          j        |          | _	        d S )Nr   r4   )in_channelsout_channelsr   rw   )
r   r   r   r   r   qkv_projDropoutr~   out_projout_drop)r   r   r~   r   r   r   s        r(   r   zLinearSelfAttention.__init__$  s     	"	!a)m,	
 
 
 I..	!"	
 
 
 
9--r*   r   c                    |                      |          }|                    d| j        | j        gd          \  }}}t          j        |d          }|                     |          }||z                      dd          }t          j        |          |                    |          z  }| 	                    |          }| 
                    |          }|S )Nr   r   r   Tr   keepdim)r   splitr   r   softmaxr~   sumrelu	expand_asr   r   )	r   r   qkvquerykeyvaluecontext_scorescontext_vectorouts	            r(   _forward_self_attnz&LinearSelfAttention._forward_self_attn=  s    mmA
  IIq$.$.&IqIQQsE 5b11177 .33D3II fUmmn66u===mmC  mmC  
r*   x_prevc                    |j         \  }}}}|j         dd          \  }}||k    s
J d            t          j        || j        j        d | j        dz            | j        j        d | j        dz                      }	|	                    d| j        gd          \  }
}t          j        || j        j        | j        dz            | j        j        | j        j        | j        dz            nd           }t          j        |
d          }| 	                    |          }||z  
                    dd          }t          j        |          |                    |          z  }|                     |          }|                     |          }|S )	NzJThe number of pixels in a patch for query and key_value should be the samer   )weightr   r   r   Tr   )r   r   conv2dr   r   r   r   r   r   r~   r   r   r   r   r   )r   r   r   
batch_sizein_dimkv_patch_areakv_num_patchesq_patch_areaq_num_patchesqkr   r   r   r   r   r   s                   r(   _forward_cross_attnz'LinearSelfAttention._forward_cross_attnU  s    =>G9
FM>&'gbccl#m \)))W *))
 X='(;!);(;<#$7T^a%7$78
 
 
 XXq$.1qX99
s='(:;;?=;M;Y#DNQ$677_c
 
 
 5b11177 .33D3II fUmmn66u===mmC  mmC  
r*   c                 ^    ||                      |          S |                     ||          S )N)r   )r   r   )r   r   r   s      r(   r   zLinearSelfAttention.forward  s4    >**1---++Af+===r*   )rt   rt   TN)r   r   r   r   rC   r   r   r   r   r   r   jitignorer   r   r   r   r   s   @r(   r   r     s?        , . .. . 	.
 . 
. . . . . .2EL U\    0 Y( (U\ (8EL;Q (]b]i ( ( ( (T> > >x/E >QVQ] > > > > > > > >r*   r   c                        e Zd ZdZ	 	 	 	 	 	 ddedededed	ed
df fdZddej        de	ej                 d
ej        fdZ
 xZS )LinearTransformerBlockaF  
    This class defines the pre-norm transformer encoder with linear self-attention in `MobileViTv2 paper <>`_
    Args:
        embed_dim (int): :math:`C_{in}` from an expected input of size :math:`(B, C_{in}, P, N)`
        mlp_ratio (float): Inner dimension ratio of the FFN relative to embed_dim
        drop (float): Dropout rate. Default: 0.0
        attn_drop (float): Dropout rate for attention in multi-head attention. Default: 0.0
        drop_path (float): Stochastic depth rate Default: 0.0
        norm_layer (Callable): Normalization layer. Default: layer_norm_2d
    Shape:
        - Input: :math:`(B, C_{in}, P, N)` where :math:`B` is batch size, :math:`C_{in}` is input embedding dim,
            :math:`P` is number of pixels in a patch, and :math:`N` is number of patches,
        - Output: same shape as the input
    r5   rt   Nr   r|   r   r~   r   r   c                    t                                                       |pt          j        }|pt          } ||          | _        t          |||          | _        t          |          | _	         ||          | _
        t          |t          ||z            ||          | _        t          |          | _        d S )N)r   r~   r   )in_featureshidden_featuresrQ   r   )r   r   r   SiLUr	   norm1r   attnr   
drop_path1norm2r
   rC   mlp
drop_path2)	r   r   r|   r   r~   r   rQ   r   r   s	           r(   r   zLinearTransformerBlock.__init__  s     	(	-:
Z	**
')y\`aaa	"9--Z	**
!	I 566	  
 #9--r*   r   r   c                    |?||                      |                     |                     |                              z   }nE|}|                     |          }|                     ||          }|                      |          |z   }||                     |                     |                     |                              z   }|S r   )r  r  r   r  r  r  )r   r   r   ress       r(   r   zLinearTransformerBlock.forward  s    >DOODIIdjjmm$<$<===AA C

1A		!V$$A""S(A A 7 7888r*   )r5   rt   rt   rt   NNr   )r   r   r   r   rC   r   r   r   r   r   r   r   r   s   @r(   r   r     s         $ . .. . 	.
 . . 
. . . . . .4  x/E QVQ]        r*   r   c                        e Zd ZdZddddddddd	d
d
d
defdedee         dededee         deeef         dedee         dededededede	de
f fdZdej        dej        fdZ xZS )MobileVitV2Blockz8
    This class defines the `MobileViTv2 block <>`_
    NrG   r;   r   rr   r5   r4   rs   rt   ru   rv   rw   ry   rz   r{   r|   r.   r/   r0   r~   r   r   r   r   c           	         t          t          |                                            pt                      t	          ||          }|p|}pt          ||z                                |||d||d                   | _        t          j	        |dd          | _
        t          j        fdt          |	          D              | _                   | _                            |ddd          | _        t!          |
          | _        | j        d         | j        d         z  | _        t'                      | _        d S )Nr   r   r   Fr   c                 F    g | ]}t          j                   S ))r|   r~   r   r   rQ   r   )r   r   )	rD   r   r~   r   r   r   r|   r.   r   s	     r(   rF   z-MobileVitV2Block.__init__.<locals>.<listcomp>  sQ     +
 +
 +
  ###( *1  +
 +
 +
r*   )rw   rx   	apply_act)r   r	  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r0   r   r   coreml_exportable)r   ru   rv   rw   ry   rz   r{   r|   r.   r/   r0   r~   r   r   r   r   r   r   r   s          ``  `````  r(   r   zMobileVitV2Block.__init__  s   & 	%%..000$799J//#V)R^L6<Q-R-R,,FVhqk - ; ; 	&/quUUU= +
 +
 +
 +
 +
 +
 +
 +
 +
 +
 ,--+
 +
 +
  +*?;;	--owTU^_kp-qq#J///!,tq/AA!.r*   r   r   c                    |j         \  }}}}| j        \  }}t          j        ||z            |z  t          j        ||z            |z  }	}||z  |	|z  }}
|
|z  }||k    s|	|k    rt	          j        |||	fdd          }|                     |          }|                     |          }|j         d         }| j        rt	          j	        |||f||f          }n2|
                    |||
|||                              ddddd	d
          }|
                    ||d|          }|                     |          }|                     |          }| j        r5|
                    |||z  |z  |
|          }t	          j        ||          }nP|
                    |||||
|                              ddd
d	dd          }|
                    |||
|z  ||z            }|                     |          }|S )Nr   Tr   r   r   r   rG      r4   r+   r   )upscale_factor)r   r0   r   r   r   r   r   r   r  unfoldr   permuter   r   pixel_shuffler   )r   r   r   r   r   r   r   r   r   r   r   r   r   s                r(   r   zMobileVitV2Block.forward  s   W
1a?yW--71w;9O9ORY9Yu#(G#3Ug5E[!K/A::!auen:UYZZZA MM!MM! GAJ! 	f'(:GWCUVVVAA		!QWk7KKSSTUWXZ[]^`acdeeAIIaB,, QIIaLL ! 	N		!Q[72KMMA':::AA		!Q+{KKSSTUWXZ[]^`acdeeA		!Qg 5{W7LMMANN1r*   )r   r   r   r   r	   rC   r   r   r   r   r   r   r   r   r   r   r   s   @r(   r	  r	    sG         "&!$%$*)-!" "+5!01 0101 #01 	01
 01 SM01 S/01 01 "#01 01 01 01 01 01 01  !)!01 01 01 01 01 01d# #%, # # # # # # # #r*   r	  r-   r8   Fc                     t          t          | |f|st          |          nt          |         t          d          d|S NT)flatten_sequential)	model_cfgfeature_cfgr   r   
model_cfgsr&   variantcfg_variant
pretrainedr   s       r(   _create_mobilevitr  #  T    *-8U*W%%j>UD111  	  r*   c                     t          t          | |f|st          |          nt          |         t          d          d|S r  r  r  s       r(   _create_mobilevit2r"  +  r   r*   c                 "    | ddddddddd	d
d|S )Ni  )rG   r?   r?   )rs   rs   g?bicubic)rt   rt   rt   )r;   r;   r;   z	stem.convzhead.fcF)urlnum_classes
input_size	pool_sizecrop_pctinterpolationmeanstd
first_conv
classifierfixed_input_sizerB   )r%  r   s     r(   _cfgr0  3  s7    4}SY)\!!   r*   zmobilevit_xxs.cvnets_in1kztimm/)	hf_hub_idzmobilevit_xs.cvnets_in1kzmobilevit_s.cvnets_in1kzmobilevitv2_050.cvnets_in1kg"~j?)r1  r)  zmobilevitv2_075.cvnets_in1kzmobilevitv2_100.cvnets_in1kzmobilevitv2_125.cvnets_in1kzmobilevitv2_150.cvnets_in1kzmobilevitv2_175.cvnets_in1kzmobilevitv2_200.cvnets_in1kz$mobilevitv2_150.cvnets_in22k_ft_in1kz$mobilevitv2_175.cvnets_in22k_ft_in1kz$mobilevitv2_200.cvnets_in22k_ft_in1kz(mobilevitv2_150.cvnets_in22k_ft_in1k_384)rG   r@   r@   )   r2  )r1  r'  r(  r)  z(mobilevitv2_175.cvnets_in22k_ft_in1k_384z(mobilevitv2_200.cvnets_in22k_ft_in1k_384r   c                     t          dd| i|S )Nre   r  )re   r  r  r   s     r(   re   re   o  s    NNNvNNNr*   c                     t          dd| i|S )Nrf   r  )rf   r4  r5  s     r(   rf   rf   t  s    MM
MfMMMr*   c                     t          dd| i|S )Nrg   r  )rg   r4  r5  s     r(   rg   rg   y  s    LLzLVLLLr*   c                     t          dd| i|S )Nri   r  )ri   r4  r5  s     r(   ri   ri   ~      PP:PPPPr*   c                     t          dd| i|S )Nrj   r  )rj   r4  r5  s     r(   rj   rj     r9  r*   c                     t          dd| i|S )Nrl   r  )rl   r4  r5  s     r(   rl   rl     r9  r*   c                     t          dd| i|S )Nrk   r  )rk   r4  r5  s     r(   rk   rk     r9  r*   c                     t          dd| i|S )Nrm   r  )rm   r4  r5  s     r(   rm   rm     r9  r*   c                     t          dd| i|S )Nrn   r  )rn   r4  r5  s     r(   rn   rn     r9  r*   c                     t          dd| i|S )Nro   r  )ro   r4  r5  s     r(   ro   ro     r9  r*   )mobilevitv2_150_in22ft1kmobilevitv2_175_in22ft1kmobilevitv2_200_in22ft1kmobilevitv2_150_384_in22ft1kmobilevitv2_175_384_in22ft1kmobilevitv2_200_384_in22ft1k)r   )r+   r   )r4   r5   r6   )r;   )NF)rJ   )F)?r   r   typingr   r   r   r   torch.nn.functionalr   
functionalr   timm.layersr   r   r	   r
   r   r   _builderr   _features_fxr   	_registryr   r   r   byobnetr   r   r   r   r   r   vision_transformerr   r   __all__r)   r3   r:   rU   r&   r  Modulerq   r   r   r	  r  r"  r0  default_cfgsre   rf   rg   ri   rj   rl   rk   rm   rn   ro   r   rB   r*   r(   <module>rR     s
  
 
  , , , , , , , , , ,                 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ * * * * * * 1 1 1 1 1 1 Y Y Y Y Y Y Y Y Y Y [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ 9 9 9 9 9 9
< < < <   
 
 
 
   * T+$$qB!<<<$$qB!<<<qB!RSTabgjkkkqB!RSTabgjkkkqB!RSTabgjkkk
     $$qB!444$$qB!444qB!RSTabcccqB!STUbcdddqB!STUbcddd
     $$qB!444$$qB!444qB!STUbcdddqC1cUVcdeeeqC1cUVcdeee
     +$$qB!444$$qB!444qB!STUbcdddqC1cUVcdeeeqC1cUVcdeee
 D#&&&  " %$S))$$S))$$T**$$S))$$S))$$T**$$S))QI I I
X h h h h hRY h h hVt> t> t> t> t>") t> t> t>n7 7 7 7 7RY 7 7 7t Z Z Z Z Zry Z Z Zz {N + + + |- . . .          %$ .&!8!8!8.&w 7 7 7.& ttg666.&
 "44$ $ $.& "44$ $ $.& "44$ $ $.& "44$ $ $.&" "44$ $ $#.&( "44$ $ $).&. "44$ $ $/.&6 +DD- - -7.&< +DD- - -=.&B +DD- - -C.&J / Hs1D 1D 1DK.&P / Hs1D 1D 1DQ.&V / Hs1D 1D 1DW.& . .b O O O O O O N N N N N N M Mw M M M M Q Q7 Q Q Q Q Q Q7 Q Q Q Q Q Q7 Q Q Q Q Q Q7 Q Q Q Q Q Q7 Q Q Q Q Q Q7 Q Q Q Q Q Q7 Q Q Q Q  H F F F$N$N$N' '     r*   