
    Ng                        d Z ddlZddlmZ ddlmZ ddlmZmZ ddl	m
Z
mZmZmZ ddlZddlmc mZ ddlmZ ddlmZmZ dd	lmZmZmZmZmZ d
dlmZ d
dlmZ d
dl m!Z! d
dl"m#Z#m$Z$m%Z% ddgZ&e G d d                      Z'd Z( G d dej)                  Z*e!	 dUdee+         de,deej-        eej-                 f         fd            Z.e!	 dVde+deej-                 deej-        ee+         f         fd            Z/e!dej-        dej-        de,dee+         d ee+         d!ej-        d"ej-        fd#            Z0 G d$ d%ej)                  Z1 G d& d'ej)                  Z2 G d( d)ej)                  Z3 G d* d+ej)                  Z4 G d, dej)                  Z5d- Z6 e7 e'd./           e'd0/           e'd1/           e'd2d3d4d56           e'd0d7           e'd1d7           e'd2d3d4dd8           e'd9d:d;dd8          <          Z8dWd=Z9dXd?Z: e% e:d@dAB           e:dCdAB           e:dDdAB           e:dEdAB           e:d>F           e:dGdAdHI           e:dJdAdHI           e:dKdAdHI          dL          Z;e#dYde5fdM            Z<e#dYde5fdN            Z=e#dYde5fdO            Z>e#dYde5fdP            Z?e#dYde5fdQ            Z@e#dYde5fdR            ZAe#dYde5fdS            ZBe#dYde5fdT            ZCdS )Za   Multi-Scale Vision Transformer v2

@inproceedings{li2021improved,
  title={MViTv2: Improved multiscale vision transformers for classification and detection},
  author={Li, Yanghao and Wu, Chao-Yuan and Fan, Haoqi and Mangalam, Karttikeya and Xiong, Bo and Malik, Jitendra and Feichtenhofer, Christoph},
  booktitle={CVPR},
  year={2022}
}

Code adapted from original Apache 2.0 licensed impl at https://github.com/facebookresearch/mvit
Original copyright below.

Modifications and timm support by / Copyright 2022, Ross Wightman
    N)OrderedDict)	dataclass)partialreduce)UnionListTupleOptional)nnIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)MlpDropPathtrunc_normal_tf_get_norm_layer	to_2tuple   )build_model_with_cfg)feature_take_indices)register_notrace_function)register_modelregister_model_deprecationsgenerate_default_cfgsMultiScaleVitMultiScaleVitCfgc                      e Zd ZU dZeedf         ed<   dZeeeedf         f         ed<   dZ	eeeedf         f         ed<   dZ
eed	<   d
Zeed<   dZeed<   dZeed<   d
Zeed<   d
Zeed<   dZeed<   dZeed<   dZeeef         ed<   dZeeeeef                           ed<   dZeeeeef                           ed<   dZeeeef                  ed<   dZeeef         ed<   dZeeef         ed<   dZeeef         ed<   d Zeed!<   d"Zeed#<   d$Zeeeeef         f         ed%<   d&Z eeeeef         f         ed'<   d(Z!eed)<   d* Z"dS )+r            r    .depths`   	embed_dimr   	num_heads      @	mlp_ratioF
pool_firstTexpand_attnqkv_biasuse_cls_tokenuse_abs_posresidual_poolingconvmoder    r    
kernel_qkv)r   r   r   r   r3   r3   stride_qN	stride_kv   r7   stride_kv_adaptive   r:   patch_kernelpatch_stridepatch_paddingmax	pool_typespatialrel_pos_typegelu	act_layer	layernorm
norm_layergư>norm_epsc                 *    t           j                  }t           j        t          t
          f          s-t	           fdt          |          D                        _        t           j                  |k    sJ t           j        t          t
          f          s-t	           fdt          |          D                        _        t           j                  |k    sJ  j         j	         j        g }t          |          D ]lt           j                           dk    r* fdt          t                              D             |                    t	                               mt	          |           _	        d S d S d S )Nc              3   2   K   | ]}j         d |z  z  V  dS r   N)r$   .0iselfs     N/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/mvitv2.py	<genexpr>z1MultiScaleVitCfg.__post_init__.<locals>.<genexpr>A   .      "V"Vq4>AF#:"V"V"V"V"V"V    c              3   2   K   | ]}j         d |z  z  V  dS rI   )r%   rJ   s     rN   rO   z1MultiScaleVitCfg.__post_init__.<locals>.<genexpr>E   rP   rQ   r   c                 d    g | ],}t          |         j                 |         z  d           -S r   )r>   r4   )rK   d
_stride_kvrL   rM   s     rN   
<listcomp>z2MultiScaleVitCfg.__post_init__.<locals>.<listcomp>M   sH     " " " JqMT]1-=a-@@!DD" " "rQ   )lenr"   
isinstancer$   tuplelistranger%   r8   r5   minr4   append)rM   
num_stagespool_kv_striderV   rL   s   `  @@rN   __post_init__zMultiScaleVitCfg.__post_init__>   s   %%
$.5$-88 	W""V"V"V"VE*DUDU"V"V"VVVDN4>""j0000$.5$-88 	W""V"V"V"VE*DUDU"V"V"VVVDN4>""j0000".4>3I0JN:&& 9 9t}Q'((1,," " " " " "!&s:!7!7" " "J %%eJ&7&78888">22DNNN /.3I3IrQ   )#__name__
__module____qualname__r"   r	   int__annotations__r$   r   r%   r'   floatr(   boolr)   r*   r+   r,   r-   r/   strr1   r4   r
   r5   r8   r;   r<   r=   r?   rA   rC   rE   rF   ra    rQ   rN   r   r   $   s8        +FE#s(O+++-/IuS%S/)*///-.IuS%S/)*...IuJKHdM4K!d!!!D#"(Jc3h(((1QHhuU38_-.QQQ26IxeCHo./6664:sCx1:::$*L%S/***$*L%S/***%+M5c?+++Is!L#!!!-3IuS%S/)*333.9Jc5c?*+999He3 3 3 3 3rQ   c                 8    t          t          j        | d          S )Nr   )r   operatormul)iterables    rN   prodro   U   s    (,!,,,rQ   c                   `     e Zd ZdZ	 	 	 	 	 d
 fd	Zdeej        ee	         f         fd	Z
 xZS )
PatchEmbedz
    PatchEmbed.
    r       r9   r6   r0   c                     t                                                       t          j        |||||          | _        d S )N)kernel_sizestridepadding)super__init__r   Conv2dproj)rM   dim_indim_outkernelru   rv   	__class__s         rN   rx   zPatchEmbed.__init__^   sF     	I
 
 
			rQ   returnc                     |                      |          }|                    d                              dd          |j        dd          fS )Nr   r   )rz   flatten	transposeshaperM   xs     rN   forwardzPatchEmbed.forwardp   s@    IIaLLyy||%%a++QWRSS\99rQ   )r    rr   r9   r6   r0   )rb   rc   rd   __doc__rx   r	   torchTensorr   re   r   __classcell__r~   s   @rN   rq   rq   Y   s          
 
 
 
 
 
$:E%,S	"9: : : : : : : : :rQ   rq   T	feat_sizehas_cls_tokenr   c                    |\  }}|r)| d d d d d dd d f         | d d d d dd d d f         } }nd }|                      d||| j        d                                       dddd                                          } | |fS )Nr   r   r    r   )reshaper   permute
contiguous)r   r   r   HWcls_toks         rN   reshape_pre_poolr   v   s     DAq qqq!!!RaR{^Qqqq!!!QRR{^			"aAGBK((00Aq!<<GGIIAg:rQ   r%   r   c                    | j         d         | j         d         g}| j         d         | j         d         z  }|                     d|| j         d         |                              dd          } |t          j        || fd          } | |fS )Nr   r    r   r   dim)r   r   r   r   cat)r   r%   r   r   L_pooleds        rN   reshape_post_poolr      s     QWQZ(IwqzAGAJ&H			"iX66@@AFFAIwl***i<rQ   attnqq_sizek_size	rel_pos_h	rel_pos_wc                    |rdnd}|\  }}	|\  }
}t          |
|z  d          }t          ||
z  d          }t          j        ||j                                      d          |z  t          j        |
|j                                      d          |z  z
  }||
dz
  |z  z  }t          ||	z  d          }t          |	|z  d          }t          j        |	|j                                      d          |z  t          j        ||j                                      d          |z  z
  }||dz
  |z  z  }||                                         }||                                         }|j        \  }}}}|dddd|df                             ||||	|          }t          j        d||          }t          j        d||          }| dddd|d|df         	                    |d||	|
|          |                    d          z   |                    d	          z   	                    |d||	z  |
|z            | dddd|d|df<   | S )
z1
    Spatial Relative Positional Embeddings.
    r   r   g      ?)devicer   Nzbyhwc,hkc->byhwkzbyhwc,wkc->byhwkr   )
r>   r   aranger   	unsqueezelongr   r   einsumview)r   r   r   r   r   r   r   sp_idxq_hq_wk_hk_w	q_h_ratio	k_h_ratiodist_h	q_w_ratio	k_w_ratiodist_wrel_hrel_wBn_headq_Nr   r_qs                            rN   cal_rel_pos_typer      s     &QQQFHCHC C#Is##IC#Is##ILQX...88<<yHLQX...88;;iGH  sQw)##FC#Is##IC#Is##ILQX...88<<yHLQX...88;;iGH  sQw)##Ffkkmm$Efkkmm$E'AvsC
AAAqqq&''M

"
"1fc3
<
<CL+S%88EL+S%88E 	QQQ677FGG#$))!Rc3DD
//"

	
//"

	 d1b#)S3Y''	 	AAAvww	  KrQ   c                   X     e Zd Zddddddddddej        f fd	Zdee         fdZ xZ	S )	MultiScaleAttentionPoolFirst   Tr.   r2   r@   c           	      j   t                                                       || _        || _        ||z  | _        | j        dz  | _        || _        t          d |D                       }t          d |D                       }t          j	        |||          | _
        t          j	        |||          | _        t          j	        |||          | _        t          j	        ||          | _        t          |          dk    rt          |	          dk    rd }t          |          dk    rt          |
          dk    rd }|| _        |dk    | _        d\  | _        | _        | _        d\  | _        | _        | _        |dv rY|d	k    rt          j        nt          j        }|r |||	|          | _        |r$ |||
|          | _         |||
|          | _        n|d
k    s|dk    r|d
k    r||z  n|}|r0t          j        ||||	||d          | _         ||          | _        |r`t          j        ||||
||d          | _         ||          | _        t          j        ||||
||d          | _         ||          | _        nt5          d|           || _        | j        dk    r |d         |d         k    sJ |d         }t9          |	          dk    r||	d         z  n|}t9          |
          dk    r||
d         z  n|}dt;          ||          z  dz
  }t          j        t?          j         || j                            | _!        t          j        t?          j         || j                            | _"        tG          | j!        d           tG          | j"        d           || _$        d S )N      c                 2    g | ]}t          |d z            S r   re   rK   r   s     rN   rW   z9MultiScaleAttentionPoolFirst.__init__.<locals>.<listcomp>   "    99913qAv;;999rQ   c                 2    g | ]}t          |d z            S r   r   rK   kvs     rN   rW   z9MultiScaleAttentionPoolFirst.__init__.<locals>.<listcomp>   "    ===RCaLL===rQ   biasr   conv_unsharedNNNavgr>   r>   r.   Fru   rv   groupsr   Unsupported model r@   r   r   {Gz?std)%rw   rx   r%   r|   head_dimscaler   rZ   r   Linearr   kvrz   ro   r/   unsharedpool_qpool_kpool_vnorm_qnorm_knorm_v	MaxPool2d	AvgPool2dry   NotImplementedErrorrA   rX   r>   	Parameterr   zerosr   r   r   r-   rM   r   r|   r   r%   r*   r/   kernel_q	kernel_kvr4   r5   r   rA   r-   rE   	padding_q
padding_kvpool_opdim_convsizer   kv_size
rel_sp_dimr~   s                          rN   rx   z%MultiScaleAttentionPoolFirst.__init__   s   " 	"9,]d*
*99999::	==9===>>
3h7773h7773h777Igw//	 >>Q4>>Q#6#6H	??aDOOq$8$8I	/0@-T[$+0@-T[$+>!!&*emmbllG E%gh)DD H%giJGG%giJGGV^^t66+/6>>si''sH 
3 i#%#   )j22 3 i$&#   )j22 i$&#   )j22%&A4&A&ABBB )	))Q<9Q<////Q<D,/MMA,=,=TXa[((4F.1)nnq.@.@dil**dGS111A5J\%+j$-*P*PQQDN\%+j$-*P*PQQDNT^6666T^6666 0rQ   r   c           	         |j         \  }}}| j        rdn| j        }|                    |||d                              dddd          }|x}x}}	| j        Ht          ||| j                  \  }}
|                     |          }t          || j        |
          \  }}n|}| j	        | 	                    |          }| j
        Ht          ||| j                  \  }}| 
                    |          }t          || j        |          \  }}n|}| j        |                     |          }| j        Ht          |	|| j                  \  }	}|                     |	          }	t          |	| j        |          \  }	}n|}| j        |                     |	          }	|d         |d         z  t          | j                  z   }|                    dd                              ||d          }|                     |                              ||| j        d                              dd          }|d         |d         z  t          | j                  z   }|                    dd                              ||d          }|                     |                              ||| j        d          }|d         |d         z  t          | j                  z   }|	                    dd                              ||d          }	|                     |	                              ||| j        d                              dd          }	|| j        z  |z  }| j        dk    r$t+          ||| j        ||| j        | j                  }|                    d          }||	z  }| j        r||z   }|                    dd                              |d| j                  }|                     |          }||fS )Nr   r   r   r   r    r@   r   )r   r   r%   r   r   r   r   r   r   r   r   r   r   r   re   r   r   r   r   r   rA   r   r   r   softmaxr-   r|   rz   )rM   r   r   r   N_fold_dimr   r   r   q_tokr   k_tokr   v_tokv_sizer   k_Nv_Nr   s                       rN   r   z$MultiScaleAttentionPoolFirst.forward+  s   '1a9114>IIaHb))11!Q1==A;"'9d6HIIHAuAA)!T^UCCIAvvF;"AA;"'9d6HIIHAuAA)!T^UCCIAvvF;"AA;"'9d6HIIHAuAA)!T^UCCIAvvF;"AAQi&)#c$*<&=&==KK1%%ab11FF1IIadnb99CCAqIIQi&)#c$*<&=&==KK1%%ab11FF1IIadnb99Qi&)#c$*<&=&==KK1%%ab11FF1IIadnb99CCAqIIDJ!#	))#" D |||##1H  	AAKK1%%aT\::IIaLL&yrQ   
rb   rc   rd   r   	LayerNormrx   r   re   r   r   r   s   @rN   r   r      s         "!|b1 b1 b1 b1 b1 b1HBDI B B B B B B B BrQ   r   c                   X     e Zd Zddddddddddej        f fd	Zdee         fdZ xZ	S )	MultiScaleAttentionr   Tr.   r2   r@   c           	          t                                                       || _        || _        ||z  | _        | j        dz  | _        || _        t          d |D                       }t          d |D                       }t          j	        ||dz  |          | _
        t          j	        ||          | _        t          |          dk    rt          |	          dk    rd }t          |          dk    rt          |
          dk    rd }|| _        |dk    | _        d\  | _        | _        | _        d\  | _        | _        | _        |d	v rY|d
k    rt          j        nt          j        }|r |||	|          | _        |r$ |||
|          | _         |||
|          | _        n|dk    s|dk    r|dk    r||z  n|}|r0t          j        ||||	||d          | _         ||          | _        |r`t          j        ||||
||d          | _         ||          | _        t          j        ||||
||d          | _         ||          | _        nt1          d|           || _        | j        dk    r |d         |d         k    sJ |d         }t5          |	          dk    r||	d         z  n|}t5          |
          dk    r||
d         z  n|}dt7          ||          z  dz
  }t          j        t;          j        || j                            | _        t          j        t;          j        || j                            | _         tC          | j        d           tC          | j         d           || _"        d S )Nr   c                 2    g | ]}t          |d z            S r   r   r   s     rN   rW   z0MultiScaleAttention.__init__.<locals>.<listcomp>  r   rQ   c                 2    g | ]}t          |d z            S r   r   r   s     rN   rW   z0MultiScaleAttention.__init__.<locals>.<listcomp>  r   rQ   r    r   r   r   r   r   r>   r.   Fr   r   r@   r   r   r   r   )#rw   rx   r%   r|   r   r   r   rZ   r   r   qkvrz   ro   r/   r   r   r   r   r   r   r   r   r   ry   r   rA   rX   r>   r   r   r   r   r   r   r-   r   s                          rN   rx   zMultiScaleAttention.__init__q  s   " 	"9,]d*
*99999::	==9===>>
9S'A+H===Igw//	 >>Q4>>Q#6#6H	??aDOOq$8$8I	/0@-T[$+0@-T[$+>!!&*emmbllG E%gh)DD H%giJGG%giJGGV^^t66/3v~~w)++7H 
3 i#%#   )j22 3 i$&#   )j22 i$&#   )j22%&A4&A&ABBB )	))Q<9Q<////Q<D,/MMA,=,=TXa[((4F.1)nnq.@.@dil**dGS111A5J\%+j$-*P*PQQDN\%+j$-*P*PQQDNT^6666T^6666 0rQ   r   c           	         |j         \  }}}|                     |                              ||d| j        d                              ddddd          }|                    d          \  }}}	| j        Ht          ||| j                  \  }}
|                     |          }t          || j        |
          \  }}n|}| j
        | 
                    |          }| j        Ht          ||| j                  \  }}|                     |          }t          || j        |          \  }}n|}| j        |                     |          }| j        Gt          |	|| j                  \  }	}|                     |	          }	t          |	| j        |          \  }	}| j        |                     |	          }	|| j        z  |                    dd          z  }| j        d	k    r$t%          ||| j        ||| j        | j                  }|                    d          }||	z  }| j        r||z   }|                    dd                              |d| j                  }|                     |          }||fS )
Nr    r   r   r   r   r7   r   r   r@   )r   r  r   r%   r   unbindr   r   r   r   r   r   r   r   r   r   r   rA   r   r   r   r   r-   r|   rz   )rM   r   r   r   r   r   r  r   r   r   r   r   r   r   r   r   s                   rN   r   zMultiScaleAttention.forward  sU   '1ahhqkk!!!Q4>2>>FFq!QPQSTUU***##1a;"'9d6HIIHAuAA)!T^UCCIAvvF;"AA;"'9d6HIIHAuAA)!T^UCCIAvvF;"AA;"'9d6HIIHAuAA$Q>>DAq;"AADJ!++b""5"55	))#" D |||##1H  	AAKK1%%aT\::IIaLL&yrQ   r   r   s   @rN   r   r   p  s         "!|`1 `1 `1 `1 `1 `1D3DI 3 3 3 3 3 3 3 3rQ   r   c                   v     e Zd Zdddej        ddddddddddf fd	Zd	ee         fd
Zd	ee         fdZ	 xZ
S )MultiScaleBlockr&   T        r2   r.   Fr@   c                 P   t                                                       ||k    }|| _        || _        || _         ||          | _        |r|rt          j        ||          nd | _        |rIt          |          dk    r6d |D             }|}d |D             }t          j
        |||          | _        nd | _        |r|n|}|rt          nt          } |||||||	|
|||||||          | _        |dk    rt          |          nt          j                    | _         ||          | _        |}|r|st          j        ||          nd | _        t)          |t+          ||z            |          | _        |dk    rt          |          nt          j                    | _        d S )Nr   c                 (    g | ]}|d k    r|d z   n|S rT   rj   )rK   ss     rN   rW   z,MultiScaleBlock.__init__.<locals>.<listcomp>)  s(    CCCQAEE1q55qCCCrQ   c                 2    g | ]}t          |d z            S r   r   )rK   skips     rN   rW   z,MultiScaleBlock.__init__.<locals>.<listcomp>+  s"    CCCtC	NNCCCrQ   )r%   r   r*   r   r   r4   r5   rE   r   r/   rA   r-   r	  )in_featureshidden_featuresout_features)rw   rx   r   r|   r   norm1r   r   shortcut_proj_attnro   r   shortcut_pool_attnr   r   r   r   Identity
drop_path1norm2shortcut_proj_mlpr   re   mlp
drop_path2)rM   r   r|   r%   r   r'   r*   	drop_pathrE   r   r   r4   r5   r/   r   r)   r(   rA   r-   proj_neededkernel_skipstride_skippadding_skipatt_dim
attn_layermlp_dim_outr~   s                             rN   rx   zMultiScaleBlock.__init__
  s   * 	Wn*Z__
=H"b["b")C"9"9"9^b 	+X**CC(CCCK"KCC{CCCL&(l;\&Z&ZD##&*D#(1''c5?X11EX
J!'%-
 
 
	  2;S(9---bkmmZ((
<G!eP[!e3!8!8!8ae) 344$
 
 

 2;S(9---bkmmrQ   r   c                    | j         |S | j        r#|d d d dd d f         |d d dd d d f         }}nd }|j        \  }}}|\  }}|                    ||||                              dddd                                          }|                      |          }|                    ||d                              dd          }|t          j        ||fd          }|S )Nr   r   r    r   r   r   )	r  r   r   r   r   r   r   r   r   )	rM   r   r   r   r   LCr   r   s	            rN   _shortcut_poolzMultiScaleBlock._shortcut_poolN  s    "*H 	111bqb!!!8a122qqqkQGGG'1a1IIaAq!!))!Q155@@BB##A&&IIaB))!Q//	7A,A...ArQ   c                    |                      |          }| j        |n|                     |          }|                     ||          }|                     ||          \  }}||                     |          z   }|                     |          }| j        |n|                     |          }||                     |                     |                    z   }||fS N)	r  r  r&  r   r  r  r  r  r  )rM   r   r   x_norm
x_shortcutfeat_size_news         rN   r   zMultiScaleBlock.forward^  s    A19QQt?V?VW]?^?^
((Y??
99VY77=+++A08QQd>T>TU[>\>\
&)9)9:::-rQ   )rb   rc   rd   r   r   rx   r   re   r&  r   r   r   s   @rN   r  r  	  s         |"!'BT BT BT BT BT BTH49      DI                rQ   r  c                   ^     e Zd Zddddddddddddej        df fd	Zd	ee         fd
Z xZ	S )MultiScaleVitStager&   Tr.   r2   Fr@   r	  c                 f   t                                                       d| _        t          j                    | _        |r|f|z  }n|f|dz
  z  |fz   }t          |          D ]}t          di d|d||         d|d|d|d|d	|	d
|
d|dk    r|ndd|d|d|d|d|d|d|d|dt          |t          t          f          r||         n|}||         }| j                            |           |dk    r't          d t          ||          D                       }|| _        d S )NFr   r   r|   r%   r   r'   r*   r   r   r4   r   r2   r5   r/   r   r(   rA   r-   r)   rE   r  c                     g | ]
\  }}||z  S rj   rj   )rK   r   ru   s      rN   rW   z/MultiScaleVitStage.__init__.<locals>.<listcomp>  s     "_"_"_ldF46>"_"_"_rQ   rj   )rw   rx   grad_checkpointingr   
ModuleListblocksr\   r  rY   r[   rZ   r^   zipr   )rM   r   r|   depthr%   r   r'   r*   r/   r   r   r4   r5   r   r)   r(   rA   r-   rE   r  out_dimsrL   attention_blockr~   s                          rN   rx   zMultiScaleVitStage.__init__n  s   , 	"'moo 	9zE)HHv+wj8Hu 	a 	aA-   C  $) $)	
 $) " " $) &'!VV $) T ,m &: *\ "2!1  (K!" &:#$ +5Yu*N*N])A,,T]%O( 1+CK///Avv!"_"_c)U]F^F^"_"_"_``	"rQ   r   c                     | j         D ]P}| j        r8t          j                                        st          j        |||          \  }}A |||          \  }}Q||fS r(  )r2  r0  r   jitis_scripting
checkpoint)rM   r   r   blks       rN   r   zMultiScaleVitStage.forward  sn    ; 	1 	1C& 1uy/E/E/G/G 1)4S!YGG99"s1i0099)|rQ   r   r   s   @rN   r-  r-  l  s         "!|)9# 9# 9# 9# 9# 9#vDI        rQ   r-  c                   T    e Zd ZdZ	 	 	 	 	 	 d(dedeeef         d	ed
ee         dede	de	f fdZ
d Zej        j        d             Zej        j        d)d            Zej        j        d*d            Zej        j        dej        fd            Zd+ded
ee         fdZ	 	 	 	 	 d,dej        deeeee         f                  dededededeeej                 eej        eej                 f         f         fdZ	 	 	 d-deeee         f         d!ed"efd#Zd$ Zd)d%efd&Zd' Z xZS ).r   a  
    Improved Multiscale Vision Transformers for Classification and Detection
    Yanghao Li*, Chao-Yuan Wu*, Haoqi Fan, Karttikeya Mangalam, Bo Xiong, Jitendra Malik,
        Christoph Feichtenhofer*
    https://arxiv.org/abs/2112.01526

    Multiscale Vision Transformers
    Haoqi Fan*, Bo Xiong*, Karttikeya Mangalam*, Yanghao Li*, Zhicheng Yan, Jitendra Malik,
        Christoph Feichtenhofer*
    https://arxiv.org/abs/2104.11227
       r>  r    N  r	  cfgimg_sizein_chansglobal_poolnum_classesdrop_path_rate	drop_ratec           
         t                                                       t          |          }t          t	          |j                  |j                  }|| _        || _        ||j	        rdnd}|| _
        t          |j                  | _        |j        | _        |j        d         }	t          ||	|j        |j        |j                  | _        |d         |j        d         z  |d         |j        d         z  f}
t)          |
          }|j	        r:t+          j        t/          j        dd|	                    | _        d| _        |dz   }nd| _        d | _        |}|j        r.t+          j        t/          j        d||	                    | _        nd | _        t;          |j                  }|
}t=          |j                  }d t/          j        d|tA          |j                            !                    |j                  D             }t+          j"                    | _#        g | _$        tK          |          D ]D}|j        r|j        |         }n!|j        tM          |dz   |dz
                     }tO          d!i d|	d	|d
|j        |         d|j(        |         d|d|j)        d|j*        d|j+        d|j,        d|j        d|j-        d|j-        d|j.        |         d|j/        |         d|j	        d|j0        d|j1        d|d||         }|t=          |j.        |                   z  }| xj$        te          d| ||          gz  c_$        |}	|j3        }| j#        4                    |           F|	x| _5        | _6         ||	          | _7        t+          j8        ts          dt+          j:        | j                  fd|dk    rt+          j;        | j5        |          nt+          j<                    fg                    | _=        | j        t}          | j        d            | j        t}          | j        d            | ?                    | j@                   d S )"N)epstokenr   r   )r{   r|   r}   ru   rv   r   c                 6    g | ]}|                                 S rj   )tolist)rK   r   s     rN   rW   z*MultiScaleVit.__init__.<locals>.<listcomp>  s     hhhaqxxzzhhhrQ   r   r|   r4  r%   r   r'   r*   r/   r(   r)   r   r   r4   r5   r   rA   r-   rE   r  zblock.)modulenum_chs	reductiondropfcr   r   rj   )Arw   rx   r   r   r   rE   rF   rD  rF  r+   rC  rZ   r"   r)   r$   rq   r;   r<   r=   patch_embedro   r   r   r   r   	cls_tokennum_prefix_tokensr,   	pos_embedrX   r>   linspacesumsplitr1  stagesfeature_infor\   r]   r-  r%   r'   r*   r/   r(   r1   r4   r5   rA   r-   dictr   r^   num_featureshead_hidden_sizenorm
Sequentialr   Dropoutr   r  headr   apply_init_weights)rM   r@  rA  rB  rC  rD  rE  rF  rE   r$   
patch_dimsnum_patchespos_embed_dimr_   r   curr_stridedprrL   r|   stager~   s                       rN   rx   zMultiScaleVit.__init__  s    	X&&^CN;;NNN
&"%(%6A''EK&CJ''?M!$	%##%
 
 
 qkS%5a%88(1+IYZ[I\:\]
:&& 	(\%+aI*F*FGGDN%&D"'!OMM%&D"!DN'M? 	"\%+a	*R*RSSDNN!DN''
	#*++hh5>!^S__#U#U#[#[\_\f#g#ghhhmooz"" 	& 	&A D-*-AE:>(B(BC&   I jmm -**	
 $) --  XX >>  OO  .. a -** "//  !--!" "%!5!5#$ &:%& a&&'E* 3s|A///K$lqllGWb"c"c"c!ddIIKu%%%%4==D1Jy))	M+RZ//0a29T.<<<UWU`UbUbc/
 # #  	
 >%T^6666>%T^6666

4%&&&&&rQ   c                     t          |t          j                  r^t          |j        d           t          |t          j                  r0|j        +t          j                            |j        d           d S d S d S d S )Nr   r   r	  )rY   r   r   r   weightr   init	constant_)rM   ms     rN   rb  zMultiScaleVit._init_weights   s    a## 	/QX40000!RY'' /AF,>!!!&#.....	/ 	// /,>,>rQ   c                 >    d |                                  D             S )Nc                 P    h | ]"\  }t          fd dD                        #S )c              3       K   | ]}|v V  	d S r(  rj   )rK   nr   s     rN   rO   z:MultiScaleVit.no_weight_decay.<locals>.<setcomp>.<genexpr>)  s'      \\!qAv\\\\\\rQ   )rT  r   r   rR  )any)rK   r   r   s     @rN   	<setcomp>z0MultiScaleVit.no_weight_decay.<locals>.<setcomp>(  sV     ^ ^ ^da\\\\'[\\\\\^ ^ ^ ^rQ   )named_parametersrM   s    rN   no_weight_decayzMultiScaleVit.no_weight_decay&  s4    ^ ^d3355 ^ ^ ^ 	^rQ   Fc                 ,    t          dddg          }|S )Nz^patch_embed)z^stages\.(\d+)N)z^norm)i )stemr2  )rZ  )rM   coarsematchers      rN   group_matcherzMultiScaleVit.group_matcher+  s)     -/CD
 
 
 rQ   Tc                 (    | j         D ]	}||_        
d S r(  )rX  r0  )rM   enabler  s      rN   set_grad_checkpointingz$MultiScaleVit.set_grad_checkpointing3  s(     	* 	*A#)A  	* 	*rQ   r   c                     | j         j        S r(  )r`  rP  ru  s    rN   get_classifierzMultiScaleVit.get_classifier8  s    y|rQ   c           
         || _         ||| _        t          j        t	          dt          j        | j                  fd|dk    rt          j        | j        |          nt          j	                    fg                    | _
        d S )NrO  rP  r   )rD  rC  r   r^  r   r_  rF  r   r[  r  r`  )rM   rD  rC  s      rN   reset_classifierzMultiScaleVit.reset_classifier<  s    &"*DM+RZ//0a29T.<<<UWU`UbUbc/
 # #  			rQ   NCHWr   indicesr]  
stop_early
output_fmtintermediates_onlyc                 $   |dv s
J d            |dk    }g }t          t          | j                  |          \  }	}
|                     |          \  }}|j        d         }| j        4| j                            |dd          }t          j        ||fd          }| j	        
|| j	        z   }t          | j                  D ]\  }} |||          \  }}||	v r|r1|t          | j                  dz
  k    r|                     |          }n|}|rO| j        |ddddf         }|                    ||d         |d         d                              dd	dd
          }|                    |           |r|S |                     |          }||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r  NLCz!Output shape must be NCHW or NLC.r  r   Nr   r   r   r    r   )r   rX   rX  rQ  r   rR  expandr   r   rT  	enumerater]  r   r   r^   )rM   r   r  r]  r  r  r  r   intermediatestake_indices	max_indexr   r   
cls_tokensrL   rh  x_inters                    rN   forward_intermediatesz#MultiScaleVit.forward_intermediatesE  s   * _,,,.Q,,,&"6s4;7G7G"Q"Qi ''**9GAJ>%..q"b99J	:q/q111A>%DN"A!$+.. 	. 	.HAu 5I..LAyL    A#dk"2"2Q"677"iillGGG e~1")!!!QRR%.%ooa1y|RPPXXYZ\]_`bcddG$$W--- 	!  IIaLL-rQ   r   
prune_norm
prune_headc                     t          t          | j                  |          \  }}|rt          j                    | _        |r|                     dd           |S )z@ Prune layers not required for specified intermediates.
        r    )r   rX   rX  r   r  r]  r  )rM   r  r  r  r  r  s         rN   prune_intermediate_layersz'MultiScaleVit.prune_intermediate_layers~  s]     #7s4;7G7G"Q"Qi  	&DI 	)!!!R(((rQ   c                 @   |                      |          \  }}|j        \  }}}| j        4| j                            |dd          }t	          j        ||fd          }| j        
|| j        z   }| j        D ]} |||          \  }}|                     |          }|S )Nr   r   r   )	rQ  r   rR  r  r   r   rT  rX  r]  )rM   r   r   r   r   r%  r  rh  s           rN   forward_featureszMultiScaleVit.forward_features  s    ''**9'1a>%..q"b99J	:q/q111A>%DN"A[ 	/ 	/E 5I..LAyyIIaLLrQ   
pre_logitsc                     | j         r>| j         dk    r'|d d | j        d f                             d          }n|d d df         }|r|n|                     |          S )Nr   r   r   )rC  rS  meanr`  )rM   r   r  s      rN   forward_headzMultiScaleVit.forward_head  sq     	5((aaa/000166q99aaadG0qqDIIaLL0rQ   c                 Z    |                      |          }|                     |          }|S r(  )r  r  r   s     rN   r   zMultiScaleVit.forward  s-    !!!$$a  rQ   )r=  r    Nr?  r	  r	  FTr(  )NFFr  F)r   FT) rb   rc   rd   r   r   r	   re   r
   ri   rg   rx   rb  r   r8  ignorerv  r{  r~  r   Moduler  r  r   r   r   rh   r  r  r  r  r   r   r   s   @rN   r   r     s       
 
 )3)-#$&!_' _'!_' CHo_' 	_'
 "#_' _' "_' _' _' _' _' _' _'B/ / / Y^ ^ ^ Y    Y* * * * Y	     C hsm     8<$$',7  7 |7  eCcN347  	7 
 7  7  !%7  
tEL!5tEL7I)I#JJ	K7  7  7  7 v ./$#	 3S	>*  	   "  "1 1$ 1 1 1 1      rQ   c           	         d| v r|                                  D ]}d|v r| |         }|                                |         j        }|j        d         |d         k    rt          j        j                            |                    d|j        d         d                              ddd          |d         d          }|                    d|d                                       dd          | |<   | S dd l	}d	| v r| d	         } t          |d
d           }t          |dd          }|
J d            i dt          |          D ]=\  }	                    fdt          |	z             D                        |	z  >i }
|                                 D ]o\  }}|                    dfd|          }|r|                    dd|          }n|                    dd|          }d|v r|                    dd          }||
|<   p|
S )Nzstages.0.blocks.0.norm1.weightrel_posr   r   r   r   linear)r   r/   model_stater"   r)   Tz3model requires depth attribute to remap checkpointsc                      i | ]
}||z
  fS rj   rj   )rK   rL   	block_idx	stage_idxs     rN   
<dictcomp>z(checkpoint_filter_fn.<locals>.<dictcomp>  s$    aaaA!iY7aaarQ   zblocks\.(\d+)c           	          dt          |                     d                             d          dt          |                     d                             d          S )Nzstages.r   r   z.blocks.)re   group)r   	depth_maps    rN   <lambda>z&checkpoint_filter_fn.<locals>.<lambda>  sV    f	#aggajj// :1 =ffyQTUVU\U\]^U_U_Q`Q`GabcGdff rQ   z stages\.(\d+).blocks\.(\d+).projz&stages.\1.blocks.\2.shortcut_proj_attnz%stages.\1.blocks.\2.shortcut_proj_mlpr`  zhead.projectionhead.fc)keys
state_dictr   r   r   
functionalinterpolater   r   regetattrr  updater\   itemssubreplace)r  modelr   r  dest_rel_pos_shaperel_pos_resizedr  r"   r)   rU   out_dictr   r  r  r  s               @@@rN   checkpoint_filter_fnr    sv   ':55"" 
	e 
	eAA~~$Q-%*%5%5%7%7%:%@"=#'9!'<<<&+h&9&E&E7=+;R@@HHAqQQ/2% 'F ' 'O
 %4$;$;B@RST@U$V$V$^$^_`bc$d$dJqMIII
"".
UHd++F%55KTII!&))  	1aaaaayR[^_R_A`A`aaabbbQ		H  ""  1FFffff 
  	k:<gijkkAA:<fhijjAQ;;		+Y77AOrQ   )r   r      r   )r"   )r   r      r   r   )r      $   r7      r   F)r"   r$   r%   r)   )r"   r+   )r"   r$   r%   r+   r)   )r7   r   <   r      r    )mvitv2_tinymvitv2_smallmvitv2_basemvitv2_largemvitv2_small_clsmvitv2_base_clsmvitv2_large_clsmvitv2_huge_clsc           	          |                     dd          }t          t          | |f|st          |          nt          |         t          t          |d          d|S )Nout_indicesr7   getter)r  feature_cls)	model_cfgpretrained_filter_fnfeature_cfg)popr   r   
model_cfgsr  rZ  )variantcfg_variant
pretrainedkwargsr  s        rN   _create_mvitv2r    sp    **]A..K .9U*W%%j>U1[hGGG    rQ   r  c                 6    | ddd ddt           t          dddd|S )	Nr?  )r    r>  r>  g?bicubiczpatch_embed.projr  T)urlrD  
input_size	pool_sizecrop_pctinterpolationr  r   
first_conv
classifierfixed_input_sizer   )r  r  s     rN   _cfgr    s8    =t%.B(	    rQ   zDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_T_in1k.pythztimm/)r  	hf_hub_idzDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_S_in1k.pythzDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_B_in1k.pythzDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_L_in1k.pyth)r  zEhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_B_in21k.pythiJ  )r  r  rD  zEhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_L_in21k.pythzEhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_H_in21k.pyth)zmvitv2_tiny.fb_in1kzmvitv2_small.fb_in1kzmvitv2_base.fb_in1kzmvitv2_large.fb_in1kr  zmvitv2_base_cls.fb_inw21kzmvitv2_large_cls.fb_inw21kzmvitv2_huge_cls.fb_inw21kc                     t          dd| i|S )Nr  r  )r  r  r  r  s     rN   r  r  ;      IIJI&IIIrQ   c                     t          dd| i|S )Nr  r  )r  r  r  s     rN   r  r  @      JJZJ6JJJrQ   c                     t          dd| i|S )Nr  r  )r  r  r  s     rN   r  r  E  r  rQ   c                     t          dd| i|S )Nr  r  )r  r  r  s     rN   r  r  J  r  rQ   c                     t          dd| i|S )Nr  r  )r  r  r  s     rN   r  r  O      NNNvNNNrQ   c                     t          dd| i|S )Nr  r  )r  r  r  s     rN   r  r  T      MM
MfMMMrQ   c                     t          dd| i|S )Nr  r  )r  r  r  s     rN   r  r  Y  r  rQ   c                     t          dd| i|S )Nr  r  )r  r  r  s     rN   r  r  ^  r  rQ   r  r(  )NF)r  r  )Dr   rl   collectionsr   dataclassesr   	functoolsr   r   typingr   r   r	   r
   r   torch.utils.checkpointutilsr:  r   	timm.datar   r   timm.layersr   r   r   r   r   _builderr   	_featuresr   _features_fxr   	_registryr   r   r   __all__r   ro   r  rq   re   rh   r   r   r   r   r   r   r  r-  r   r  rZ  r  r  r  default_cfgsr  r  r  r  r  r  r  r  rj   rQ   rN   <module>r     s     # # # # # # ! ! ! ! ! ! % % % % % % % % / / / / / / / / / / / /  + + + + + + + + +       A A A A A A A A R R R R R R R R R R R R R R * * * * * * + + + + + + 3 3 3 3 3 3 Y Y Y Y Y Y Y Y Y Y.
/ -3 -3 -3 -3 -3 -3 -3 -3`- - -: : : : : : : ::  # 9  5<%,//0	     +/
 

 %,'
 5<c"#	
 
 
 
 /l/</ / S		/
 S	/ </ </ / / /dg g g g g29 g g gTV V V V V") V V Vr`  `  `  `  ` bi `  `  ` FC C C C C C C CLy y y y yBI y y yx, , ,^ T     "!   !    "!	   &%   %$   &%   %$  A' ' '
T
 
 
 
	 	 	 	 %$4R   !D%k  4$j   D%k   !%S" " " #'$S# # # "&S" " ")& &  6 J J} J J J J K K K K K K J J} J J J J K K K K K K O OM O O O O N N= N N N N O OM O O O O N N= N N N N N NrQ   