
    NgM                    5   d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
mZmZmZmZmZmZ 	 ddlmZ n# e$ r	 ddlmZ Y nw xY wddlZddlmZ ddlmc mZ ddlZddlmZ ddlmZmZm Z m!Z!m"Z"m#Z# dd	l$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3 d
dl4m5Z5 d
dl6m7Z7 d
dl8m9Z9m:Z:m;Z; d
dl<m=Z=m>Z>m?Z? dgZ@ ejA        eB          ZC G d dejD                  ZE G d dejD                  ZF G d dejD                  ZG G d dejD                  ZH G d dejD                  ZI G d dejD                  ZJ	 	 	 d_dejK        deLd eMd!eNfd"ZO G d# dejD                  ZPd`d%ejD        d&eLd'dfd(ZQdad%ejD        d&eLd*eRd'dfd+ZSd`d%ejD        d&eLd'dfd,ZTdbd.eLd*eRd'e	fd/ZU	 	 	 	 dcd2ejK        d3ejK        d eMd4eeMeMf         d5eLd6eNd'ejK        fd7ZV ejW                    d`d8ePd9eLd:eLd'dfd;            ZX	 ddd=e
eLejK        f         d8ePd:eLd'e
eLejK        f         fd>ZYd=e
eLejK        f         d8ePd'e
eLejK        f         fd?ZZ	 	 	 ded=e
eLejK        f         d8ePdAeNd5eLd6eNd'e
eLejK        f         fdBZ[d`dCeLd'e
eLef         fdDZ\i dE e\dFG          dH e\            dI e\dFG          dJ e\dKdFd@L          dM e\dNdFd@dOdPQ          dR e\dSdFd@L          dT e\dUdFd@dOdPQ          dV e\dWdFd@L          dX e\dYdFd@dOdPQ          dZ e\d[dFd@L          d\ e\d]dFd@dOdPQ          d^ e\d_dFd@L          d` e\dadFd@dOdPQ          db e\dcdFd@L          dd e\dedFd@L          df e\dgdFd@dOdPQ          dh e\didFj          i dk e\dldFdOdPm          dn e\dodFdOdPm          dp e\dqdFd@L          dr e\dsdFd@dOdPQ          dt e\dudFd@L          dv e\dwdFd@dOdPQ          dx e\dydFd@L          dz e\d{dFd@dOdPQ          d| e\d$}          d~ e\d$}          d e\d$}          d e\d$}          d e\dFd          d e\dFd          d e\dFd          d e\dFd          d e\dFd          i d e\ddFd@d          d e\ddFd@d          d e\ddFd@d          d e\ddFd@d          d e\ddFd@d          d e\ddFd@d          d e\ddFd@d          d e\dd@dF          d e\dd@dF          d e\ddFeed          d e\ddFeed          d e\ddFeed          d e\ddFeed          d e\ddFdeedddP          d e\ddFdeedddP          d e\ddFdeedddP          d e\ddFdeedddP          i d e\ddFdeedddP          d e\ddFdeedddP          d e\ddFdeedddP          d e\ddFdeedddP          d e\ddFddddd          d e\ddFdddd¦          d e\ddFj          d e\dFdddȬɦ          d e\dFddǬ̦          d e\dFdOddάϦ          d e\            d e\dFe"e#Ҧ          d e\dFe"e#dPdOԦ          d e\dFe"e#dPd֬Ԧ          d e\dFe"e#dǬئ          d e\dFe"e#dPdOdάڦ          d e\dFe e!dPئ          i d e\dFe e!dPddάڦ          d e\dFe"e#dPئ          d e\dFe"e#dPddάڦ          d e\e"e#          d e\dFe"e#ddOdάڦ          d e\dFe"e#dǬئ          d e\dFe"e#ddOdάڦ          d e\dFe"e#dPئ          d e\dFe"e#dPddάڦ          d e\dFe"e#Ҧ          d e\dFe"e#dPئ          d e\dFe"e#dPdOdάڦ          d e\dFe e!dPئ          d e\dFe e!dPddάڦ          d e\dFe"e#dPئ          d e\d$e"e#dPddάڦ          d e\dFe"e#Ҧ          i d e\dFe"e#Ҧ          d e\dFe"e#dPdOdάڦ          d e\dFe"e#dPئ          d e\e"e#dȬ          d e\dFe"e#dȬ          d e\dFe e!dPdȬ          d e\dFe"e#dPdȬ          d e\e"e#dȬ          d e\dFe"e#dȬ          d e\dFe"e#dPdȬ          d e\dde"e#d           d e\dde"e#dPd          d e\dde e!dPd          d e\dde"e#dPd	          d
 e\dde"e#dPd	          d e\dde"e#dPd          d e\dde"e#dPd          i d e\dde"e#dPdd          d e\dde"e#dPd          d e\dde"e#dPd          d e\dde"e#dPd          d e\ddde"e#dPd          d e\ddde"e#dPd	          d  e\d!de"e#ddPd"d	#          d$ e\d%d&d'de"e#dPd(          d) e\d*d+d'de"e#dPd(          d, e\d-d.d'de"e#dPd(          d/ e\d0d1d'de"e#dPd	(          d2 e\d3de"e#d4          d5 e\d6de"e#d4          d7 e\d8de"e#dPd9          d: e\d;dde"e#dPdݐd<          d= e\d$ddǐ>          d? e\d$ddǐ>          i d@ e\d$}          dA e\d$}          dB e\d$}          dC e\dFdDe"e#dEdPF          dG e\dFdDe"e#ddPdΐH          dI e\dFdDe"e#dEdPF          dJ e\dFdDe"e#ddPdΐH          dK e\dLd@dFddǐM          dN e\dOd@dFddǐM          dP e\dQd@dFddǐM          dR e\dSd@dFddǐM          dT e\dUd@dFddǐM          dV e\dWd@dFddǐM          dX e\dYd@dFdddZ          d[ e\d\d@dFdddZ          d] e\d^d@dFddǐM          d_ e\d`d@dFddǐM          i da e\dbd@dFddǐM          dc e\ddd@dFdddZ          de e\dfd@dFdddZ          dg e\d$}          dh e\d$}          di e\d$}          dj e\dkdFd'eedl          dm e\dndFd'eedl          do e\dpdFd'eedl          dq e\drd'eeds          dt e\dud'eeds          dv e\dwd'ddPeedx          dy e\dzd'eeds          d{ e\d|dd}          d~ e\dddd          d e\dddd          d e\dddOd          i d e\dddd          d e\dddd          d e\dddOd          d e\ddd}          d e\dddd          d e\ddd"d          d e\dddOd          d e\d|dd}          d e\dddd          d e\dddd          d e\dddOd          d e\dddd          d e\dddd          d e\dddOd          d e\ddd}          d e\dddd          d e\dddd          i d e\dddd          d e\ddd"dPd          d e\dddOdPd          d e\dddddPd          d e\dddddPd          d e\dddddPd          d e\dFd"dPdάϦ          d e\dFd"dPdάϦ          d e\dFddDe"e#d          d e\dFddDe"e#d          d e\dFddDe"e#d          d e\dFddDe"e#d          d e\dFddǬ̦          d e\dFddǬ̦          d e\dFddǬ̦          d e\dFdddǐ          d e\dFddǬ̦          i d e\dFddǬ̦          d e\dFddǬ̦          d e\dFddǬ̦          d e\dFdddǐ          d e\dFddǬ̦          d e\dFddǬ̦          d e\dFdddǐ          d e\dFdddǐ          d e\dFdOdP̦          d e\dFddǬ̦          d e\dFddǬ̦          d e\dFddǬ̦          d e\dFddǬ̦          d e\dFdddǐ          d e\dFdddǐ          d e\dFdOdP̦          d e\dːЦ           e\dːЦ           e\dːЦ           e\dFeeddPdѦ           e\dFddǬ̦           e\dFddǬ̦           e\dFddǬ̦          dӜZ]g dԢZ^e]_                    dՄ e^D                         e=e]          Z]dfdeLdeNd'ePfd؄Z`e>dfdeNd'ePfdل            Zae>dfdeNd'ePfdڄ            Zbe>dfdeNd'ePfdۄ            Zce>dfdeNd'ePfd܄            Zde>dfdeNd'ePfd݄            Zee>dfdeNd'ePfdބ            Zfe>dfdeNd'ePfd߄            Zge>dfdeNd'ePfd            Zhe>dfdeNd'ePfd            Zie>dfdeNd'ePfd            Zje>dfdeNd'ePfd            Zke>dfdeNd'ePfd            Zle>dfdeNd'ePfd            Zme>dfdeNd'ePfd            Zne>dfdeNd'ePfd            Zoe>dfdeNd'ePfd            Zpe>dfdeNd'ePfd            Zqe>dfdeNd'ePfd            Zre>dfdeNd'ePfd            Zse>dfdeNd'ePfd            Zte>dfdeNd'ePfd            Zue>dfdeNd'ePfd            Zve>dfdeNd'ePfd            Zwe>dfdeNd'ePfd            Zxe>dfdeNd'ePfd            Zye>dfdeNd'ePfd            Zze>dfdeNd'ePfd            Z{e>dfdeNd'ePfd            Z|e>dfdeNd'ePfd            Z}e>dfdeNd'ePfd            Z~e>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd             Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd	            Ze>dfdeNd'ePfd
            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd            Ze>dfdeNd'ePfd             Ze>dfdeNd'ePfd!            Ze>dfdeNd'ePfd"            Ze>dfdeNd'ePfd#            Ze>dfdeNd'ePfd$            Ze>dfdeNd'ePfd%            Ze>dfdeNd'ePfd&            Ze>dfdeNd'ePfd'            Ze>dfdeNd'ePfd(            Ze>dfdeNd'ePfd)            Ze>dfdeNd'ePfd*            Ze>dfdeNd'ePfd+            Ze>dfdeNd'ePfd,            Ze>dfdeNd'ePfd-            Ze>dfdeNd'ePfd.            Ze>dfdeNd'ePfd/            Ze>dfdeNd'ePfd0            Ze>dfdeNd'ePfd1            Ze>dfdeNd'ePfd2            Ze>dfdeNd'ePfd3            Ze>dfdeNd'ePfd4            Ze>dfdeNd'ePfd5            Ze>dfdeNd'ePfd6            Ze>dfdeNd'ePfd7            Ze>dfdeNd'ePfd8            Ze>dfdeNd'ePfd9            Ze>dfdeNd'ePfd:            Ze>dfdeNd'ePfd;            Ze>dfdeNd'ePfd<            Ze>dfdeNd'ePfd=            Ze>dfdeNd'ePfd>            Ze>dfdeNd'ePfd?            Ze>dfdeNd'ePfd@            Ze>dfdeNd'ePfdA            Ze>dfdeNd'ePfdB            Ze>dfdeNd'ePfdC            Ze>dfdeNd'ePfdD            Ze>dfdeNd'ePfdE            Ze>dfdeNd'ePfdF            Ze>dfdeNd'ePfdG            Ze>dfdeNd'ePfdH            Ze>dfdeNd'ePfdI            Ze>dfdeNd'ePfdJ            Z e?eBi dKddLddMddNddOddPddQddRddSddTdUdVdWdXddYddZdd[dd\dd]dddd
d^           dS (g  a   Vision Transformer (ViT) in PyTorch

A PyTorch implement of Vision Transformers as described in:

'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale'
    - https://arxiv.org/abs/2010.11929

`How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers`
    - https://arxiv.org/abs/2106.10270

`FlexiViT: One Model for All Patch Sizes`
    - https://arxiv.org/abs/2212.08013

The official jax code is released and available at
  * https://github.com/google-research/vision_transformer
  * https://github.com/google-research/big_vision

Acknowledgments:
  * The paper authors for releasing code and weights, thanks!
  * I fixed my class token impl based on Phil Wang's https://github.com/lucidrains/vit-pytorch
  * Simple transformer style inspired by Andrej Karpathy's https://github.com/karpathy/minGPT
  * Bert reference code checks against Huggingface Transformers and Tensorflow Bert

Hacked together by / Copyright 2020, Ross Wightman
    N)OrderedDict)partial)	AnyCallableDictOptionalSetTupleTypeUnionList)Literal)Final)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDIMAGENET_INCEPTION_MEANIMAGENET_INCEPTION_STDOPENAI_CLIP_MEANOPENAI_CLIP_STD)
PatchEmbedMlpDropPathAttentionPoolLatentRmsNormPatchDropoutSwiGLUPackedtrunc_normal_lecun_normal_resample_patch_embedresample_abs_pos_embeduse_fused_attnget_act_layerget_norm_layer	LayerType   )build_model_with_cfg)feature_take_indices)named_applycheckpoint_seqadapt_input_conv)generate_default_cfgsregister_modelregister_model_deprecationsVisionTransformerc                        e Zd ZU ee         ed<   dddddej        fdedededed	e	d
e	dej
        ddf fdZdej        dej        fdZ xZS )	Attention
fused_attn   F        dim	num_headsqkv_biasqk_norm	attn_drop	proj_drop
norm_layerreturnNc                 R   t                                                       ||z  dk    s
J d            || _        ||z  | _        | j        dz  | _        t                      | _        t          j        ||dz  |          | _	        |r || j                  nt          j
                    | _        |r || j                  nt          j
                    | _        t          j        |          | _        t          j        ||          | _        t          j        |          | _        d S )Nr   $dim should be divisible by num_heads         bias)super__init__r5   head_dimscaler!   r1   nnLinearqkvIdentityq_normk_normDropoutr8   projr9   )	selfr4   r5   r6   r7   r8   r9   r:   	__class__s	           Z/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/vision_transformer.pyrC   zAttention.__init__=   s    	Y!###%K###"y(]d*
(**9S#'9993:Mjj///3:Mjj///I..Ic3''	I..    xc                    |j         \  }}}|                     |                              ||d| j        | j                                      ddddd          }|                    d          \  }}}|                     |          |                     |          }}| j	        r,t          j        |||| j        r| j        j        nd          }nS|| j        z  }||                    dd	          z  }	|	                    d	
          }	|                     |	          }	|	|z  }|                    dd                              |||          }|                     |          }|                     |          }|S )Nr?      r   r%      r3   	dropout_pr4   )shaperH   reshaper5   rD   permuteunbindrJ   rK   r1   Fscaled_dot_product_attentiontrainingr8   prE   	transposesoftmaxrM   r9   )
rN   rR   BNCrH   qkvattns
             rP   forwardzAttention.forwardU   s_   '1ahhqkk!!!Q4>4=IIQQRSUVXY[\^_``**Q--1a{{1~~t{{1~~1? 
	.1a.2mC$.**  AA
 DJAq{{2r***D<<B<''D>>$''DqAKK1%%aA..IIaLLNN1rQ   )__name__
__module____qualname__r   bool__annotations__rF   	LayerNormintfloatModulerC   torchTensorrl   __classcell__rO   s   @rP   r0   r0   :   s         d
 "!!!$&L/ // / 	/
 / / / 	/ 
/ / / / / /0 %,        rQ   r0   c            	       Z     e Zd Z	 	 ddedededdf fdZd	ej        dej        fd
Z	 xZ
S )
LayerScaleh㈵>Fr4   init_valuesinplacer;   Nc                     t                                                       || _        t          j        |t          j        |          z            | _        d S N)rB   rC   r~   rF   	Parameterrv   onesgamma)rN   r4   r}   r~   rO   s       rP   rC   zLayerScale.__init__n   sD     	\+
3"?@@


rQ   rR   c                 X    | j         r|                    | j                  n	|| j        z  S r   )r~   mul_r   rN   rR   s     rP   rl   zLayerScale.forwardx   s(    %)\Eqvvdj!!!q4:~ErQ   )r|   F)rm   rn   ro   rs   rt   rp   rC   rv   rw   rl   rx   ry   s   @rP   r{   r{   m   s         "&!	A AA A 	A
 
A A A A A AF F%, F F F F F F F FrQ   r{   c                        e Zd Zdddddddej        ej        ef
dededede	d	e	d
edede
e         dedej        dej        dej        ddf fdZdej        dej        fdZ xZS )Block      @Fr3   Nr4   r5   	mlp_ratior6   r7   r9   r8   r}   	drop_path	act_layerr:   	mlp_layerr;   c           	      f   t                                                        ||          | _        t          |||||||          | _        |rt          ||          nt          j                    | _        |	dk    rt          |	          nt          j                    | _
         ||          | _         ||t          ||z            |
|          | _        |rt          ||          nt          j                    | _        |	dk    rt          |	          nt          j                    | _        d S )Nr5   r6   r7   r8   r9   r:   r}   r3   in_featureshidden_featuresr   drop)rB   rC   norm1r0   rk   r{   rF   rI   ls1r   
drop_path1norm2rs   mlpls2
drop_path2rN   r4   r5   r   r6   r7   r9   r8   r}   r   r   r:   r   rO   s                rP   rC   zBlock.__init__}   s4    	Z__
!
 
 
	 @K]:c{;;;;PRP[P]P]1:R(9---R[]]Z__
9i00	
 
 
 @K]:c{;;;;PRP[P]P]1:R(9---R[]]rQ   rR   c           
      J   ||                      |                     |                     |                     |                                        z   }||                     |                     |                     |                     |                                        z   }|S r   )r   r   rk   r   r   r   r   r   r   s     rP   rl   zBlock.forward   sw    4::a==)A)A B BCCC$**Q--)@)@ A ABBBrQ   )rm   rn   ro   rF   GELUrr   r   rs   rt   rp   r   ru   rC   rv   rw   rl   rx   ry   s   @rP   r   r   |   s+       
  ""!!!+/!#%7$&L#&%S %S%S %S 	%S
 %S %S %S %S "%%S %S y%S 	%S y%S 
%S %S %S %S %S %SN %,        rQ   r   c                        e Zd Zdddddddej        ej        ef
dededede	d	e	d
edede
e         dedej        dej        dej        ddf fdZddZdej        dej        fdZ xZS )ResPostBlockr   Fr3   Nr4   r5   r   r6   r7   r9   r8   r}   r   r   r:   r   r;   c           	         t                                                       || _        t          |||||||          | _         ||          | _        |	dk    rt          |	          nt          j                    | _	         ||t          ||z            |
|          | _         ||          | _        |	dk    rt          |	          nt          j                    | _        |                                  d S )Nr   r3   r   )rB   rC   r}   r0   rk   r   r   rF   rI   r   rs   r   r   r   init_weightsr   s                rP   rC   zResPostBlock.__init__   s     	&!
 
 
	  Z__
1:R(9---R[]]9i00	
 
 
  Z__
1:R(9---R[]]rQ   c                     | j         `t          j                            | j        j        | j                    t          j                            | j        j        | j                    d S d S r   )r}   rF   init	constant_r   weightr   rN   s    rP   r   zResPostBlock.init_weights   sY    'Gdj/1ABBBGdj/1ABBBBB ('rQ   rR   c                     ||                      |                     |                     |                              z   }||                     |                     |                     |                              z   }|S r   )r   r   rk   r   r   r   r   s     rP   rl   zResPostBlock.forward   s_    

499Q<< 8 8999

488A;; 7 7888rQ   )r;   N)rm   rn   ro   rF   r   rr   r   rs   rt   rp   r   ru   rC   r   rv   rw   rl   rx   ry   s   @rP   r   r      s/       
  ""!!!+/!#%7$&L#&' '' ' 	'
 ' ' ' ' "%' ' y' 	' y' 
' ' ' ' ' 'RC C C C %,        rQ   r   c                        e Zd ZU dZee         ed<   dddddddej        ej	        df
de
de
d	ed
ededededee         dedej        dej        deej                 ddf fdZdej        dej        fdZ xZS )ParallelScalingBlockz Parallel ViT block (MLP & Attention in parallel)
    Based on:
      'Scaling Vision Transformers to 22 Billion Parameters` - https://arxiv.org/abs/2302.05442
    r1   r   Fr3   Nr4   r5   r   r6   r7   r9   r8   r}   r   r   r:   r   r;   c                    t                                                       ||z  dk    s
J d            || _        ||z  | _        | j        dz  | _        t                      | _        t          ||z            }|d|z  z   } ||          | _        t          j
        |||          | _        |g|gdz  z   | _        |r-|                     dd            |                     dd            nX|                     dt          j        d|z            d	           t          j        t          j        |                    | _        |r || j                  nt          j                    | _        |r || j                  nt          j                    | _        t          j        |          | _        t          j
        ||          | _        t          j        |          | _         |
            | _        t          j
        ||          | _        |t9          ||
          nt          j                    | _        |	dk    rt=          |	          nt          j                    | _        d S )Nr   r=   r>   r?   r@   r6   mlp_biasF)
persistentr   r3   ) rB   rC   r5   rD   rE   r!   r1   rs   in_normrF   rG   in_projin_splitregister_bufferregister_parameterrv   zerosr   r   rI   rJ   rK   rL   r8   attn_out_projmlp_dropmlp_actmlp_out_projr{   lsr   r   )rN   r4   r5   r   r6   r7   r9   r8   r}   r   r   r:   r   mlp_hidden_dimin_proj_out_dimrO   s                  rP   rC   zParallelScalingBlock.__init__   s)    	Y!###%K###"y(]d*
(**Y_--(1s72!z#yoHEEE'(C5194 	F  T222##J5555  U[S-A-Ae TTTL^)D)DEEDM3:Mjj///3:Mjj///I..YsC00
9-- y{{Inc::>I>U*Sk::::[][f[h[h09B),,,BKMMrQ   rR   c                 8   |j         \  }}}|                     |          }| j        ?t          j        || j        j        t          j        | j	        | j        f                    }n|                     |          }t          j
        || j        d          \  }}}}	|                     |                    ||| j        | j                                                dd          }|                     |                    ||| j        | j                                                dd          }|	                    ||| j        | j                                      dd          }	| j        r,t          j        |||	| j        r| j        j        nd          }
nS|| j        z  }||                    dd          z  }|                    d          }|                     |          }||	z  }
|
                    dd                              |||          }
|                     |
          }
|                     |          }|                     |          }|                     |          }|                     |                     |
|z                       }||z   }|S )NrY   rZ   r%   rT   r3   rV   rX   ) r[   r   r   r_   linearr   r   rv   catr6   splitr   rJ   viewr5   rD   rc   rK   r1   r`   ra   r8   rb   rE   rd   r\   r   r   r   r   r   r   )rN   rR   re   rf   rg   yx_mlprh   ri   rj   x_attnrk   s               rP   rl   zParallelScalingBlock.forward  sU   '1a LLOO=$ DL/DM4=;Y1Z1Z[[AAQAQ2>>>q!Q KKq!T^T]CCDDNNqRSTTKKq!T^T]CCDDNNqRSTTFF1a77AA!QGG? 
	31a.2mC$.**  FF
 DJAq{{2r***D<<B<''D>>$''DAXF!!!Q''//1a88##F++ U##e$$!!%(( NN4776E>2233ErQ   )rm   rn   ro   __doc__r   rp   rq   rF   r   rr   rs   rt   r   ru   rC   rv   rw   rl   rx   ry   s   @rP   r   r      sN          d  ""!!!+/!#%7$&L-1,R ,R,R ,R 	,R
 ,R ,R ,R ,R "%,R ,R y,R 	,R  	*,R 
,R ,R ,R ,R ,R ,R\' '%, ' ' ' ' ' ' ' 'rQ   r   c                   B    e Zd ZdZddddddddej        ej        efdeded	ed
e	de
de
dee	         de	de	de	dej        dej        dej        ddf fdZdej        dej        fdZej        j        dej        dej        fd            Zdej        dej        fdZ xZS )ParallelThingsBlockz Parallel ViT block (N parallel attention followed by N parallel MLP)
    Based on:
      `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
    rT   r   FNr3   r4   r5   num_parallelr   r6   r7   r}   r9   r8   r   r   r:   r   r;   c                    t                                                       || _        t          j                    | _        t          j                    | _        t          |          D ]d}| j                            t          j	        t          d ||          fdt          |||||	||          fd|rt          ||          nt          j                    fd|
dk    rt          |
          nt          j                    fg                               | j                            t          j	        t          d ||          fd ||t          ||z            ||	          fd|rt          ||          nt          j                    fd|
dk    rt          |
          nt          j                    fg                               fd S )
Nnormrk   r   r   r   r   r3   r   )r   r   r   )rB   rC   r   rF   
ModuleListattnsffnsrangeappend
Sequentialr   r0   r{   rI   r   rs   )rN   r4   r5   r   r   r6   r7   r}   r9   r8   r   r   r:   r   _rO   s                  rP   rC   zParallelThingsBlock.__init__D  s     	(]__
MOO	|$$ 	 	AJbmKC)'%#'')    ;az#;????TVT_TaTabY^^hy111W9 - -      IR];C)		$'i$8$8'"	    ;az#;????TVT_TaTabY^^hy111W
8 
, 
, 
 
 
 
 
 
	 	rQ   rR   c                     t          j        fd| j        D                                           d          z   t          j        fd| j        D                                           d          z   S )Nc                 &    g | ]} |          S  r   .0rk   rR   s     rP   
<listcomp>z4ParallelThingsBlock._forward_jit.<locals>.<listcomp>t  s!    <<<TT!WW<<<rQ   r   rZ   c                 &    g | ]} |          S r   r   r   ffnrR   s     rP   r   z4ParallelThingsBlock._forward_jit.<locals>.<listcomp>u  s!    999SSVV999rQ   )rv   stackr   sumr   r   s    `rP   _forward_jitz ParallelThingsBlock._forward_jits  s    <<<<<<<==AAaAHHH9999ty999::>>1>EEErQ   c                     t          fd| j        D                       z   t          fd| j        D                       z   S )Nc              3   .   K   | ]} |          V  d S r   r   r   s     rP   	<genexpr>z/ParallelThingsBlock._forward.<locals>.<genexpr>z  s+      33DDGG333333rQ   c              3   .   K   | ]} |          V  d S r   r   r   s     rP   r   z/ParallelThingsBlock._forward.<locals>.<genexpr>{  s+      00sCCFF000000rQ   )r   r   r   r   s    `rP   _forwardzParallelThingsBlock._forwardx  sZ    3333
3333330000di000000rQ   c                     t           j                                        st           j                                        r|                     |          S |                     |          S r   )rv   jitis_scripting
is_tracingr   r   r   s     rP   rl   zParallelThingsBlock.forward~  sS    9!!## 	$uy';';'='= 	$$$Q'''==###rQ   )rm   rn   ro   r   rF   r   rr   r   rs   rt   rp   r   ru   rC   rv   rw   r   r   ignorer   rl   rx   ry   s   @rP   r   r   ?  s         !"!"!+/!!!#%7$&L#&- -- - 	-
 - - - "%- - - - y- 	- y- 
- - - - - -^el u|    
 Y%, 5<    
$ $%, $ $ $ $ $ $ $ $rQ   r   tokenFrR   	pool_typenum_prefix_tokensreduce_include_prefixc                 X   |s| S |dk    r| d d df         } n|r| n| d d |d f         } |dk    r|                      d          } nc|dk    r1d|                     d          |                      d          z   z  } n,|dk    r|                     d          } n|rJ d	|             | S )
Nr   r   avgr%   rZ   avgmaxg      ?maxzUnknown pool type )meanamax)rR   r   r   r   s       rP   global_pool_nlcr     s      GaaadG&DAAAaaa1B1C1C.C,D1AA(""qvv!v}}qvv!v}}45AA%1AA BB"By"B"BBBBHrQ   c            G           e Zd ZU dZee         ed<   ddddddd	d	d
ddddddddddddddddddddeddee	f"de
eeeef         f         de
eeeef         f         dededed         dededededededee         ded ed!ed"ed#ed$ed%ee         ded&ed'ed(ed)ed*ed+ed,ed-ed.         d/ed0ed1ee         d2ee         d3eej                 d4eej                 d5dfF fd6Zd7 Zd^d8ed5dfd9Zd:ej        d5dfd;Zej                                        d^d<ed=ed5dfd>            Zej        j        d5efd?            Zej        j        d_d@ed5e fdA            Z!ej        j        d`dBed5dfdC            Z"ej        j        d5ej        fdD            Z#dadedee         fdEZ$	 	 dbdeeeef                  deeeef                  fdFZ%dGej&        d5ej&        fdHZ'	 	 	 	 	 	 dcdGej&        dJee
ee(e         f                  dKedLedMedNedOed5e
e(ej&                 eej&        e(ej&                 f         f         fdPZ)	 	 	 dddJe
ee(e         f         dRedSefdTZ*	 	 	 	 dedGej&        dUe
ee(e         ee         f         dVedKedLed5e(ej&                 fdWZ+dGej&        d5ej&        fdXZ,dadGej&        dYee         d5ej&        fdZZ-d_dGej&        d[ed5ej&        fd\Z.dGej&        d5ej&        fd]Z/ xZ0S )fr.   z Vision Transformer

    A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale`
        - https://arxiv.org/abs/2010.11929
    dynamic_img_size      r?     r         r   TFNlearnr   r3    img_size
patch_sizein_chansnum_classesglobal_poolr   r   r   r   r   map	embed_dimdepthr5   r   r6   r7   r}   class_token	pos_embedno_embed_class
reg_tokenspre_norm
final_normfc_normdynamic_img_pad	drop_ratepos_drop_ratepatch_drop_rateproj_drop_rateattn_drop_ratedrop_path_rateweight_init)skipjaxjax_nlhbmocor   fix_initembed_layerr:   r   block_fnr   r;   c#                   	
 !"'( t                                                       |dv sJ |s|dk    sJ |dv sJ ||dv n|}#t                    pt          t          j        d          t                     pt          j         || _        || _	        x| _
        x| _        | _        |rdnd	| _        | xj        |z  c_        || _        || _        || _        || _        d
| _        i }$|r$|$                    t)          d
d                      |d|||| |d|$| _        | j        j        }%t/          | j        d          r| j                                        n|(|r(t	          j        t5          j        dd                    nd| _        |r(t	          j        t5          j        d|                    nd| _        |r|%n	|%| j        z   }&|r|dk    rd| _        n0t	          j        t5          j        d|&          dz            | _        t	          j         |          | _!        |d	k    rtE          || j                  | _#        nt	          j$                    | _#        |r           nt	          j$                    | _%        d t5          j&        d	||          D             't	          j'         !'"	
fdtQ          |          D              | _)        (fdtQ          |          D             | _*        |r|#s           nt	          j$                    | _+        |dk    rtY          | j        	          | _-        nd| _-        |r|#r           nt	          j$                    | _.        t	          j         |          | _/        |d	k    rt	          j0        | j        |          nt	          j$                    | _1        |dk    r| 2                    |           |r| 3                                 dS dS )a  
        Args:
            img_size: Input image size.
            patch_size: Patch size.
            in_chans: Number of image input channels.
            num_classes: Number of classes for classification head.
            global_pool: Type of global pooling for final sequence (default: 'token').
            embed_dim: Transformer embedding dimension.
            depth: Depth of transformer.
            num_heads: Number of attention heads.
            mlp_ratio: Ratio of mlp hidden dim to embedding dim.
            qkv_bias: Enable bias for qkv projections if True.
            init_values: Layer-scale init values (layer-scale enabled if not None).
            class_token: Use class token.
            no_embed_class: Don't include position embeddings for class (or reg) tokens.
            reg_tokens: Number of register tokens.
            pre_norm: Enable norm after embeddings, before transformer blocks (standard in CLIP ViT).
            final_norm: Enable norm after transformer blocks, before head (standard in most ViT).
            fc_norm: Move final norm after pool (instead of before), if None, enabled when global_pool == 'avg'.
            drop_rate: Head dropout rate.
            pos_drop_rate: Position embedding dropout rate.
            attn_drop_rate: Attention dropout rate.
            drop_path_rate: Stochastic depth rate.
            weight_init: Weight initialization scheme.
            fix_init: Apply weight initialization fix (scaling w/ layer index).
            embed_layer: Patch embedding layer.
            norm_layer: Normalization layer.
            act_layer: MLP activation layer.
            block_fn: Transformer block layer.
        r   r   )r   noner   N)r   r   r   ư>)epsr%   r   FNHWC)strict_img_size
output_fmt)r   r   r   r  rA   r
  
feat_ratior  {Gz?)rb   r   c                 6    g | ]}|                                 S r   )item)r   rR   s     rP   r   z.VisionTransformer.__init__.<locals>.<listcomp>  s     JJJAqvvxxJJJrQ   c                 J    g | ]} 	|         
            S ))r4   r5   r   r6   r7   r}   r9   r8   r   r:   r   r   r   )r   ir   r  r  dprr  r}   r   r   r:   r5   r  r7   r6   s     rP   r   z.VisionTransformer.__init__.<locals>.<listcomp>  sd     &# &# &#  H##!'((a&%##  &# &# &#rQ   c                 :    g | ]}t          d |           S )blocks.)modulenum_chs	reductiondict)r   r&  r  r,  s     rP   r   z.VisionTransformer.__init__.<locals>.<listcomp>.  sA     f f fSTD!yINNNf f frQ   r   )r5   r   r:   r  r   )4rB   rC   r#   r   rF   rr   r"   r   r   r   num_featureshead_hidden_sizer  r   num_reg_tokenshas_class_tokenr  r   grad_checkpointingupdater.  patch_embednum_patcheshasattrr   r   rv   r   	cls_token	reg_tokenr  randnrL   pos_dropr   
patch_droprI   norm_prelinspacer   r   blocksfeature_infor   r   	attn_poolr	  	head_droprG   headr   fix_init_weight)*rN   r   r   r   r   r   r  r  r5   r   r6   r7   r}   r  r  r  r  r  r  r	  r   r
  r  r  r  r  r  r  r  r  r  r:   r   r  r   use_fc_norm
embed_argsr6  	embed_lenr'  r,  rO   s*         ` `````            ``    ````    @@rP   rC   zVisionTransformer.__init__  s   F 	JJJJJ4kW444411111AHk%===V]#J//R72<T3R3R3R
!),,7	&&ENNND1DN&1!8q*,(*, 0"'
 	Nd5VLLLMMM&; 
!+
 
 
 
 &25<T=M|5\5\lD$//111bl	GR\ek!Q	&B&BCCCX\PZdek!Z&K&KLLL`d#1[KK{TE[7[	 	VI//!DNN\%+aI*N*NQT*TUUDN
]333Q*"&"8  DOO
 !kmmDO19L

9---r{}}JJ>5!I!IJJJm &# &# &# &# &# &# &# &# &# &# &# &# &# &# &# &# 5\\&# &# &# $ f f f f fX]^cXdXdf f f-7^^JJy)))QSQ\Q^Q^	 %0##%	  DNN "DN0:]{]zz),,,PRP[P]P]I..>IAooBIdnk:::SUS^S`S`	&  k*** 	#  """""	# 	#rQ   c                     d }t          | j                  D ]K\  }} ||j        j        j        j        |dz               ||j        j        j        j        |dz              Ld S )Nc                 Z    |                      t          j        d|z                       d S )Ng       @)div_mathsqrt)param	_layer_ids     rP   rescalez2VisionTransformer.fix_init_weight.<locals>.rescaleF  s(    JJtyy1122222rQ   r%   )	enumerater?  rk   rM   r   datar   fc2)rN   rO  layer_idlayers       rP   rD  z!VisionTransformer.fix_init_weightE  s}    	3 	3 	3  )55 	= 	=OHeGEJO*/A>>>GEIM(-x!|<<<<	= 	=rQ   modec                 |   |dv sJ d|v rt          j        | j                   nd}| j        t	          | j        d           | j        &t          j                            | j        d           | j	        &t          j                            | j	        d           t          t          ||          |            d S )N)r  r  r  r   nlhbr3   r!  stdr  )rK  logr   r  r   r8  rF   r   normal_r9  r(   get_init_weights_vit)rN   rU  	head_biass      rP   r   zVisionTransformer.init_weightsM  s    6666639T>>TXd.////r	>%$.c2222>%GOODNO555>%GOODNO555(y994@@@@@rQ   mc                 $    t          |           d S r   )init_weights_vit_timm)rN   r^  s     rP   _init_weightszVisionTransformer._init_weightsX  s    a     rQ   checkpoint_pathprefixc                 (    t          | ||           d S r   )_load_weights)rN   rb  rc  s      rP   load_pretrainedz!VisionTransformer.load_pretrained\  s    dOV44444rQ   c                 
    h dS )N>   r8  r  
dist_tokenr   r   s    rP   no_weight_decayz!VisionTransformer.no_weight_decay`  s    7777rQ   coarsec                 (    t          dddg          S )Nz ^cls_token|pos_embed|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr?  r-  )rN   rj  s     rP   group_matcherzVisionTransformer.group_matcherd  s%    4-/CD
 
 
 	
rQ   enablec                 v    || _         t          | j        d          r| j                            |           d S d S )Nset_grad_checkpointing)r3  r7  r5  rp  )rN   rn  s     rP   rp  z(VisionTransformer.set_grad_checkpointingk  sG    "(4#%=>> 	<33F;;;;;	< 	<rQ   c                     | j         S r   )rC  r   s    rP   get_classifierz VisionTransformer.get_classifierq  s
    yrQ   c                     || _         |8|dv sJ |dk    r| j        
J d            |dk    r| j        d | _        || _        |dk    rt          j        | j        |          nt          j                    | _        d S )Nr   r   Fz=Cannot currently add attention pooling in reset_classifier().r   )r   rA  r   rF   rG   r  rI   rC  )rN   r   r   s      rP   reset_classifierz"VisionTransformer.reset_classifieru  s    &""NNNNNe##(>]]]]]%%$.*D!%*D>IAooBIdnk:::SUS^S`S`			rQ   c           	      J   | j         j        }| j                             ||           | j        q| j        rdn| j        }| j         j        |z   }|| j        j        d         k    r>t          j	        t          | j        | j         j        ||d                    | _        dS dS dS )zMethod updates the input image resolution, patch size

        Args:
            img_size: New input resolution, if None current resolution is used
            patch_size: New patch size, if None existing patch size is used
        )r   r   Nr   r%   T)new_sizeold_sizer   verbose)r5  	grid_sizeset_input_sizer  r  r   r6  r[   rF   r   r    )rN   r   r   prev_grid_sizer   num_new_tokenss         rP   rz  z VisionTransformer.set_input_size  s     )3''j'QQQ>%%)%8 Td>T!-9<MMN!5a!888!#.DN!-7+&7 / / / " "	 &% 98rQ   rR   c                    | j         -|                    |j        d         d|j        d                   S | j        rK|j        \  }}}}t	          | j         ||f| j        rdn| j                  }|                    |d|          }n| j         }g }| j        :|                    | j        	                    |j        d         dd                     | j
        :|                    | j
        	                    |j        d         dd                     | j        r"||z   }|rt          j        ||gz   d          }n!|rt          j        ||gz   d          }||z   }|                     |          S )Nr   rY   r"  r%   rZ   )r  r   r[   r   r    r  r   r8  r   expandr9  rv   r   r;  )rN   rR   re   HWrg   r  to_cats           rP   
_pos_embedzVisionTransformer._pos_embed  s}   >!66!'!*b!'"+666  		'JAq!Q.A'+':"V!!@V  I
 q"a  AAI>%MM$.//
BCCDDD>%MM$.//
BCCDDD 	 IA 3Ifsl222  3Ifsl222IA}}QrQ   NCHWindicesreturn_prefix_tokensr   
stop_earlyr  intermediates_onlyc                     |dv s
J d            |dk    }g }	t          t           j                  |          \  }
}|j        \  }}}                     |          }                     |          }                     |          }                     |          }t          j	        
                                s|s j        }n j        d|dz            }t          |          D ]@\  }} ||          }||
v r,|	                    |r                     |          n|           A j        r fd|	D             } fd|	D             }	|r/ j                            ||f          \  fd|	D             }	t          j	        
                                s|rt!          t#          |	|                    }	|r|	S                      |          }||	fS )	a=   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            return_prefix_tokens: Return both prefix and spatial intermediate tokens
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r  NLCz)Output format must be one of NCHW or NLC.r  Nr%   c                 6    g | ]}|d d dj         f         S )Nr   r"  r   r   rN   s     rP   r   z;VisionTransformer.forward_intermediates.<locals>.<listcomp>  s/    SSSQqqq!D$:"::;SSSrQ   c                 6    g | ]}|d d j         d f         S r   r"  r  s     rP   r   z;VisionTransformer.forward_intermediates.<locals>.<listcomp>  s0    RRRqQqqq$"8"9"99:RRRrQ   c                     g | ]B}|                     d                               dddd                                          CS )rY   r   r?   r%   rT   )r\   r]   
contiguous)r   r   re   r  r  s     rP   r   z;VisionTransformer.forward_intermediates.<locals>.<listcomp>  sL    lllYZQYYq!Q33;;Aq!QGGRRTTlllrQ   )r'   lenr?  r[   r5  r  r<  r=  rv   r   r   rP  r   r   r   dynamic_feat_sizelistzip)rN   rR   r  r  r   r  r  r  r\   intermediatestake_indices	max_indexr   heightwidthr?  r&  blkprefix_tokensre   r  r  s   `                  @@@rP   forward_intermediatesz'VisionTransformer.forward_intermediates  s$   . _,,,.Y,,,&"6s4;7G7G"Q"Qi  g1feQOOAOOAMM!9!!## 	1: 	1[FF[)a-0F'' 	B 	BFAsAAL  $$T%@TYYq\\\qAAA ! 	SSSSS]SSSMRRRRMRRRM 	m#55vuoFFDAqllllll^klllMy%%'' 	D,@ 	D ]M!B!BCCM 	!  IIaLL-rQ   r%   
prune_norm
prune_headc                    t          t          | j                  |          \  }}| j        d|dz            | _        |rt          j                    | _        |r.t          j                    | _        |                     dd           |S )z@ Prune layers not required for specified intermediates.
        Nr%   r   r   )r'   r  r?  rF   rI   r   r	  rt  )rN   r  r  r  r  r  s         rP   prune_intermediate_layersz+VisionTransformer.prune_intermediate_layers  s~     #7s4;7G7G"Q"Qik.9q=.1 	&DI 	);==DL!!!R(((rQ   nr\   c                 @    |                      |||||rdndd          S )z Intermediate layer accessor inspired by DINO / DINOv2 interface.
        NOTE: This API is for backwards compat, favour using forward_intermediates() directly.
        r  r  T)r  r   r  r  )r  )rN   rR   r  r\   r  r   s         rP   get_intermediate_layersz)VisionTransformer.get_intermediate_layers  s;     ))q!5!(3vve# * 
 
 	
rQ   c                 x   |                      |          }|                     |          }|                     |          }|                     |          }| j        r4t
          j                                        st          | j	        |          }n| 	                    |          }| 
                    |          }|S r   )r5  r  r<  r=  r3  rv   r   r   r)   r?  r   r   s     rP   forward_featuresz"VisionTransformer.forward_features"  s    QOOAOOAMM!" 	59+A+A+C+C 	t{A..AAAAIIaLLrQ   r   c                     | j         |                      |          }|S || j        n|}t          ||| j                  }|S )N)r   r   )rA  r   r   r   )rN   rR   r   s      rP   poolzVisionTransformer.pool.  sN    >%q!!AH(1(9D$$y	AdF\]]]rQ   
pre_logitsc                     |                      |          }|                     |          }|                     |          }|r|n|                     |          S r   )r  r	  rB  rC  )rN   rR   r  s      rP   forward_headzVisionTransformer.forward_head6  sJ    IIaLLLLOONN10qqDIIaLL0rQ   c                 Z    |                      |          }|                     |          }|S r   )r  r  r   s     rP   rl   zVisionTransformer.forward<  s-    !!!$$a  rQ   r   F)Tr   )NN)NFFFr  F)r%   FT)r%   FFF)1rm   rn   ro   r   r   rp   rq   r   r   r   r   rs   r
   r   rt   r   strr   r$   r   rF   ru   rC   rD  r   ra  rv   r   r   rf  r	   ri  r   rm  rp  rr  rt  rz  rw   r  r   r  r  r  r  r  r  rl   rx   ry   s   @rP   r.   r.     s         
 Dk!!! 5868#OV !!!+/ $$#("#&*%*$)!#%%'$&$&$&JL"$..2-1(-),G]# ]#CsCx01]# c5c?23]# 	]#
 ]# !!KL]# ]# ]# ]# ]# ]# ]# "%]# ]# ]#  !!]#" #]#$ %]#& ']#( d^)]#* #+]#, "-]#. /]#0 !1]#2 #3]#4 "5]#6 "7]#8 "9]#: !!FG;]#< =]#> "?]#@ !+A]#B  	*C]#D 29oE]#F BIG]#H 
I]# ]# ]# ]# ]# ]#~= = =	A 	A 	Ad 	A 	A 	A 	A!ry !T ! ! ! ! Y5 5s 5C 5 5 5 5 5 Y8 8 8 8 8 Y
 
D 
T 
 
 
 
 Y< <T <T < < < <
 Y	    	a 	aC 	ahsm 	a 	a 	a 	a 3748 uS#X/ !sCx1   2" EL " U\ "  "  "  " N 8<).$$',?  ? |?  eCcN34?  #'	? 
 ?  ?  ?  !%?  
tEL!5tEL7I)I#JJ	K?  ?  ?  ? F ./$#	 3S	>*  	   ( 45!).
 
|
 S$s)U3Z/0
 	

 #'
 
 
el	
 
 
 
&
%, 
5< 
 
 
 
 el x}     1 1el 1 1 1 1 1 1 %,        rQ   r   r*  namer;   c                    t          | t          j                  rEt          | j        d           | j        &t          j                            | j                   dS dS t          | d          r| 	                                 dS dS )zE ViT weight initialization, original timm impl (for reproducibility) r!  rX  Nr   )

isinstancerF   rG   r   r   rA   r   zeros_r7  r   )r*  r  s     rP   r`  r`  B  s    &")$$ fm----;"GNN6;''''' #"		(	(  rQ   r3   r]  c                    t          | t          j                  r|                    d          rKt          j                            | j                   t          j                            | j        |           dS t          j        	                    | j                   | j        Pd|v r&t          j        
                    | j        d          n#t          j                            | j                   dS dS t          | t          j                  rCt          | j                   | j        &t          j                            | j                   dS dS t          | d          r|                                  dS dS )z5 ViT weight initialization, matching JAX (Flax) impl rC  Nr   r  rX  r   )r  rF   rG   
startswithr   r  r   r   rA   xavier_uniform_r[  Conv2dr   r7  r   )r*  r  r]  s      rP   init_weights_vit_jaxr  L  sH   &")$$ ??6"" 	iGNN6=)))Gfk955555G##FM222{&:?4--666RW^^\b\gMhMh   '&	FBI	&	& fm$$$;"GNN6;''''' #"		(	(  rQ   c                    t          | t          j                  rd|v rrt          j        dt          | j        j        d         dz  | j        j        d         z             z            }t          j        	                    | j        | |           n$t          j        
                    | j                   | j        &t          j                            | j                   dS dS t          | d          r|                                  dS dS )zI ViT weight initialization, matching moco-v3 impl minus fixed PatchEmbed rH   g      @r   r?   r%   Nr   )r  rF   rG   rK  rL  rt   r   r[   r   uniform_r  rA   r  r7  r   )r*  r  vals      rP   init_weights_vit_mocor  ^  s    &")$$ 
D==)Bv}':1'='BV]EXYZE['[!\!\\]]CGV]SD#6666G##FM222;"GNN6;''''' #"		(	(  rQ   r  rU  c                 Z    d| v rt          t          |          S d| v rt          S t          S )Nr  )r]  r  )r   r  r  r`  )rU  r]  s     rP   r\  r\  m  s4    }}+yAAAA	4$$$$rQ   r   bicubicposemb
posemb_newgs_newinterpolation	antialiasc           	          |j         d         |z
  }| j         d         |z
  }t          t          j        |                    gdz  }t	          |          s%t          t          j        |                    gdz  }t          | |||||d          S )z Rescale the grid of position embeddings when loading from state_dict.
    *DEPRECATED* This function is being deprecated in favour of using resample_abs_pos_embed
    r%   rT   T)r   r  r  rx  )r[   rs   rK  rL  r  r    )	r  r  r   r  r  r  ntok_newntok_oldgs_olds	            rP   resize_pos_embedr  v  s     "%66H|A!22H$)H%%&&'!+Fv;; 0di))**+a/!+#   rQ   modelrb  rc  c                    ddl }dLd|                    |          d}d}d}|sdv rd}nd	v rd
}d}ndv rd}d}t          | j        d          r| j        j        }t          |d           }|r|n|j        }	|	j        j                            t          |	j        j        j
        d          | d                                        |	j        j                             | d                              |	j        j                             | d                              |st          |j                  D ]\  }
}t          |j                  D ]\  }}| d|
dz    d|dz    d}t!          d          D ]}t#          |d|dz              j                             | d|dz    d                              t#          |d|dz              j                             | d|dz    d                              t#          |d|dz              j                             | d|dz    d                              |j        |j        j        j                             | d                              |j        j        j                             | d                              |j        j        j                             | d                               | d                    }n<t          | j        j        j        j
        d          | d                              }|j
        d!d         | j        j        j        j
        d!d         k    r0t)          || j        j        j        j
        d!d         ||d"          }| j        j        j                            |           | j        j        j                             | d#                              | j        .| j                             | d$         d%                     |r | d&         d%          }n | d'         d%          }|j
        | j        j
        k    rJ|j
        }t#          | d(d          rdnt#          | d)d          }t/          || j        j        |||d*          }| j                            |           | j        j                             | d+                              | j        j                             | d,                              t3          | j        t6          j                  r| d-v r| j        j        j
        d         | d-         j
        d.         k    rb| j        j                             | d/                              | j        j                             | d-                              | j        | d0}|d1z   | j        j                             | d2         d%                     | j        j        j                            tA          j!        fd3d4D                                  | j        j        j                            tA          j!        fd5d4D                                  | j        j"        j                              d6         d%          #                    d          j$                   | j        j"        j                              d7         d%          %                    d.                     | j        j        j                              d8                   #                    d                     | j        j        j                              d9                              | j        j        j                             | d:                              | j        j        j                             | d;                              t!          d<          D ]}t#          | j        j&        d=|dz              j                             | d>| d                              t#          | j        j&        d=|dz              j                             | d>| d                              |rd?nd@\  }}}t          | j        '                                          D ]\  }
}| dAv r| dB}|
n
| dC|
 d}d|dD| dz   |j(        j                             | d:         E                     |j(        j                             | d;         E                     |j)        j*        j                            tA          j!        fdFdGD                                  |j)        j*        j                            tA          j!        fdHdGD                                  |j)        j        j                              d8         E          #                    d                     |j)        j        j                              d9         E                     |j+        j                             | dI| d         E                     |j+        j                             | dI| d         E                     t!          d<          D ]}t#          |j&        d=|dz              j                             | dJ| dK| d         E                     t#          |j&        d=|dz              j                             | dJ| dK| d         E                     dS )MzV Load weights from .npz checkpoints for official Google Brain Flax implementation
    r   NTc                    || |         } | j         dk    rL| j        d         | j        d         cxk    r| j        d         cxk    rdk    rn n|                                 } |rh| j         dk    r|                     g d          } nE| j         dk    r|                     g d          } n"| j         dk    r|                     ddg          } t	          j        |           S )NrU   r   r%   rT   )r?   rT   r   r%   r?   )rT   r   r%   )ndimr[   flattenrc   rv   
from_numpy)wtidxs      rP   _n2pz_load_weights.<locals>._n2p  s    ?#A6Q;;171:FFFFqwqzFFFFQFFFFF		A 	(v{{KK--1KK			**1KKA''"""rQ   bilinearFzopt/target/embedding/kernelzopt/target/zparams/embedding/kernelzparams/zparams/img/embedding/kernelzparams/img/backbonerl  r%   zconv_root/kernelzgn_root/scalezgn_root/biasblockz/unit/r?   conv/kernelr   gnz/scale/biaszconv_proj/kernelzgn_proj/scalezgn_proj/biaszembedding/kernelrX   r  r  rx  zembedding/biasclsr  pos_embeddingz(Transformer/posembed_input/pos_embeddingr  r   rv  r   r  r  rx  zTransformer/encoder_norm/scalezTransformer/encoder_norm/biasz	head/biasrY   zhead/kernelz
MAPHead_0/zMultiHeadDotProductAttention_0/probec                 p    g | ]2}  | d          d                               d          j        3S )r  Fr  r%   r  Tr   r  r  
mha_prefixr  s     rP   r   z!_load_weights.<locals>.<listcomp>  s_     3` 3` 3`IJDDj,!,,,-777??BBD3` 3` 3`rQ   )keyvaluec                 f    g | ]-}  | d          d                               d          .S )r  Fr  rY   r\   r  s     rP   r   z!_load_weights.<locals>.<listcomp>  s\     1] 1] 1]FGDDj*!***+u555==bAA1] 1] 1]rQ   zquery/kernelz
query/biasz
out/kernelzout/biaszLayerNorm_0/scalezLayerNorm_0/biasrT   fczMlpBlock_0/Dense_)r   r   r%   )r%   r?   rT   z*Transformer/encoderblock/LayerNorm_0/scalezTransformer/encoderblock/zTransformer/encoderblock_MultiHeadDotProductAttention_)r  c                 r    g | ]3}  | d          d                               d          j        4S )r  Fr  r  r%   r  r   r  r  r  r  r  s     rP   r   z!_load_weights.<locals>.<listcomp>  sa     /r /r /rRSDDj,!,,,-C@@@HHKKM/r /r /rrQ   )queryr  r  c                 h    g | ].}  | d          d                               d          /S )r  Fr  rY   r  r  s     rP   r   z!_load_weights.<locals>.<listcomp>  s^     -o -o -oOPDDj*!***+u#>>>FFrJJ-o -o -orQ   
LayerNorm_	MlpBlock_z/Dense_)TN),numpyloadr7  r5  r  rl  r  r   copy_r*   r[   r   rA   rP  stagesr?  r   getattr
downsamplerM   r   r8  r  r    ry  r  rC  rF   rG   rA  latentkvrv   r   rh   r  r  r\   r   childrenr   rk   rH   r   )r  rb  rc  npr  r  
big_visionr  	stem_onlyrl  r&  stagejr  bprembed_conv_wpos_embed_w	old_shaper   block_prefixmha_subb_subln1_subr  r  r  r  s                           @@@@rP   re  re    s+    # # # # 	  AMIJ (A--"FF&!++FJJ*a//"FJu *-- Z$-&111	$7xx(-	/	0@0Fq0I44PQU[RmRmRmPnKoKoppqqq	ttA&>&>&>$?@@AAA	TT!v$;$;$;"<==>>> 	W%ho66 
W 
W5 )%, 7 7 	W 	WHAu"==Q==QU===B"1XX b b~a!e~~66=CCDDbKdKdVWZ[V[KdKdKdIeDfDfggg~a!e~~66=CCDDbKaKaTUXYTYKaKaKaIbDcDcddd~a!e~~66;AA$$qBI^I^RSVWRWI^I^I^G_B`B`aaaa'3(-4::44RBYBYBY@Z;[;[\\\(-4::44RBVBVBV@W;X;XYYY(-288a2@S@S@S>T9U9UVVV	W tA999:;;'")/2DDf;V;V;V9W4X4XZ Z"##%"3"8"?"Ebcc"JJJ+")/4'
 
 
 
!''555	%%dd1-F-F-F+G&H&HIII"dd1^^^#4>>>??? \d15556%@@@d1PPPQUZ[[[EO111%	!(0@%!H!HtAAgV[]prsNtNt,&0/'
 
 
 
O+&&&	JDDf#L#L#L!MNNOOO	JO$$qF!I!I!IJKKLLL5:ry)) =   A%%JO!!$V*>*>*>(?(Eb(III
Q&'='='=%> ? ?@@@
dd1%9%9%9#:;;<<<
 " ,,,!$FF
$$TT!|,B,B,B*Cu%M%M%MNNN!''	 3` 3` 3` 3` 3` 3`N^3` 3` 3` )a )a 	b 	b 	b%%ei 1] 1] 1] 1] 1] 1]K[1] 1] 1] '^ '^ 	_ 	_ 	_ &&ttA.I.I.I,Je'T'T'T'\'\]^'_'_'abbb$$TT!z,E,E,E*F%%P%P%P%X%XY[%\%\]]]#))$$qJ1J1J1J/K*L*L*T*TUV*W*WXXX!''Q*/F/F/F-G(H(HIII#))$$qL1S1S1S/T*U*UVVV!''Q,/P/P/P-Q(R(RSSSq 	w 	wAEO'a!e66=CCDDlKwKwmnKwKwKwIxDyDyzzzEO'a!e66;AA$$qLIsIsklIsIsIsGtBuBuvvvv+5Dii9GUGel335566 T T5@@@AEE$???LCC$CCqCCCLC!$NG$N$N$NN
  a<(J(J(J&KQT!U!U!UVVVttA&G&G&G$HcRRRSSS
##EI /r /r /r /r /r /r /rWp/r /r /r %s %s 	t 	t 	t
!!%) -o -o -o -o -o -o -oTm-o -o -o #p #p 	q 	q 	q
$$TT!z,E,E,E*FC%P%P%P%X%XYZ%[%[\\\
""44Z*A*A*A(B#L#L#LMMM  a<(R(R7(R(R(R&SY\!]!]!]^^^ttA&O&O&O&O&O$PVYZZZ[[[q 	T 	TAEI|AE||,,399Q,IIIIqIIIJPSTTTV V VEI|AE||,,177Q,GGGGqGGGHcRRRT T T T	T%T TrQ   visual.
state_dictc                    i }g d}|                                  D ]\  }}|                    |          s|                    |d          }|D ]$}|                    |d         |d                   }%|dk    r;d}|                    dd          }t	          j        |j        d                   |d<   nL|dk    r+d	}|                    d                              d          }n|d
k    r|                    d          }|||<   |S )N)
)conv1patch_embed.proj)positional_embeddingr  )ztransformer.resblocks.r)  )ln_prer=  )ln_postr   )ln_r   )in_proj_zqkv.)out_projrM   )zmlp.c_fczmlp.fc1)z
mlp.c_projzmlp.fc2r   r   r%   rM   head.weight	head.biasclass_embeddingr8  r  )itemsr  replacerc   rv   r   r[   	unsqueeze)r  r  rc  out_dictswapsri   rj   sps           rP   _convert_openai_clipr    s)   
 H  E   ""  1||F## 	IIfb!! 	( 	(B		"Q%A''AA;;AAq!!A$)K
$;$;H[!!###AA((++AA+AAOrQ   c                    dd l }i }|                     dd            d| v rg|                     d          |d<   |                     d          | d         d d df         z   |d<   |                     d          d d dd f         |d<   |                                 D ]j\  }}|                    d|          r|||                    d	d
          <   5|                    d|          r|||                    dd          <   e|||<   k|S )Nr   
mask_tokenregister_tokensr9  r8  r  r%   z(blocks\.(\d+)\.mlp\.w12\.(?:weight|bias)w12fc1z'blocks\.(\d+)\.mlp\.w3\.(?:weight|bias)w3rR  )repopr  matchr  )r  r  r"  r  ri   rj   s         rP   _convert_dinov2r%  C  s4    IIIHNN<&&&J&& */@ A A *{ ; ;j>UVWVWVWYZVZ>[ [ *{ ; ;AAAqrrE B  ""  188?CC 	01HQYYue,,-XX@!DD 	/0HQYYtU++,OrQ   Tadapt_layer_scalec           	         ddl }i }|                     d|           } |                     d|           } dd| v rt          | |          } nd| v rt          | |d	          } nd
| v rt          | |          } nod| v r| d         } dn`d| v sd| v rXdd| v rRt	          |j        t          j                  r3| d         |d<   t          j	        | d         j
        d                   |d<   r fd|                                 D             } |                                 D ]=\  }}d|v r|j        j        j        j
        \  }	}
}}t          |j
                  dk     r3|j        j        j        j
        \  }	}
}}|                    |	d||          }|j
        d         |k    s|j
        d         |k    rt#          |||f||d          }n|dk    re|j
        d         |j        j
        d         k    rDt'          |dd          rdnt'          |dd          }t)          ||j        j        |||d          }n$|rd |v r|                    d!d"|          }nd#|v r8|||<   ?|S )$zJ convert patch embedding weight from manual patchify + linear proj to convr   Nr  r  r   zvisual.class_embeddingzmodule.visual.class_embeddingzmodule.visual.)rc  r  encoderzmodule.zvisual.trunk.pos_embedz"visual.trunk.blocks.0.norm1.weightzvisual.trunk.zvisual.head.proj.weightr  r  c                 p    i | ]2\  }}|                               |t                    d          |3S r   )r  r  )r   ri   rj   rc  s      rP   
<dictcomp>z(checkpoint_filter_fn.<locals>.<dictcomp>|  s?    ```TQ1<<X^K_K_`aFoq```rQ   zpatch_embed.proj.weightrU   rY   rX   Tr  r  r%   r  Fr   r  gamma_zgamma_([0-9])z
ls\1.gammar  )r"  getr  r%  r  rC  rF   rG   rv   r   r[   r  r5  rM   r   r  r\   r   r  r  r    ry  sub)r  r  r&  r  r  r"  r  ri   rj   OIr  r  r   rc  s                 @rP   checkpoint_filter_fnr0  Z  s    IIIH44Jj99JF:--)*e<<

	(J	6	6)*eDTUUU

		#	#$Z77

	j	 	 	*
	!Z	/	/3W[e3e3e $
22z%*bi7X7X2&01J&KH]#$)K
;T0U0[\]0^$_$_H[! a````Z5E5E5G5G```
  ""    1$))*/6<JAq!Q17||a".3:@
1aIIaQ**wr{a172;!#3#3(F"/'    +!'!*0Ea0H"H"H%,U4De%L%L xRYZ_atvwRxRx&*4"3+#  AA  	8q=='::AAQOrQ   urlc                 6    | ddd dddt           t          ddd|S )	Nr   )r?   r   r   g?r  Tr  rC  )r1  r   
input_size	pool_sizecrop_pctr  fixed_input_sizer   rY  
first_conv
classifier)r   r   )r1  kwargss     rP   _cfgr:    s<    #" '%(   rQ   z*vit_base_patch16_224.augreg2_in21k_ft_in1kztimm/)	hf_hub_idz*vit_base_patch16_384.augreg2_in21k_ft_in1kz)vit_base_patch8_224.augreg2_in21k_ft_in1kz)vit_tiny_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz)r1  r;  custom_loadz)vit_tiny_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz)r?     r=        ?)r1  r;  r<  r3  r5  z*vit_small_patch32_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz*vit_small_patch32_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz*vit_small_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz*vit_small_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz)vit_base_patch32_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz)vit_base_patch32_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_light1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz)vit_base_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz)vit_base_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npzz(vit_base_patch8_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz*vit_large_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz*vit_large_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzz'vit_base_patch16_224.orig_in21k_ft_in1kzohttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth)r1  r;  z'vit_base_patch16_384.orig_in21k_ft_in1kzohttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_384-83fb41ba.pth)r1  r;  r3  r5  z(vit_large_patch32_384.orig_in21k_ft_in1kzphttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p32_384-9b920ba8.pthz!vit_small_patch16_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz!vit_small_patch16_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npzz vit_base_patch32_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz vit_base_patch32_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzz vit_base_patch16_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i1k-300ep-lr_0.001-aug_strong2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz vit_base_patch16_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i1k-300ep-lr_0.001-aug_strong2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzzvit_large_patch14_224.untrained)r1  zvit_huge_patch14_224.untrainedzvit_giant_patch14_224.untrainedz"vit_gigantic_patch14_224.untrainedzvit_base_patch32_224.orig_in21k)r;  r   zvit_base_patch16_224.orig_in21kz vit_large_patch32_224.orig_in21kz vit_large_patch16_224.orig_in21kzvit_huge_patch14_224.orig_in21kz!vit_tiny_patch16_224.augreg_in21kzmhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0.npziSU  )r1  r;  r<  r   z"vit_small_patch32_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npzz"vit_small_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npzz!vit_base_patch32_224.augreg_in21kzohttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0.npzz!vit_base_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npzz vit_base_patch8_224.augreg_in21kzmhttps://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npzz"vit_large_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1.npzzvit_base_patch32_224.sam_in1kz:https://storage.googleapis.com/vit_models/sam/ViT-B_32.npz)r1  r<  r;  zvit_base_patch16_224.sam_in1kz:https://storage.googleapis.com/vit_models/sam/ViT-B_16.npzzvit_small_patch16_224.dinoz[https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth)r1  r;  r   rY  r   zvit_small_patch8_224.dinozYhttps://dl.fbaipublicfiles.com/dino/dino_deitsmall8_pretrain/dino_deitsmall8_pretrain.pthzvit_base_patch16_224.dinozWhttps://dl.fbaipublicfiles.com/dino/dino_vitbase16_pretrain/dino_vitbase16_pretrain.pthzvit_base_patch8_224.dinozUhttps://dl.fbaipublicfiles.com/dino/dino_vitbase8_pretrain/dino_vitbase8_pretrain.pthz vit_small_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pthz
apache-2.0)r?     r?  )r1  r;  licenser   rY  r   r3  r5  zvit_base_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_pretrain.pthz vit_large_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_pretrain.pthz vit_giant_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_pretrain.pthz%vit_small_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_reg4_pretrain.pthz$vit_base_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_reg4_pretrain.pthz%vit_large_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_reg4_pretrain.pthz%vit_giant_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_reg4_pretrain.pthzvit_base_patch16_224_miil.in21kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/vit_base_patch16_224_in21k_miil-887286df.pth)r3   r3   r3   )r>  r>  r>  g      ?r  i+  )r1  r;  r   rY  r5  r  r   z'vit_base_patch16_224_miil.in21k_ft_in1kzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/vit_base_patch16_224_1k_miil_84_4-2deb18e3.pth)r1  r;  r   rY  r5  r  z vit_base_patch16_rpn_224.sw_in1kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_base_patch16_rpn_224-sw-3b07e89d.pthz#vit_medium_patch16_gap_240.sw_in12k)r?      rA  gffffff?i-.  )r;  r3  r5  r   z+vit_medium_patch16_gap_256.sw_in12k_ft_in1k)r?      rB  )r;  r3  r5  z+vit_medium_patch16_gap_384.sw_in12k_ft_in1ksquash)r;  r3  r5  	crop_modevit_base_patch16_gap_224z/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k)r;  r   rY  z/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k)r;  r   rY  r5  r3  z/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k)r?     rF  z/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k)r;  r   rY  r5  z/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k)r;  r   rY  r5  r3  rD  z0vit_large_patch14_clip_224.laion2b_ft_in12k_in1kz0vit_large_patch14_clip_336.laion2b_ft_in12k_in1k)r?   P  rG  z/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1kz/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1kz.vit_base_patch32_clip_224.openai_ft_in12k_in1k)r   rY  z.vit_base_patch32_clip_384.openai_ft_in12k_in1kz.vit_base_patch16_clip_224.openai_ft_in12k_in1kz.vit_base_patch16_clip_384.openai_ft_in12k_in1kz/vit_large_patch14_clip_224.openai_ft_in12k_in1kz/vit_large_patch14_clip_336.openai_ft_in12k_in1kz)vit_base_patch32_clip_224.laion2b_ft_in1kz)vit_base_patch16_clip_224.laion2b_ft_in1kz)vit_base_patch16_clip_384.laion2b_ft_in1kz*vit_large_patch14_clip_224.laion2b_ft_in1kz*vit_large_patch14_clip_336.laion2b_ft_in1kz)vit_huge_patch14_clip_224.laion2b_ft_in1kz)vit_huge_patch14_clip_336.laion2b_ft_in1kz(vit_base_patch32_clip_224.openai_ft_in1kz(vit_base_patch16_clip_224.openai_ft_in1kz(vit_base_patch16_clip_384.openai_ft_in1kz)vit_large_patch14_clip_224.openai_ft_in1kz*vit_base_patch32_clip_224.laion2b_ft_in12k)r   rY  r   z*vit_base_patch16_clip_224.laion2b_ft_in12k)r;  r   rY  r   z+vit_large_patch14_clip_224.laion2b_ft_in12k)r;  r   rY  r5  r   z*vit_huge_patch14_clip_224.laion2b_ft_in12kz)vit_base_patch32_clip_224.openai_ft_in12kz)vit_base_patch16_clip_224.openai_ft_in12kz*vit_large_patch14_clip_224.openai_ft_in12kz!vit_base_patch32_clip_224.laion2bz%laion/CLIP-ViT-B-32-laion2B-s34B-b79Kzopen_clip_pytorch_model.bin   )r;  hf_hub_filenamer   rY  r   z!vit_base_patch16_clip_224.laion2bz%laion/CLIP-ViT-B-16-laion2B-s34B-b88K)r;  rI  r   rY  r5  r   z"vit_large_patch14_clip_224.laion2bz%laion/CLIP-ViT-L-14-laion2B-s32B-b82Kr   z!vit_huge_patch14_clip_224.laion2bz%laion/CLIP-ViT-H-14-laion2B-s32B-b79K   z"vit_giant_patch14_clip_224.laion2bz%laion/CLIP-ViT-g-14-laion2B-s12B-b42Kz%vit_gigantic_patch14_clip_224.laion2bz(laion/CLIP-ViT-bigG-14-laion2B-39B-b160k   z$vit_base_patch32_clip_224.datacompxlz)laion/CLIP-ViT-B-32-DataComp.XL-s13B-b90Kz$vit_base_patch32_clip_256.datacompxlz.laion/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K)r;  rI  r   rY  r5  r3  r   z$vit_base_patch16_clip_224.datacompxlz)laion/CLIP-ViT-B-16-DataComp.XL-s13B-b90Kz%vit_large_patch14_clip_224.datacompxlz)laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90Kzvit_base_patch16_clip_224.dfn2bzapple/DFN2B-CLIP-ViT-B-16 vit_large_patch14_clip_224.dfn2bzapple/DFN2B-CLIP-ViT-L-14)zDnatively QuickGELU, use quickgelu model variant for original results)r;  rI  notesr   rY  r5  r   vit_huge_patch14_clip_224.dfn5bzapple/DFN5B-CLIP-ViT-H-14vit_huge_patch14_clip_378.dfn5bzapple/DFN5B-CLIP-ViT-H-14-378)r?   z  rP  )r;  rI  r   rY  rM  r5  r3  r   (vit_base_patch32_clip_224.metaclip_2pt5bz facebook/metaclip-b32-fullcc2.5bzmetaclip_b32_fullcc2.5b.binzcc-by-nc-4.0)r;  rI  r@  rM  r   rY  r5  r   (vit_base_patch16_clip_224.metaclip_2pt5bz facebook/metaclip-b16-fullcc2.5bzmetaclip_b16_fullcc2.5b.bin)vit_large_patch14_clip_224.metaclip_2pt5bz facebook/metaclip-l14-fullcc2.5bzmetaclip_l14_fullcc2.5b.bin(vit_huge_patch14_clip_224.metaclip_2pt5bz facebook/metaclip-h14-fullcc2.5bzmetaclip_h14_fullcc2.5b.bin vit_base_patch32_clip_224.openaiz%timm/vit_base_patch32_clip_224.openai)r;  rM  r   rY  r    vit_base_patch16_clip_224.openaiz%timm/vit_base_patch16_clip_224.openai!vit_large_patch14_clip_224.openaiz&timm/vit_large_patch14_clip_224.openai)r;  rM  r   rY  r5  r   !vit_large_patch14_clip_336.openaiz&timm/vit_large_patch14_clip_336.openai)r;  rI  rM  r   rY  r5  r3  r   z#vit_base_patch32_plus_256.untrained)r1  r3  r5  z#vit_base_patch16_plus_240.untrainedz$vit_small_patch16_36x1_224.untrainedz$vit_small_patch16_18x2_224.untrainedz#vit_base_patch16_18x2_224.untrainedz)eva_large_patch14_196.in22k_ft_in22k_in1kmit)r?      rZ  )r;  r@  r   rY  r3  r5  z)eva_large_patch14_336.in22k_ft_in22k_in1k)r;  r@  r   rY  r3  r5  rD  z#eva_large_patch14_196.in22k_ft_in1kz#eva_large_patch14_336.in22k_ft_in1kzflexivit_small.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k.npz)r1  r<  r;  r3  r5  zflexivit_small.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k_600ep.npzzflexivit_small.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k_300ep.npzzflexivit_base.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k.npzzflexivit_base.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k_600ep.npzzflexivit_base.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k_300ep.npzzflexivit_base.1000ep_in21kzMhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i21k_1000ep.npz)r1  r<  r;  r3  r5  r   zflexivit_base.300ep_in21kzLhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i21k_300ep.npzzflexivit_large.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k.npzzflexivit_large.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k_600ep.npzzflexivit_large.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k_300ep.npzzflexivit_base.patch16_in21kzIhttps://storage.googleapis.com/big_vision/flexivit/vit_b16_i21k_300ep.npzzflexivit_base.patch30_in21kzIhttps://storage.googleapis.com/big_vision/flexivit/vit_b30_i21k_300ep.npzz!vit_base_patch16_xp_224.untrainedz"vit_large_patch14_xp_224.untrainedz!vit_huge_patch14_xp_224.untrainedzvit_base_patch16_224.maezEhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth)r1  r;  r@  r   rY  r   zvit_large_patch16_224.maezFhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_large.pthzvit_huge_patch14_224.maezEhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_huge.pthz#vit_huge_patch14_gap_224.in1k_ijepaz?https://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.14-300e.pth.tar)r1  r@  r   rY  r   z$vit_huge_patch14_gap_224.in22k_ijepaz@https://dl.fbaipublicfiles.com/ijepa/IN22K-vit.h.14-900e.pth.tarz#vit_huge_patch16_gap_448.in1k_ijepazEhttps://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.16-448px-300e.pth.tar)r1  r@  r3  r5  r   rY  r   z%vit_giant_patch16_gap_224.in22k_ijepaz@https://dl.fbaipublicfiles.com/ijepa/IN22K-vit.g.16-600e.pth.tarz!vit_base_patch16_siglip_224.webliztimm/ViT-B-16-SigLIP)r;  rI  r   z!vit_base_patch16_siglip_256.webliztimm/ViT-B-16-SigLIP-256)r;  rI  r3  r   z&vit_base_patch16_siglip_256.webli_i18nztimm/ViT-B-16-SigLIP-i18n-256z!vit_base_patch16_siglip_384.webliztimm/ViT-B-16-SigLIP-384z!vit_base_patch16_siglip_512.webliztimm/ViT-B-16-SigLIP-512)r?   rH  rH  z"vit_large_patch16_siglip_256.webliztimm/ViT-L-16-SigLIP-256z"vit_large_patch16_siglip_384.webliztimm/ViT-L-16-SigLIP-384z#vit_so400m_patch14_siglip_224.webliztimm/ViT-SO400M-14-SigLIPz(vit_so400m_patch16_siglip_256.webli_i18nz"timm/ViT-SO400M-16-SigLIP-i18n-256z#vit_so400m_patch14_siglip_378.webliztimm/ViT-SO400M-14-SigLIP-384z#vit_so400m_patch14_siglip_384.webliz%vit_base_patch16_siglip_gap_224.webliz%vit_base_patch16_siglip_gap_256.webliz*vit_base_patch16_siglip_gap_256.webli_i18nz%vit_base_patch16_siglip_gap_384.webliz%vit_base_patch16_siglip_gap_512.webliz&vit_large_patch16_siglip_gap_256.webliz&vit_large_patch16_siglip_gap_384.webliz'vit_so400m_patch14_siglip_gap_224.webliz*vit_so400m_patch14_siglip_gap_224.pali_mixzgoogle/paligemma-3b-mix-224-jaxzpaligemma-3b-mix-224.npzhf)r;  rI  r<  r   z)vit_so400m_patch14_siglip_gap_224.pali_ptzgoogle/paligemma-3b-pt-224-jaxzpaligemma-3b-pt-224.npzz,vit_so400m_patch16_siglip_gap_256.webli_i18nz'vit_so400m_patch14_siglip_gap_378.webli)r;  rI  r3  r5  r   z'vit_so400m_patch14_siglip_gap_384.webliz*vit_so400m_patch14_siglip_gap_448.pali_mixzgoogle/paligemma-3b-mix-448-jaxzpaligemma-3b-mix-448.npz)r;  rI  r<  r3  r5  r   z)vit_so400m_patch14_siglip_gap_448.pali_ptzgoogle/paligemma-3b-pt-448-jaxzpaligemma-3b-pt-448.npzz)vit_so400m_patch14_siglip_gap_896.pali_ptzgoogle/paligemma-3b-pt-896-jaxzpaligemma-3b-pt-896.npz)r?     r\  z+vit_so400m_patch14_siglip_378.webli_ft_in1kz/vit_so400m_patch14_siglip_gap_378.webli_ft_in1kz,vit_xsmall_patch16_clip_224.tinyclip_yfcc15m)r;  rI  r@  r   rY  r   z.vit_medium_patch32_clip_224.tinyclip_laion400mz,vit_medium_patch16_clip_224.tinyclip_yfcc15mz/vit_betwixt_patch32_clip_224.tinyclip_laion400mz%vit_wee_patch16_reg1_gap_256.sbb_in1kz&vit_pwee_patch16_reg1_gap_256.sbb_in1kz1vit_little_patch16_reg1_gap_256.sbb_in12k_ft_in1kz)vit_little_patch16_reg1_gap_256.sbb_in12k)r;  r   r3  r5  z(vit_little_patch16_reg4_gap_256.sbb_in1kz(vit_medium_patch16_reg1_gap_256.sbb_in1kz1vit_medium_patch16_reg4_gap_256.sbb_in12k_ft_in1kz(vit_medium_patch16_reg4_gap_256.sbb_in1kz)vit_medium_patch16_reg4_gap_256.sbb_in12kz8vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1kz2vit_mediumd_patch16_reg4_gap_256.sbb_in12k_ft_in1kz0vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in12kz*vit_mediumd_patch16_reg4_gap_256.sbb_in12kz8vit_mediumd_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1kz)vit_betwixt_patch16_reg1_gap_256.sbb_in1kz8vit_betwixt_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1kz2vit_betwixt_patch16_reg4_gap_256.sbb_in12k_ft_in1kz)vit_betwixt_patch16_reg4_gap_256.sbb_in1kz0vit_betwixt_patch16_reg4_gap_256.sbb2_e200_in12kz*vit_betwixt_patch16_reg4_gap_256.sbb_in12kz8vit_betwixt_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1kz'vit_base_patch16_reg4_gap_256.untrained)r3  )r;  r   rY  r3  r5  r   )r?      r]  )z)vit_so150m_patch16_reg4_gap_256.untrainedz)vit_so150m_patch16_reg4_map_256.untrainedz$vit_intern300m_patch14_448.ogvl_distztest_vit.r160_in1kztest_vit2.r160_in1kztest_vit3.r160_in1k)rL  rN  rO  rQ  rR  rS  rT  rU  rV  rW  rX  c                 R    i | ]$}|                     d d          t          |         %S )_clip__clip_quickgelu_)r  default_cfgs)r   r  s     rP   r*  r*  '  s:       ABAIIh*++\!_  rQ   variant
pretrainedc           	      .   |                     dd          }d| v rt          t          dd          }nt          }|                     dd          }d	| v r|                    d
d           dk    rd}t	          t
          | |f||t          |d          d|S )Nout_indicesr?   flexir  F)r  r  pretrained_strictTsiglipr   r   getter)re  feature_cls)pretrained_filter_fnrg  feature_cfg)r#  r   r0  r,  r&   r.   r.  )rb  rc  r9  re  
_filter_fnstricts         rP   _create_vision_transformerro  -  s    **]A..K' 1W\]]]

)
 ZZ+T22F7vzz->>%GG ( [hGGG    rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	z ViT-Tiny (Vit-Ti/16)
    r      r   r?   r   r  r  r5   vit_tiny_patch16_224rc  )rs  r.  ro  rc  r9  
model_argsr  s       rP   rs  rs  F  J     s"JJJJ&ss*sX\]gXrXrkqXrXrssELrQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	z% ViT-Tiny (Vit-Ti/16) @ 384x384.
    r   rq  r   r?   rr  vit_tiny_patch16_384rc  )ry  rt  ru  s       rP   ry  ry  O  rw  rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	z ViT-Small (ViT-S/32)
        r=  r      rr  vit_small_patch32_224rc  )r}  rt  ru  s       rP   r}  r}  X  J     s"JJJJ&tt:tY]^hYsYslrYsYsttELrQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	z& ViT-Small (ViT-S/32) at 384x384.
    r{  r=  r   r|  rr  vit_small_patch32_384rc  )r  rt  ru  s       rP   r  r  a  r~  rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	 ViT-Small (ViT-S/16)
    r   r=  r   r|  rr  vit_small_patch16_224rc  )r  rt  ru  s       rP   r  r  j  r~  rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	r  r   r=  r   r|  rr  vit_small_patch16_384rc  )r  rt  ru  s       rP   r  r  s  r~  rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	z ViT-Small (ViT-S/8)
    r2   r=  r   r|  rr  vit_small_patch8_224rc  )r  rt  ru  s       rP   r  r  |  sJ     cqIIIJ&ss*sX\]gXrXrkqXrXrssELrQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )z ViT-Base (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k, source https://github.com/google-research/vision_transformer.
    r{  r   r   rr  vit_base_patch32_224rc  )r  rt  ru  s       rP   r  r    J    
 s"KKKJ&ss*sX\]gXrXrkqXrXrssELrQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )z ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
    r{  r   r   rr  vit_base_patch32_384rc  )r  rt  ru  s       rP   r  r    r  rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )z ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
    r   r   r   rr  vit_base_patch16_224rc  )r  rt  ru  s       rP   r  r    r  rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )z ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
    r   r   r   rr  vit_base_patch16_384rc  )r  rt  ru  s       rP   r  r    r  rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )z ViT-Base (ViT-B/8) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
    r2   r   r   rr  vit_base_patch8_224rc  )r  rt  ru  s       rP   r  r    sJ    
 crJJJJ&rrrW[\fWqWqjpWqWqrrELrQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	zo ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights.
    r{  rJ     r   rr  vit_large_patch32_224rc  )r  rt  ru  s       rP   r  r    J     t2LLLJ&tt:tY]^hYsYslrYsYsttELrQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	z ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
    r{  rJ  r  r   rr  vit_large_patch32_384rc  )r  rt  ru  s       rP   r  r    J    
 t2LLLJ&tt:tY]^hYsYslrYsYsttELrQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )z ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
    r   rJ  r  rr  vit_large_patch16_224rc  )r  rt  ru  s       rP   r  r    r  rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )z ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
    r   rJ  r  rr  vit_large_patch16_384rc  )r  rt  ru  s       rP   r  r    r  rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	z  ViT-Large model (ViT-L/14)
       rJ  r  r   rr  vit_large_patch14_224rc  )r  rt  ru  s       rP   r  r    r  rQ   c           	      ^    t          dddd          }t          dd| it          |fi |}|S )	zW ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929).
    r  rK  r{  r   rr  vit_huge_patch14_224rc  )r  rt  ru  s       rP   r  r    sJ     t2LLLJ&ss*sX\]gXrXrkqXrXrssELrQ   c           	      `    t          ddddd          }t          d	d| it          |fi |}|S )
zq ViT-Giant (little-g) model (ViT-g/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    r    tE]t@(   r   r   r  r   r  r5   vit_giant_patch14_224rc  )r  rt  ru  s       rP   r  r    sM     tuBZ\]]]J&tt:tY]^hYsYslrYsYsttELrQ   c           	      b    t          ddddd          }t          	 d	d| it          |fi |}|S )
zq ViT-Gigantic (big-G) model (ViT-G/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    r    ;;@0   r   r  vit_gigantic_patch14_224rc  )r  rt  ru  s       rP   r  r    s_     tuBZ\]]]J&"Y Y/9Y=A*=W=WPV=W=WY YELrQ   c           	      b    t          ddddd          }t          	 dd| it          |fi |}|S )	z ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
    Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
    r   r   r   F)r   r  r  r5   r6   vit_base_patch16_224_miilrc  )r  rt  ru  s       rP   r  r  	  s_    
 s"UZ[[[J&#Z Z0:Z>B:>X>XQW>X>XZ ZELrQ   c                 j    t          ddddddddd	  	        }t          	 dd
| it          |fi |}|S )zB ViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 240x240
    r   rH  r   r2   Fr   r  	r   r  r  r5   r  r   r6   r}   r	  vit_medium_patch16_gap_240rc  )r  rt  ru  s       rP   r  r  	  s     B!EtUL L LJ '$[ [1;[?CJ?Y?YRX?Y?Y[ [ELrQ   c                 j    t          ddddddddd	  	        }t          	 dd
| it          |fi |}|S )zB ViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 256x256
    r   rH  r   r2   Fr   r  r  vit_medium_patch16_gap_256rc  r  rt  ru  s       rP   r  r  	  r  rQ   c                 j    t          ddddddddd	  	        }t          	 dd
| it          |fi |}|S )zB ViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 384x384
    r   rH  r   r2   Fr   r  r  vit_medium_patch16_gap_384rc  )r  rt  ru  s       rP   r  r  &	  r  rQ   c                 j    t          ddddddddd	  	        }t          	 dd
| it          |fi |}|S )zC ViT-Betwixt (ViT-b/16) w/o class token, w/ avg-pool @ 256x256
    r     r   
   Fr   r  r  r  rc  r  rt  ru  s       rP   vit_betwixt_patch16_gap_256r  2	  ss     B"%EtUL L LJ '$[ [1;[?CJ?Y?YRX?Y?Y[ [ELrQ   c           	      f    t          ddddddd          }t          	 d	d| it          |fi |}|S )
z@ ViT-Base (ViT-B/16) w/o class token, w/ avg-pool @ 224x224
    r   r   r   Fr   r   r  r  r5   r  r   r	  rE  rc  )rE  rt  ru  s       rP   rE  rE  >	  sn     B"%]blqs s sJ&"Y Y/9Y=A*=W=WPV=W=WY YELrQ   c           	      f    t          ddddddd          }t          	 d
d	| it          |fi |}|S )z; ViT-Huge model (ViT-H/14) w/ no class token, avg pool
    r  rK  r{  r   Fr   r  vit_huge_patch14_gap_224rc  )r  rt  ru  s       rP   r  r  I	  n     R25^cmrt t tJ&"Y Y/9Y=A*=W=WPV=W=WY YELrQ   c           	      f    t          ddddddd          }t          	 d	d| it          |fi |}|S )
zE ViT-Huge model (ViT-H/16) w/ no class token, avg pool @ 448x448
    r   rK  r{  Fr   r  vit_huge_patch16_gap_448rc  )r  rt  ru  s       rP   r  r  T	  r  rQ   c           
      h    t          dddddddd          }t          	 d
d	| it          |fi |}|S )zH ViT-Giant (little-gg) model (ViT-g/16) w/ no class token, avg pool
    r   r  r  r  Fr   r   r  r  r5   r   r  r   r	  vit_giant_patch16_gap_224rc  )r  rt  ru  s       rP   r  r  _	  sn     R2ue= = =J '#Z Z0:Z>B:>X>XQW>X>XZ ZELrQ   c           	      v    t          ddddt          j                  }t          	 dd| it          |fi |}|S )	NrB  r  rU   Tr  r  r5   r  r:   vit_xsmall_patch16_clip_224rc  )r  r.  rF   rr   ro  ru  s       rP   r  r  k	  b     2TVXVbcccJ&%\ \2<\@DZ@Z@ZSY@Z@Z\ \ELrQ   c           	      x    t          dddddt          j                  }t          	 d	d| it          |fi |}|S )
Nr{  rH  r   r2   Tr   r  r  r5   r  r:   vit_medium_patch32_clip_224rc  )r  r  ru  s       rP   r  r  t	  sn     B!dWYWce e eJ&%\ \2<\@DZ@Z@ZSY@Z@Z\ \ELrQ   c           	      v    t          ddddt          j                  }t          	 dd| it          |fi |}|S )	NrH  r   r2   Tr  vit_medium_patch16_clip_224rc  )r  r  ru  s       rP   r  r  ~	  r  rQ   c           	      x    t          dddddt          j                  }t          	 d	d| it          |fi |}|S )
Nr{  r  r   r  Tr  vit_betwixt_patch32_clip_224rc  )r  r  ru  s       rP   r  r  	  sn     B"tXZXdf f fJ&&] ]3=]AEjA[A[TZA[A[] ]ELrQ   c           	      x    t          dddddt          j                  }t          	 dd| it          |fi |}|S )	) ViT-B/32 CLIP image tower @ 224x224
    r{  r   r   Tr  vit_base_patch32_clip_224rc  )r  r  ru  s       rP   r  r  	  n     B"tXZXdf f fJ&#Z Z0:Z>B:>X>XQW>X>XZ ZELrQ   c           	      x    t          dddddt          j                  }t          	 dd| it          |fi |}|S )	z) ViT-B/32 CLIP image tower @ 256x256
    r{  r   r   Tr  vit_base_patch32_clip_256rc  )r  r  ru  s       rP   r  r  	  r  rQ   c           	      x    t          dddddt          j                  }t          	 dd| it          |fi |}|S )	z) ViT-B/32 CLIP image tower @ 384x384
    r{  r   r   Tr  vit_base_patch32_clip_384rc  )r  r  ru  s       rP   r  r  	  r  rQ   c           	      x    t          dddddt          j                  }t          	 dd| it          |fi |}|S )	z) ViT-B/32 CLIP image tower @ 448x448
    r{  r   r   Tr  vit_base_patch32_clip_448rc  )r  r  ru  s       rP   r  r  	  r  rQ   c           	      x    t          dddddt          j                  }t          	 dd| it          |fi |}|S )	z ViT-B/16 CLIP image tower
    r   r   r   Tr  vit_base_patch16_clip_224rc  )r  r  ru  s       rP   r  r  	  e     s"UYfhfrsssJ&#Z Z0:Z>B:>X>XQW>X>XZ ZELrQ   c           	      x    t          dddddt          j                  }t          	 dd| it          |fi |}|S )	z) ViT-B/16 CLIP image tower @ 384x384
    r   r   r   Tr  vit_base_patch16_clip_384rc  )r  r  ru  s       rP   r  r  	  r  rQ   c           	      x    t          dddddt          j                  }t          	 d	d| it          |fi |}|S )
z1 ViT-Large model (ViT-L/14) CLIP image tower
    r  rJ  r  r   Tr  vit_large_patch14_clip_224rc  )r  r  ru  s       rP   r  r  	  e     t2VZgigstttJ&$[ [1;[?CJ?Y?YRX?Y?Y[ [ELrQ   c           	      x    t          dddddt          j                  }t          	 d	d| it          |fi |}|S )
z; ViT-Large model (ViT-L/14) CLIP image tower @ 336x336
    r  rJ  r  r   Tr  vit_large_patch14_clip_336rc  )r  r  ru  s       rP   r  r  	  r  rQ   c           	      x    t          dddddt          j                  }t          	 d	d| it          |fi |}|S )
z1 ViT-Huge model (ViT-H/14) CLIP image tower.
    r  rK  r{  r   Tr  vit_huge_patch14_clip_224rc  )r  r  ru  s       rP   r  r  	  e     t2VZgigstttJ&#Z Z0:Z>B:>X>XQW>X>XZ ZELrQ   c           	      x    t          dddddt          j                  }t          	 d	d| it          |fi |}|S )
z: ViT-Huge model (ViT-H/14) CLIP image tower @ 336x336
    r  rK  r{  r   Tr  vit_huge_patch14_clip_336rc  )r  r  ru  s       rP   r  r  	  r  rQ   c           	      x    t          dddddt          j                  }t          	 d	d| it          |fi |}|S )
z: ViT-Huge model (ViT-H/14) CLIP image tower @ 378x378
    r  rK  r{  r   Tr  vit_huge_patch14_clip_378rc  )r  r  ru  s       rP   r  r  	  r  rQ   c           	      z    t          ddddddt          j                  }t          	 d
d	| it          |fi |}|S )z ViT-Giant (little-g) model (ViT-g/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    Pretrained weights from CLIP image tower.
    r  r  r  r  r   Tr   r  r   r  r5   r  r:   vit_giant_patch14_clip_224rc  )r  r  ru  s       rP   r  r  
  sq    
 bBY]jljvx x xJ&$[ [1;[?CJ?Y?YRX?Y?Y[ [ELrQ   c           	      z    t          ddddddt          j                  }t          	 d
d	| it          |fi |}|S )z ViT-bigG model (ViT-G/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    Pretrained weights from CLIP image tower.
    r  r  r  r  r   Tr  vit_gigantic_patch14_clip_224rc  )r  r  ru  s       rP   r  r  
  sq    
 bBY]jljvx x xJ&'^ ^4>^BFzB\B\U[B\B\^ ^ELrQ   c           	      z    t          dddddt          j        d          }t          	 d	d| it          |fi |}|S )
r  r{  r   r   T
quick_gelur   r  r  r5   r  r:   r   #vit_base_patch32_clip_quickgelu_224rc  )r  r  ru  s       rP   r  r  
  n     B"t<<9 9 9J '-d d:DdHLZHbHb[aHbHbd dELrQ   c           	      z    t          dddddt          j        d          }t          	 d	d| it          |fi |}|S )
z0 ViT-B/16 CLIP image tower w/ QuickGELU act
    r   r   r   Tr  r  #vit_base_patch16_clip_quickgelu_224rc  )r  r  ru  s       rP   r  r  '
  r  rQ   c           	      z    t          dddddt          j        d          }t          	 d
d	| it          |fi |}|S )zB ViT-Large model (ViT-L/14) CLIP image tower w/ QuickGELU act
    r  rJ  r  r   Tr  r  $vit_large_patch14_clip_quickgelu_224rc  )r  r  ru  s       rP   r  r  3
  n     R2<<9 9 9J '.e e;EeIMjIcIc\bIcIce eELrQ   c           	      z    t          dddddt          j        d          }t          	 d
d	| it          |fi |}|S )zL ViT-Large model (ViT-L/14) CLIP image tower @ 336x336 w/ QuickGELU act
    r  rJ  r  r   Tr  r  $vit_large_patch14_clip_quickgelu_336rc  )r  r  ru  s       rP   r  r  ?
  r  rQ   c           	      z    t          dddddt          j        d          }t          	 d
d	| it          |fi |}|S )zB ViT-Huge model (ViT-H/14) CLIP image tower w/ QuickGELU act.
    r  rK  r{  r   Tr  r  #vit_huge_patch14_clip_quickgelu_224rc  )r   r  ru  s       rP   r   r   K
  n     R2<<9 9 9J '-d d:DdHLZHbHb[aHbHbd dELrQ   c           	      z    t          dddddt          j        d          }t          	 d
d	| it          |fi |}|S )zK ViT-Huge model (ViT-H/14) CLIP image tower @ 378x378 w/ QuickGELU act
    r  rK  r{  r   Tr  r  #vit_huge_patch14_clip_quickgelu_378rc  )r  r  ru  s       rP   r  r  W
  r  rQ   c           	      b    t          ddddd          }t          	 d	d| it          |fi |}|S )
z ViT-Base (ViT-B/32+)
    r{  r\  r   r  r|   r   r  r  r5   r}   vit_base_patch32_plus_256rc  )r  rt  ru  s       rP   r  r  e
  _     s"X\]]]J&#Z Z0:Z>B:>X>XQW>X>XZ ZELrQ   c           	      b    t          ddddd          }t          	 d	d| it          |fi |}|S )
z ViT-Base (ViT-B/16+)
    r   r\  r   r  r|   r  vit_base_patch16_plus_240rc  )r	  rt  ru  s       rP   r	  r	  o
  r  rQ   c                 t    t          dddddddt          d	  	        }t          	 d
d	| it          |fi |}|S )z/ ViT-Base (ViT-B/16) w/ residual post-norm
    r   r   r   Fr|   r   )	r   r  r  r5   r6   r}   r  r  r   vit_base_patch16_rpn_224rc  )r  )r.  r   ro  ru  s       rP   r  r  y
  st     B"uZ^LeE E EJ '"Y Y/9Y=A*=W=WPV=W=WY YELrQ   c           	      b    t          ddddd          }t          	 d	d| it          |fi |}|S )
a   ViT-Base w/ LayerScale + 36 x 1 (36 block serial) config. Experimental, may remove.
    Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
    Paper focuses on 24x2 + 48x1 for 'Small' width but those are extremely slow.
    r   r=  $   r|  r|   r  vit_small_patch16_36x1_224rc  )r  rt  ru  s       rP   r  r  
  s_     s"W[\\\J&$[ [1;[?CJ?Y?YRX?Y?Y[ [ELrQ   c           	      n    t          dddddt                    }t          	 d	d| it          |fi |}|S )
a   ViT-Small w/ LayerScale + 18 x 2 (36 block parallel) config. Experimental, may remove.
    Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
    Paper focuses on 24x2 + 48x1 for 'Small' width but those are extremely slow.
    r   r=     r|  r|   r   r  r  r5   r}   r  vit_small_patch16_18x2_224rc  )r  r.  r   ro  ru  s       rP   r  r  
  sk     B!Xkm m mJ&$[ [1;[?CJ?Y?YRX?Y?Y[ [ELrQ   c           	      n    t          dddddt                    }t          	 d	d| it          |fi |}|S )
z ViT-Base w/ LayerScale + 18 x 2 (36 block parallel) config. Experimental, may remove.
    Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
    r   r   r  r   r|   r  vit_base_patch16_18x2_224rc  )r  r  ru  s       rP   r  r  
  sk    
 B"$Yln n nJ&#Z Z0:Z>B:>X>XQW>X>XZ ZELrQ   c           	      b    t          ddddd          }t          	 d	d| it          |fi |}|S )
zG EVA-large model https://arxiv.org/abs/2211.07636 /via MAE MIM pretrainr  rJ  r  r   r   r   r  r  r5   r   eva_large_patch14_196rc  )r  rt  ru  s       rP   r  r  
  s^     t2Y^___J&V V,6V:>z:T:TV:T:TV VELrQ   c           	      `    t          ddddd          }t          d	d| it          |fi |}|S )
zF EVA-large model https://arxiv.org/abs/2211.07636 via MAE MIM pretrainr  rJ  r  r   r   r  eva_large_patch14_336rc  )r  rt  ru  s       rP   r  r  
  sM     t2Y^___J&tt:tY]^hYsYslrYsYsttELrQ   c           	      `    t          ddddd          }t          d	d| it          |fi |}|S )
z FlexiViT-Small
    r   r=  r   r|  Tr   r  r  r5   r  flexivit_smallrc  )r  rt  ru  s       rP   r  r  
  sM     s"Z^___J&mmJmRVWaRlRlekRlRlmmELrQ   c           	      `    t          ddddd          }t          dd| it          |fi |}|S )	z FlexiViT-Base
    r   r   r   Tr  flexivit_baserc  )r  rt  ru  s       rP   r  r  
  sM     s"[_```J&ll:lQUV`QkQkdjQkQkllELrQ   c           	      `    t          ddddd          }t          dd| it          |fi |}|S )	z FlexiViT-Large
    r   rJ  r  Tr  flexivit_largerc  )r!  rt  ru  s       rP   r!  r!  
  sM     t2\`aaaJ&mmJmRVWaRlRlekRlRlmmELrQ   c                     t          ddddddt          t          dd
  
        }t          	 d	d| it          |fi |}|S )
H ViT-Large model (ViT-L/14) w/ parallel blocks and qk norm enabled.
    r   r   r   TF
r   r  r  r5   r  r  r:   r  r6   r7   vit_base_patch16_xp_224rc  )r%  r.  r   r   ro  ru  s       rP   r%  r%  
  st     B"t\`%9ESW  J '!X X.8X<@<V<Vv<V<VX XELrQ   c                     t          ddddddt          t          dd
  
        }t          	 d
d	| it          |fi |}|S )r#  r  rJ  r  r   TFr$  vit_large_patch14_xp_224rc  )r(  r&  ru  s       rP   r(  r(  
  su     R2]a%9ESW  J '"Y Y/9Y=A*=W=WPV=W=WY YELrQ   c                     t          ddddddt          t          dd
  
        }t          	 d
d	| it          |fi |}|S )zG ViT-Huge model (ViT-H/14) w/ parallel blocks and qk norm enabled.
    r  rK  r{  r   TFr$  vit_huge_patch14_xp_224rc  )r*  r&  ru  s       rP   r*  r*  
  st     R2]a%9ESW  J '!X X.8X<@<V<Vv<V<VX XELrQ   c           	      b    t          ddddd          }t          	 d	d| it          |fi |}|S )
z ViT-S/14 for DINOv2
    r  r=  r   r|  r|   r  vit_small_patch14_dinov2rc  )r,  rt  ru  s       rP   r,  r,  
  s_     s"W[\\\J&"Y Y/9Y=A*=W=WPV=W=WY YELrQ   c           	      b    t          ddddd          }t          	 dd| it          |fi |}|S )	z ViT-B/14 for DINOv2
    r  r   r   r|   r  vit_base_patch14_dinov2rc  )r.  rt  ru  s       rP   r.  r.    s^     s"X\]]]J&!X X.8X<@<V<Vv<V<VX XELrQ   c           	      b    t          ddddd          }t          	 d	d| it          |fi |}|S )
z ViT-L/14 for DINOv2
    r  rJ  r  r   r|   r  vit_large_patch14_dinov2rc  )r0  rt  ru  s       rP   r0  r0    s_     t2Y]^^^J&"Y Y/9Y=A*=W=WPV=W=WY YELrQ   c           
          t          ddddddt          t          j                  }t	          	 d
d	| it          |fi |}|S ) ViT-G/14 for DINOv2
    r     r  r  r|   h˹WU@)r   r  r  r5   r}   r   r   r   vit_giant_patch14_dinov2rc  )r5  r.  r   rF   SiLUro  ru  s       rP   r5  r5    sp     R24  J '"Y Y/9Y=A*=W=WPV=W=WY YELrQ   c           	      f    t          ddddddd          }t          	 dd
| it          |fi |}|S )z( ViT-S/14 for DINOv2 w/ 4 registers
    r  r=  r   r|  r|   rU   Tr   r  r  r5   r}   r  r  vit_small_patch14_reg4_dinov2rc  )r:  rt  ru  s       rP   r:  r:  ,  sl     B!T  J ''^ ^4>^BFzB\B\U[B\B\^ ^ELrQ   c           	      f    t          ddddddd          }t          	 d
d	| it          |fi |}|S )z( ViT-B/14 for DINOv2 w/ 4 registers
    r  r   r   r|   rU   Tr9  vit_base_patch14_reg4_dinov2rc  )r<  rt  ru  s       rP   r<  r<  9  sl     B"$T  J '&] ]3=]AEjA[A[TZA[A[] ]ELrQ   c           	      f    t          ddddddd          }t          	 dd
| it          |fi |}|S )z( ViT-L/14 for DINOv2 w/ 4 registers
    r  rJ  r  r   r|   rU   Tr9  vit_large_patch14_reg4_dinov2rc  )r>  rt  ru  s       rP   r>  r>  F  sl     R24T  J ''^ ^4>^BFzB\B\U[B\B\^ ^ELrQ   c                     t          ddddddt          t          j        dd	
  
        }t	          	 dd| it          |fi |}|S )r2  r  r3  r  r  r|   r4  rU   T)
r   r  r  r5   r}   r   r   r   r  r  vit_giant_patch14_reg4_dinov2rc  )r@  r6  ru  s       rP   r@  r@  S  sv     R24[f"'aPT  J ''^ ^4>^BFzB\B\U[B\B\^ ^ELrQ   c           	      d    t          dddddd          }t          	 d	d| it          |fi |}|S )
Nr   r   r   Fr   r   r  r  r5   r  r   vit_base_patch16_siglip_224rc  )rC  rt  ru  s       rP   rC  rC  d  h    B"%]b  J '%\ \2<\@DZ@Z@ZSY@Z@Z\ \ELrQ   c           	      d    t          dddddd          }t          	 d	d| it          |fi |}|S )
Nr   r   r   Fr   rB  vit_base_patch16_siglip_256rc  )rF  rt  ru  s       rP   rF  rF  n  rD  rQ   c           	      d    t          dddddd          }t          	 d	d| it          |fi |}|S )
Nr   r   r   Fr   rB  vit_base_patch16_siglip_384rc  )rH  rt  ru  s       rP   rH  rH  x  rD  rQ   c           	      d    t          dddddd          }t          	 d	d| it          |fi |}|S )
Nr   r   r   Fr   rB  vit_base_patch16_siglip_512rc  )rJ  rt  ru  s       rP   rJ  rJ    rD  rQ   c           	      d    t          dddddd          }t          	 d	d| it          |fi |}|S )
Nr   rJ  r  Fr   rB  vit_large_patch16_siglip_256rc  )rL  rt  ru  s       rP   rL  rL    h    R25^c  J '&] ]3=]AEjA[A[TZA[A[] ]ELrQ   c           	      d    t          dddddd          }t          	 d	d| it          |fi |}|S )
Nr   rJ  r  Fr   rB  vit_large_patch16_siglip_384rc  )rO  rt  ru  s       rP   rO  rO    rM  rQ   c           	      f    t          ddddddd          }t          	 dd
| it          |fi |}|S )Nr       r   爅ZӼ@Fr   r   r  r  r5   r   r  r   vit_so400m_patch14_siglip_224rc  )rU  rt  ru  s       rP   rU  rU    k    R2]bpu  J ''^ ^4>^BFzB\B\U[B\B\^ ^ELrQ   c           	      f    t          ddddddd          }t          	 d
d	| it          |fi |}|S )Nr   rQ  rR  rS  Fr   rT  vit_so400m_patch16_siglip_256rc  )rX  rt  ru  s       rP   rX  rX    m     R2]bpu  J ''^ ^4>^BFzB\B\U[B\B\^ ^ELrQ   c           	      f    t          ddddddd          }t          	 dd
| it          |fi |}|S )Nr  rQ  rR  r   rS  Fr   rT  vit_so400m_patch14_siglip_378rc  )r[  rt  ru  s       rP   r[  r[    rY  rQ   c           	      f    t          ddddddd          }t          	 dd
| it          |fi |}|S )Nr  rQ  rR  r   rS  Fr   rT  vit_so400m_patch14_siglip_384rc  )r]  rt  ru  s       rP   r]  r]    rV  rQ   c           	      f    t          ddddddd          }t          	 d	d| it          |fi |}|S )
^ A SigLIP variant of ViT with global average pooling (GAP) instead of attention pooling (MAP).r   r   r   Fr   r  vit_base_patch16_siglip_gap_224rc  )r`  rt  ru  s       rP   r`  r`    m     B"%]blq  J ')` `6@`DHD^D^W]D^D^` `ELrQ   c           	      f    t          ddddddd          }t          	 d	d| it          |fi |}|S )
r_  r   r   r   Fr   r  vit_base_patch16_siglip_gap_256rc  )rc  rt  ru  s       rP   rc  rc    ra  rQ   c           	      f    t          ddddddd          }t          	 d	d| it          |fi |}|S )
r_  r   r   r   Fr   r  vit_base_patch16_siglip_gap_384rc  )re  rt  ru  s       rP   re  re    ra  rQ   c           	      f    t          ddddddd          }t          	 d	d| it          |fi |}|S )
r_  r   r   r   Fr   r  vit_base_patch16_siglip_gap_512rc  )rg  rt  ru  s       rP   rg  rg    ra  rQ   c           	      f    t          ddddddd          }t          	 d	d| it          |fi |}|S )
r_  r   rJ  r  Fr   r   vit_large_patch16_siglip_gap_256rc  )ri  rt  ru  s       rP   ri  ri    m     R25^cmr  J '*a a7AaEI*E_E_X^E_E_a aELrQ   c           	      f    t          ddddddd          }t          	 d	d| it          |fi |}|S )
r_  r   rJ  r  Fr   r   vit_large_patch16_siglip_gap_384rc  )rl  rt  ru  s       rP   rl  rl    rj  rQ   c           
      h    t          dddddddd          }t          	 dd
| it          |fi |}|S )r_  r  rQ  rR  r   rS  Fr   r  !vit_so400m_patch14_siglip_gap_224rc  )rn  rt  ru  s       rP   rn  rn    n     R2ue  J '+b b8BbFJ:F`F`Y_F`F`b bELrQ   c           
      h    t          dddddddd          }t          	 d
d	| it          |fi |}|S )r_  r   rQ  rR  rS  Fr   r  !vit_so400m_patch16_siglip_gap_256rc  )rq  rt  ru  s       rP   rq  rq    ro  rQ   c           
      h    t          dddddddd          }t          	 dd
| it          |fi |}|S )r_  r  rQ  rR  r   rS  Fr   r  !vit_so400m_patch14_siglip_gap_378rc  )rs  rt  ru  s       rP   rs  rs  $  ro  rQ   c           
      h    t          dddddddd          }t          	 dd
| it          |fi |}|S )r_  r  rQ  rR  r   rS  Fr   r  !vit_so400m_patch14_siglip_gap_384rc  )ru  rt  ru  s       rP   ru  ru  0  ro  rQ   c           
      h    t          dddddddd          }t          	 dd
| it          |fi |}|S )r_  r  rQ  rR  r   rS  Fr   r  !vit_so400m_patch14_siglip_gap_448rc  )rw  rt  ru  s       rP   rw  rw  <  ro  rQ   c           
      h    t          dddddddd          }t          	 dd
| it          |fi |}|S )r_  r  rQ  rR  r   rS  Fr   r  !vit_so400m_patch14_siglip_gap_896rc  )ry  rt  ru  s       rP   ry  ry  H  ro  rQ   c                 l    t          ddddddddd	d

  
        }t          	 dd| it          |fi |}|S )Nr   rB  r  rU   r|      FTr%   r   
r   r  r  r5   r}   r   r  r  r  r   vit_wee_patch16_reg1_gap_256rc  )r}  rt  ru  s       rP   r}  r}  T  sq    B!YZ$1%  J '&] ]3=]AEjA[A[TZA[A[] ]ELrQ   c                 x    t          dddddddddd	t          
          }t          	 dd| it          |fi |}|S )Nr   rB  rU   r|   r{  FTr%   r   )r   r  r  r5   r}   r   r  r  r  r   r  vit_pwee_patch16_reg1_gap_256rc  )r  )r.  r   ro  ru  s       rP   r  r  _  st    B!YZ$1%Zn  J ''^ ^4>^BFzB\B\U[B\B\^ ^ELrQ   c                 l    t          ddddddddd	d

  
        }t          	 dd| it          |fi |}|S )Nr   @  r  r{  r|   ffffff@FTr%   r   r|  vit_little_patch16_reg1_gap_256rc  )r  rt  ru  s       rP   r  r  j  q    B!Y\$1%  J ')` `6@`DHD^D^W]D^D^` `ELrQ   c                 l    t          ddddddddd	d

  
        }t          	 dd| it          |fi |}|S )Nr   r  r  r{  r|   r  FTrU   r   r|  vit_little_patch16_reg4_gap_256rc  )r  rt  ru  s       rP   r  r  u  r  rQ   c                 j    t          ddddddddd	
	  	        }t          	 dd| it          |fi |}|S )Nr   rH  r   r2   r|   FTr%   r   	r   r  r  r5   r}   r  r  r  r   vit_medium_patch16_reg1_gap_256rc  )r  rt  ru  s       rP   r  r    n    B!$1%  J ')` `6@`DHD^D^W]D^D^` `ELrQ   c                 j    t          ddddddddd	
	  	        }t          	 dd| it          |fi |}|S )Nr   rH  r   r2   r|   FTrU   r   r  vit_medium_patch16_reg4_gap_256rc  )r  rt  ru  s       rP   r  r    r  rQ   c                 j    t          ddddddddd	
	  	        }t          	 dd| it          |fi |}|S )Nr   rH     r2   r|   FTrU   r   r   vit_mediumd_patch16_reg4_gap_256rc  )r  rt  ru  s       rP   r  r    n    B!$1%  J '*a a7AaEI*E_E_X^E_E_a aELrQ   c                 j    t          ddddddddd	
	  	        }t          	 dd| it          |fi |}|S )Nr   rH  r  r2   r|   FTrU   r   r   vit_mediumd_patch16_reg4_gap_384rc  )r  rt  ru  s       rP   r  r    r  rQ   c                 j    t          ddddddddd	
	  	        }t          	 dd| it          |fi |}|S )Nr   r  r   r  r|   FTr%   r   r   vit_betwixt_patch16_reg1_gap_256rc  )r  rt  ru  s       rP   r  r    n    B"$$1%  J '*a a7AaEI*E_E_X^E_E_a aELrQ   c                 j    t          ddddddddd	
	  	        }t          	 dd| it          |fi |}|S )Nr   r  r   r  r|   FTrU   r   r   vit_betwixt_patch16_reg4_gap_256rc  )r  rt  ru  s       rP   r  r    r  rQ   c                 j    t          ddddddddd	
	  	        }t          	 dd| it          |fi |}|S )Nr   r  r   r  r|   FTrU   r   r   vit_betwixt_patch16_reg4_gap_384rc  )r  rt  ru  s       rP   r  r    r  rQ   c           
      h    t          dddddddd          }t          	 dd
| it          |fi |}|S )Nr   r   r   FTr   rU   )r   r  r  r5   r  r  r   r  vit_base_patch16_reg4_gap_256rc  )r  rt  ru  s       rP   r  r    sl    B"%1  J ''^ ^4>^BFzB\B\U[B\B\^ ^ELrQ   c           
      h    t          dddddddd	          }t          	 dd| it          |fi |}|S )Nr   r\  r  r  ~jt@FrU   r   )r   r  r  r5   r   r  r  r   vit_so150m_patch16_reg4_map_256rc  )r  rt  ru  s       rP   r  r    sl    B"aU  J ')` `6@`DHD^D^W]D^D^` `ELrQ   c                 j    t          ddddddddd		  	        }t          	 dd| it          |fi |}|S )Nr   r\  r  r  r  FrU   r   )	r   r  r  r5   r   r  r  r   r	  vit_so150m_patch16_reg4_gap_256rc  )r  rt  ru  s       rP   r  r    sn    B"aUE  J ')` `6@`DHD^D^W]D^D^` `ELrQ   c           	      f    t          ddddddd          }t          	 dd
| it          |fi |}|S )Nr  rJ  r  r   g?FT)r   r  r  r5   r}   r  r   vit_intern300m_patch14_448rc  )r  rt  ru  s       rP   r  r    sj    R2ED  J '$[ [1;[?CJ?Y?YRX?Y?Y[ [ELrQ   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S ) ViT Test
    r   @   r|  rT   r?   T)r   r  r  r5   r   r   test_vitrc  )r  rt  ru  s       rP   r  r    sO     raSTgklllJ&ggjgDQ[LfLf_eLfLfggELrQ   c                 j    t          ddddddddd	d

  
        }t          dd| it          |fi |}|S )r  r   r  r2   rT   r?   Fr%   r   r|   T)
r   r  r  r5   r   r  r  r   r}   r   	test_vit2rc  )r  rt  ru  s       rP   r  r    sc     1QaU_ce e eJ 'hhzhTR\MgMg`fMgMghhELrQ   c                 h    t          ddddddddd	
	  	        }t          dd| it          |fi |}|S )r  r   `   	   r?   rT   Fr%   r   r|   )	r   r  r  r5   r   r  r  r   r}   	test_vit3rc  )r  rt  ru  s       rP   r  r    s`     1QaUN N NJ 'hhzhTR\MgMg`fMgMghhELrQ   vit_tiny_patch16_224_in21kvit_small_patch32_224_in21kvit_small_patch16_224_in21kvit_base_patch32_224_in21kvit_base_patch16_224_in21kvit_base_patch8_224_in21kvit_large_patch32_224_in21kvit_large_patch16_224_in21kvit_huge_patch14_224_in21kvit_base_patch32_224_samzvit_base_patch32_224.samvit_base_patch16_224_samzvit_base_patch16_224.samvit_small_patch16_224_dinovit_small_patch8_224_dinovit_base_patch16_224_dinovit_base_patch8_224_dinovit_base_patch16_224_miil_in21k!vit_base_patch32_224_clip_laion2b)"vit_large_patch14_224_clip_laion2b!vit_huge_patch14_224_clip_laion2b"vit_giant_patch14_224_clip_laion2b)r   r%   Fr  )r   r3   )r  r3   )r%   r   r  F)r  )Fr  Tr  )r   loggingrK  collectionsr   	functoolsr   typingr   r   r   r   r	   r
   r   r   r   r   ImportErrortyping_extensionsrv   torch.nnrF   torch.nn.functional
functionalr_   torch.utils.checkpoint	torch.jitr   	timm.datar   r   r   r   r   r   timm.layersr   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   _builderr&   	_featuresr'   _manipulater(   r)   r*   	_registryr+   r,   r-   __all__	getLoggerrm   _loggerru   r0   r{   r   r   r   r   rw   r  rs   rp   r   r.   r`  rt   r  r  r\  r  no_gradre  r  r%  r0  r:  ra  _quick_gelu_cfgsr4  ro  rs  ry  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rE  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r	  r  r  r  r  r  r  r  r  r!  r%  r(  r*  r,  r.  r0  r5  r:  r<  r>  r@  rC  rF  rH  rJ  rL  rO  rU  rX  r[  r]  r`  rc  re  rg  ri  rl  rn  rq  rs  ru  rw  ry  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   rQ   rP   <module>r     sD   2   # # # # # #       O O O O O O O O O O O O O O O O O O O O O O* * * *))))))))*                          & & & & & & & & & & & & & & & &- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + * * * * * + + + + + + F F F F F F F F F F Y Y Y Y Y Y Y Y Y Y
 '
H
%
%0 0 0 0 0	 0 0 0fF F F F F F F F+ + + + +BI + + +\3 3 3 3 329 3 3 3l\ \ \ \ \29 \ \ \~C$ C$ C$ C$ C$") C$ C$ C$P !!"&+	 <   $	   2a a a a a	 a a aH ") 3       # u W[    $ ") 3     % %s %u %x % % % % "#"$& L  c3h	
   \   2 JT JT* JTS JT# JTW[ JT JT JT JT`  # #el*+# # # 
#u|
	# # # #Lel*+  
#u|
   4 #(&E Eel*+E E  E 	E
 E 
#u|
E E E EP c T#s(^     f 1$$3 3 3f
 1$$&&f 02 2 2f 0 d2 2 2f 0 d]S2B 2B 2Bf$ 1$$ e3 3 3%f, 1$$ e]S3B 3B 3B-f4 1$$ e3 3 35f< 1$$ e]S3B 3B 3B=fD 0 f2 2 2EfL 0 d]S2B 2B 2BMfT 0 e2 2 2Uf\ 0 e]S2B 2B 2B]fd / d1 1 1efl 1$$ e3 3 3mft 1$$ e]S3B 3B 3Buf@ .tt}0 0 0Af fF .tt} 300 00 00GfN /~ 310 10 10OfZ ( d* * *[fb ( d]S*B *B *Bcfj ' d) ) )kfr ' d]S)B )B )Bsfz ' d) ) ){fB ' d]S)B )B )BCfL &tt|||MfN %ddrlllOfP &tt|||QfR )$$2,,,SfX &tt( ( (Yf` &tt( ( (afh ') ) )ifp ') ) )qfx &tt( ( (yf f fB ({e*- *- *-CfJ )$$|e+- +- +-KfR )$$|e+- +- +-SfZ (}e*- *- *-[fb (|e*- *- *-cfj '{e)- )- )-kfr )$$|e+- +- +-sf~ $TTHVZ& & &fD $TTHVZ& & &EfN !$$i"(<!#M #M #MOfV  g"(<!"M "M "MWf^  e"(<!"M "M "M_ff c"(<!!M !M !Mgfr '\"(<! 3)0 )0 )0sf~ &tt\"(<! 3(0 (0 (0fJ '\"(<! 3)0 )0 )0KfV '\"(<! 3)0 )0 )0Wf f ff ,TTa"(<! 3.0 .0 .0gfr +DDa"(<! 3-0 -0 -0sf~ ,TTa"(<! 3.0 .0 .0fJ ,TTa"(<! 3.0 .0 .0KfZ &tt L|e:ch(j (j (j[fb .tt N|e:0W 0W 0Wcfn ' L) ) )oft *44 4U,D ,D ,Dufz 244 441 41 41{f@ 244 484E 4E 4EAfF GfL 6tt?84 84 84MfR 6tt?S]8\ 8\ 8\SfX 6tt?S]8\ 8\ 8\Yf^ 6tt?T8C 8C 8C_fd 6tt?(8D 8D 8Defl 7$*@39P 9P 9Pmf f fr 7$*@(9D 9D 9Dsfz 6tt?S8B 8B 8B{f@	 6tt?(8D 8D 8DA	fJ	 5dd?74 74 74K	fP	 5dd?-87E 7E 7EQ	fX	 5dd?T7C 7C 7CY	f^	 5dd?-87E 7E 7E_	ff	 6tt?S8B 8B 8Bg	fl	 6tt?(8D 8D 8Dm	fv	 0?24 24 24w	f|	 0?S2B 2B 2B}	fB
 0?(2D 2D 2DC
fJ
 1$$$*@33P 3P 3PK
fP
 1$$$*@(3D 3D 3DQ
fX
 0?S2B 2B 2BY
f^
 0?(2D 2D 2D_
fh
 /?14 14 14i
f f fn
 /?14 14 14o
ft
 /?(1D 1D 1Du
f|
 0?S2B 2B 2B}
fD 1$$?3G 3G 3GEfJ 1$$?3G 3G 3GKfP 244$*@3\a4c 4c 4cQfV 1$$?Se3U 3U 3UWf^ 0?2G 2G 2G_fd 0?2G 2G 2Gefj 1$$?Se3U 3U 3Ukfr (95?*E *E *Esfz (95?Sc*S *S *S{fB )$$95$*@3\_+a +a +aCfJ (95?Sd*T *T *TKfR )$$95?Sd+T +T +TSfZ ,TT<5?Sd.T .T .T[fd +DD=5?Sc-S -S -Sef f fl +DDB5?C	-A -A -Amfv +DD=5?Sc-S -S -Swf~ ,TT=5?Sc.S .S .SfH &tt-5?Sc(S (S (SIfP '-5W?Sc	)S )S )SQfZ &tt-5W?Sd	(T (T (T[fd &tt15?WD(B (B (Befr /45W?Sc1S 1S 1Ssf~ /45W?Sc1S 1S 1SfJ 045W?Sc2S 2S 2SKfV /45W?Sd1T 1T 1TWfd '9W?)E )E )Eefl '9W?)E )E )Emft (:W?Sc*S *S *Suf| (:LiW?C	*A *A *A}fJ *44B=[_+`+`+`KfL *44B=[_+`+`+`Mf f fN +DDRLLLOfP +DDRLLLQfR *44B<<<SfZ 05? 3	20 20 20[fd 05? 3(	2D 2D 2Defn *445? 3	,0 ,0 ,0ofx *445? 3(	,D ,D ,DyfD !$$Sae 4#1 #1 #1EfL  Ygk 4"1 "1 "1MfT  Ygk 4"1 "1 "1Uf^  Sae 4"1 "1 "1_ff Ygk 4!1 !1 !1gfn Ygk 4!1 !1 !1ofv !$$[im 4U#D #D #Dwf~  Zhl 4U"D "D "DfH !$$Sae 4#1 #1 #1IfP  Ygk 4"1 "1 "1Qf f fX  Ygk 4"1 "1 "1Yfb "44Wei 4U$D $D $Dcfj "44Wei 4U$D $D $Dkft ("ufv )$$2,,,wfx ("yf| S"(<!	!M !M !M}fF  T"(<!	"M "M "MGfP S"(<!	!M !M !MQf\ *44M"(<!	,M ,M ,M]ff +DDN"(<!	-M -M -Mgfp *44S 3"(<!,M ,M ,Mqf| ,TTN"(<!	.M .M .M}fH ((5* * *IfP (,5 	* * *QfZ -dd15 	/ / /[fd (,5 	* * *ef f fn (,5 	* * *ofx )$$,5 	+ + +yfB )$$,5 	+ + +CfL *44-5, , ,MfT /65 	1 1 1Uf^ *4415 	, , ,_fh *4415 	, , ,ift ,TT(5. . .uf| ,TT,5 	. . .}fF 1$$15 	3 3 3GfP ,TT,5 	. . .QfZ ,TT,5 	. . .[fd -dd,5 	/ / /efn -dd,5 	/ / /ofx .tt-50 0 0yf@ 1$$32	3 3 3AfJ 021	2 2 2Kf f fT 3DD65 	5 5 5Uf^ .tt15 3	0 0 0_fh .tt15 3	0 0 0ifr 1$$32 33 3 3sf~ 021 32 2 2fJ 021 32 2 2KfX 244 3(4 4 4Yf` 6tt 3(8 8 8afj 3DD5?	5E 5E 5Ekft 5dd5?	7E 7E 7Euf~ 3DD5?	5E 5E 5EfH 6tt5?	8E 8E 8EIfT ,TT 4.1 .1 .1UfZ -dd 4/1 /1 /1[f` 8 4:1 :1 :1aff 0 421 21 21gfn / 411 11 11of f ft / 411 11 11ufz 8 4:1 :1 :1{f@ / 411 11 11AfF 0 421 21 21GfN ? 4A1 A1 A1OfT 9$$ 4;1 ;1 ;1UfZ 7 491 91 91[fb 1$$ 431 31 31cfj ? 3A0 A0 A0kfp 0 421 21 21qfv ? 4A1 A1 A1wf| 9$$ 4;1 ;1 ;1}fB 0 421 21 21CfH 7 491 91 91IfP 1$$ 431 31 31QfX ? 3A0 A0 A0Yf^ .tt 0" 0" 0"_f fd 26 2" 2" 2"15 2" 2" 2" -1D"(< 3A- - - $ 41 1 1  4 41 1 1  4 41 1 1Gf f fP        FV      %$\22   Te    2  T @Q      T @Q      d AR      d AR      d AR      d AR      T @Q      T @Q      T @Q      T @Q      T @Q      D ?P      d AR      d AR      d AR      d AR      d AR      T @Q      d AR       DU      $ EV      4 FW      4 FW      4 FW      D GX       DU       DU       DU      $ EV      D GX      D GX      D GX      T HY      $ EV      $ EV      $ EV      $ EV      $ EV      $ EV      4 FW      4 FW      $ EV      $ EV      $ EV      4 FW      d IZ      D O`      D O`      T Pa      T Pa      D O`      D O`      $ EV      $ EV       DU      4 FW     	 	4 	FW 	 	 	 	  $ EV      d AR      d AR      t :K      d 9J      t :K     	 	 	CT 	 	 	 	 	 	 	DU 	 	 	 	 	 	 	CT 	 	 	 	   DU       CT       DU       DU      	 	d 	IZ 	 	 	 	 	 	T 	HY 	 	 	 	 	 	d 	IZ 	 	 	 	  d IZ       D GX      D GX      D GX      D GX      T HY      T HY      d IZ      d IZ      d IZ      d IZ       K\       K\       K\       K\       L]       L]      $ M^      $ M^      $ M^      $ M^      $ M^      $ M^      T HY      d IZ       K\       K\       K\       K\       L]       L]       L]       L]       L]      d IZ       K\       K\      4 FW       4E      $ 5F      $ 5F      H ' "E'!#G' "#G' !"E	'
 !"E'  !C' "#E' "#G' !"C'  :'  :' !">'  !<'  !<'  :'  &'H!'" ()L#'$ +O)L*N)' ' '     s   5 AA