
    Ng                        d Z ddlZddlmZmZmZmZmZ ddlZddl	m
Z
 ddlm
c mZ ddlmZ ddlmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ ddl%m&Z& dd	l'm(Z( dd
l)m*Z*m+Z+ dgZ, G d de
j-                  Z. G d de
j-                  Z/ G d de
j-                  Z0 G d de
j-                  Z1	 	 d`dZ2dadZ3dbdZ4 e*i d e4d          d e4dddd !          d" e4deeddd #          d$ e4deed%dd #          d& e4dd'dd !          d( e4dd'dd !          d) e4dd'dd !          d* e4ddd+          d, e4ddd+          d- e4dd'd+          d. e4dd'd+          d/ e4dd'd+          d0 e4dd'dd d12          d3 e4dd'dd d12          d4 e4dd'dd d12          d5 e4dd6          d7 e4dd6           e4dd6           e4dd6           e4dd6           e4d8d9d:;           e4d<d9d:;           e4d=d9d>;           e4d?d9d@;           e4dAd9ddd@B           e4dCd9d:;           e4dDd9d:;           e4dE           e4ddFdGdHdHI           e4ddFdGdHdHI           e4ddFdG+           e4ddFdGdHdHI          dJ          Z5e+dadKe1fdL            Z6e+dadKe1fdM            Z7e+dadKe1fdN            Z8e+dadKe1fdO            Z9e+dadKe1fdP            Z:e+dadKe1fdQ            Z;e+dadKe1fdR            Z<e+dadKe1fdS            Z=e+dadKe1fdT            Z>e+dadKe1fdU            Z?e+dadKe1fdV            Z@e+dadKe1fdW            ZAe+dadKe1fdX            ZBe+dadKe1fdY            ZCe+dadKe1fdZ            ZDe+dadKe1fd[            ZEe+dadKe1fd\            ZFe+dadKe1fd]            ZGe+dadKe1fd^            ZHe+dadKe1fd_            ZIdS )ca   EVA

EVA from https://github.com/baaivision/EVA , paper: https://arxiv.org/abs/2211.07636

@article{EVA,
  title={EVA: Exploring the Limits of Masked Visual Representation Learning at Scale},
  author={Fang, Yuxin and Wang, Wen and Xie, Binhui and Sun, Quan and Wu, Ledell and Wang, Xinggang and Huang,
  Tiejun and Wang, Xinlong and Cao, Yue},
  journal={arXiv preprint arXiv:2211.07636},
  year={2022}
}

EVA-02: A Visual Representation for Neon Genesis - https://arxiv.org/abs/2303.11331
@article{EVA02,
  title={EVA-02: A Visual Representation for Neon Genesis},
  author={Fang, Yuxin and Sun, Quan and Wang, Xinggang and Huang, Tiejun and Wang, Xinlong and Cao, Yue},
  journal={arXiv preprint arXiv:2303.11331},
  year={2023}
}

This file contains EVA & EVA02 model implementations evolved from BEiT, additional models in vision_transformer.py.

Modifications by / Copyright 2023 Ross Wightman, original copyrights below
    N)CallableListOptionalTupleUnion)
checkpoint)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDOPENAI_CLIP_MEANOPENAI_CLIP_STD)
PatchEmbedMlpGluMlpSwiGLU	LayerNormDropPathPatchDropoutRotaryEmbeddingCatapply_rot_embed_catapply_keep_indices_nlctrunc_normal_resample_patch_embedresample_abs_pos_embed	to_2tupleuse_fused_attn   )build_model_with_cfg)feature_take_indices)generate_default_cfgsregister_modelEvac                        e Zd ZU ej        j        e         ed<   	 	 	 	 	 	 	 	 	 dded	ed
edededede	de	de
e         de
e         f fdZ	 	 dde
ej                 de
ej                 fdZ xZS )EvaAttention
fused_attn   Tr   F        Ndim	num_headsqkv_bias	qkv_fusednum_prefix_tokensqkv_bias_separate	attn_drop	proj_dropattn_head_dim
norm_layerc                 2   t                                                       || _        ||z  }|	|	}|| j        z  }|dz  | _        || _        t                      | _        || _        |rt          j	        ||dz  d          | _
        dx| _        x| _        | _        |rt          j        t          j        |                    | _        |                     dt          j        |          d           t          j        t          j        |                    | _        ndx| _        x| _        | _        npt          j	        |||          | _        t          j	        ||d          | _        t          j	        |||          | _        d| _
        dx| _        x| _        | _        t          j        |          | _        |
 |
|          nt          j                    | _        t          j	        ||          | _        t          j        |          | _        dS )z

        Args:
            dim:
            num_heads:
            qkv_bias:
            qkv_fused:
            attn_drop:
            proj_drop:
            attn_head_dim:
            norm_layer:
        Ng         F)biask_bias)
persistent)super__init__r(   scaler+   r   r$   r,   nnLinearqkvq_projk_projv_proj	Parametertorchzerosq_biasregister_bufferv_biasr4   Dropoutr-   Identitynormprojr.   )selfr'   r(   r)   r*   r+   r,   r-   r.   r/   r0   head_dimall_head_dim	__class__s                K/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/eva.pyr7   zEvaAttention.__init__2   s   2 	")#$$H$.0%
!2(**!2 	;ylQ&6UCCCDH6::DK:$+ ? l5;|+D+DEE$$Xu{</H/HUZ$[[[ l5;|+D+DEE:>>>dkDKK)CHEEEDK)CEBBBDK)CHEEEDKDH6::DK:$+I..0:0FJJ|,,,BKMM	IlC00	I..    rope	attn_maskc                 z   |j         \  }}}| j        | j        |                     |          }nit          j        | j        | j        | j        f          }| j        r|                     |          }||z  }n!t          j	        || j        j
        |          }|                    ||d| j        d                              ddddd          }|                    d          \  }	}
}n|                     |                              ||| j        d                              dd          }	|                     |                              ||| j        d                              dd          }
|                     |                              ||| j        d                              dd          }|| j        }t          j        |	d d d d d |d d f         t)          |	d d d d |d d d f         |          gd                              |          }	t          j        |
d d d d d |d d f         t)          |
d d d d |d d d f         |          gd                              |          }
| j        r-t          j        |	|
||| j        r| j        j        nd	
          }n|	| j        z  }	|	|
                    dd          z  }|Q|                    t          j                  }|                    |d d d d d d f          t?          d                    }|                     d          }|                     |          }||z  }|                    dd                              |||          }| !                    |          }| "                    |          }| #                    |          }|S )N)weightr3   r2      r   r      r'   r&   )rP   	dropout_pz-inf)$shaper;   rB   r@   catr4   rD   r,   FlinearrR   reshaper(   permuteunbindr<   	transposer=   r>   r+   r   type_asr$   scaled_dot_product_attentiontrainingr-   pr8   toboolmasked_fillfloatsoftmaxrG   rH   r.   )rI   xrO   rP   BNCr;   r)   qkvnptattns                 rM   forwardzEvaAttention.forwardk   s    '1a8{"hhqkk 9dk4;%LMM) M((1++C8OCC(1TX_8LLLC++aAt~r::BB1aAqQQCjjmmGAq!!A&&q!T^R@@JJ1aPPAA&&q!T^R@@JJ1aPPAA&&q!T^R@@JJ1aPPA(C	1QQQ4C4]+-@111aaaqqq=AQSW-X-XY_`aaaiijkllA	1QQQ4C4]+-@111aaaqqq=AQSW-X-XY_`aaaiijkllA? 	.1a#.2mC$.**  AA DJAB+++D$%LL44	''111dD!!!3C)D(DeFmmTT<<B<''D>>$''DqAKK1%%aA..IIaLLIIaLLNN1rN   )	r%   TTr   Fr&   r&   NNNN)__name__
__module____qualname__r@   jitFinalrf   __annotations__intrh   r   r   r7   Tensorrs   __classcell__rL   s   @rM   r#   r#   /   s%        	%%%%
 !"%&&+!!+/-17/ 7/7/ 7/ 	7/
 7/  #7/  $7/ 7/ 7/ $C=7/ !*7/ 7/ 7/ 7/ 7/ 7/x ,004	4 4 5<(4  -	4 4 4 4 4 4 4 4rN   r#   c            "            e Zd Zdddddddddddej        edfdeded	ed
ededededededededede	e         de
de
de	e         f  fdZdde	ej                 de	ej                 fdZ xZS )EvaBlockT      @Fr   r&   Nr'   r(   r)   r*   	mlp_ratio
swiglu_mlp	scale_mlpscale_attn_innerr+   r.   r-   	drop_pathinit_values	act_layerr0   r/   c                 >   t                                                        ||          | _        t          |||||	||
||r|nd	  	        | _        |)t          j        |t          j        |          z            nd| _	        |dk    rt          |          nt          j                    | _         ||          | _        t          ||z            }|rK|rt          |||r|nd|
          | _        nIt#          ||dz  |r|ndt
          j        d|
          | _        nt'          ||||r|nd|
          | _        |)t          j        |t          j        |          z            nd| _        |dk    rt          |          nt          j                    | _        dS 	ay  

        Args:
            dim:
            num_heads:
            qkv_bias:
            qkv_fused:
            mlp_ratio:
            swiglu_mlp:
            scale_mlp:
            scale_attn_inner:
            proj_drop:
            attn_drop:
            drop_path:
            init_values:
            act_layer:
            norm_layer:
            attn_head_dim:
        N)r(   r)   r*   r+   r-   r.   r/   r0   r&   )in_featureshidden_featuresr0   droprT   F)r   r   r0   r   	gate_lastr   )r   r   r   r0   r   )r6   r7   norm1r#   rr   r9   r?   r@   onesgamma_1r   rF   
drop_path1norm2r{   r   mlpr   SiLUr   gamma_2
drop_path2rI   r'   r(   r)   r*   r   r   r   r   r+   r.   r-   r   r   r   r0   r/   r   rL   s                     rM   r7   zEvaBlock.__init__   s   L 	Z__
 /'%5?zz4

 

 

	 GRF]r|K%*S//$ABBBcg1:R(9---R[]]Z__
cIo.. 	 ! #$3-6@zzD"	   " #$3a$7-6@zzD g#"    /#)2<::  DH GRF]r|K%*S//$ABBBcg1:R(9---R[]]rN   rO   rP   c           	      2   | j         ||                     |                     |                     |          ||                    z   }||                     |                     |                     |                              z   }n||                     | j         |                     |                     |          ||          z            z   }||                     | j        |                     |                     |                    z            z   }|S N)rO   rP   )r   r   rr   r   r   r   r   r   rI   rj   rO   rP   s       rM   rs   zEvaBlock.forward   s    <DOODIIdjjmm$R[I$\$\]]]ADOODHHTZZ]]$;$;<<<AADOODL499TZZ]]QUaj93k3k$klllADOODL488DJJqMM3J3J$JKKKArN   rt   )ru   rv   rw   r9   GELUr   r{   rf   rh   r   r   r7   r@   r|   rs   r}   r~   s   @rM   r   r      s|        ""!$#%*%&!!!+/"$'#,+/#TS TSTS TS 	TS
 TS TS TS TS #TS  #TS TS TS TS "%TS  TS  !!TS" $C=#TS TS TS TS TS TSl x5 RWR^I_        rN   r   c            "            e Zd ZdZdddddddddddej        ej        dfded	ed
edede	dedededede	de	de	de
e	         dedede
e         f  fdZdde
ej                 de
ej                 fdZ xZS )EvaBlockPostNormzF EVA block w/ post-norm and support for swiglu, MLP norm scale, ROPE. Tr   Fr   r&   Nr'   r(   r)   r*   r   r   r   r   r+   r.   r-   r   r   r   r0   r/   c                 v   t                                                       t          |||||	||
||r|nd	  	        | _         ||          | _        |dk    rt          |          nt          j                    | _        t          ||z            }|rK|rt          |||r|nd|
          | _        nIt          ||dz  |r|ndt          j        d|
          | _        nt          ||||r|nd|
          | _         ||          | _        |dk    rt          |          nt          j                    | _        dS r   )r6   r7   r#   rr   r   r   r9   rF   r   r{   r   r   r   r   r   r   r   r   s                     rM   r7   zEvaBlockPostNorm.__init__  sz   L 	 /'%5?zz4

 

 

	  Z__
1:R(9---R[]]cIo.. 	 ! #$3-6@zzD"	   " #$3a$7-6@zzD g#"    /#)2<::  DH  Z__
1:R(9---R[]]rN   rO   rP   c           
         ||                      |                     |                     |||                              z   }||                     |                     |                     |                              z   }|S r   )r   r   rr   r   r   r   r   s       rM   rs   zEvaBlockPostNorm.forwardZ  sg    

499QTY9+W+W X XYYY

488A;; 7 7888rN   rt   )ru   rv   rw   __doc__r9   r   r   r{   rf   rh   r   r   r7   r@   r|   rs   r}   r~   s   @rM   r   r     s       PP
 ""!$#%*%&!!!+/"$'#%<+/#RS RSRS RS 	RS
 RS RS RS RS #RS  #RS RS RS RS "%RS  RS  !!RS" $C=#RS RS RS RS RS RSh x5 RWR^I_        rN   r   c            A           e Zd ZdZddddddddd	d	d
dddddddddedd	dd	ddddddfdeeeeef         f         deeeeef         f         dedededededede	de	de
de	de	de	de
de
d e
d!e
d"e
d#e
d$ed%ee
         d&e	d'ed(e	d)e	d*e	d+e	d,e	d-eeeeef         ef                  d.e
f> fd/Zd0 Zd1 Zej        j        d2             Zej        j        dJd3            Zej        j        dKd4            Zej        j        d5ej        fd6            ZdLdedee         fd7Zd5eej        eej                 f         fd8Z	 	 	 	 	 	 dMd:ej        d;eeeee         f                  d<e	d=e	d>e	d?ed@e	d5eeej                 eej        eej                 f         f         fdAZ	 	 	 dNd;eeee         f         dCe	dDe	fdEZdF ZdKdGe	fdHZ dI Z! xZ"S )Or!   a!   Eva Vision Transformer w/ Abs & Rotary Pos Embed

    This class implements the EVA and EVA02 models that were based on the BEiT ViT variant
      * EVA - abs pos embed, global avg pool
      * EVA02 - abs + rope pos embed, global avg pool, SwiGLU, scale Norm in MLP (ala normformer)
          r2     avg      Tr   Fr&   Nr   gMbP?img_size
patch_sizein_chansnum_classesglobal_pool	embed_dimdepthr(   r)   r*   r   r   r   r   	drop_ratepos_drop_ratepatch_drop_rateproj_drop_rateattn_drop_ratedrop_path_rater0   r   class_tokennum_reg_tokensuse_abs_pos_embuse_rot_pos_embuse_post_normdynamic_img_sizedynamic_img_padref_feat_shapehead_init_scalec                     	
#$% t                                                       | _        | _        x _        x _         _        |rdnd|z    _        | _        d _	        i } |r$| 
                    t          dd                     t          d||||d|  _         j        j        }!t           j        d          r j                                        n|%|r(t#          j        t'          j        dd                    nd _        |r(t#          j        t'          j        d|                    nd _        |o j        du  _        |r0t#          j        t'          j        d|! j        z                       nd _        t#          j        |	           _        |dk    rt7          | j        d
           _        nd _        |r=|t;          |          nd}t=          z  d|rdn j        j        |           _         nd _         d t'          j!        d||          D             $|rtD          ntF          #t#          j$        #$	
 fdtK          |          D                        _&        %fdtK          |          D              _'         j        dk    }"|"rt#          j(                    n
            _)        |"r           nt#          j(                     _*        t#          j        |           _+        |dk    rt#          j,        |          nt#          j(                     _-         .                     j/                    j        ta           j        d            j        ta           j        d            j        ta           j        d            1                                 te           j-        t"          j,                  reta           j-        j3        d            j-        j3        j4        5                    |            j-        j6        j4        5                    |           dS dS )a  

        Args:
            img_size:
            patch_size:
            in_chans:
            num_classes:
            global_pool:
            embed_dim:
            depth:
            num_heads:
            qkv_bias:
            qkv_fused:
            mlp_ratio:
            swiglu_mlp:
            scale_mlp:
            scale_attn_inner:
            drop_rate:
            pos_drop_rate:
            proj_drop_rate:
            attn_drop_rate:
            drop_path_rate:
            norm_layer:
            init_values:
            class_token:
            use_abs_pos_emb:
            use_rot_pos_emb:
            use_post_norm:
            ref_feat_shape:
            head_init_scale:
        r   r   FNHWC)strict_img_size
output_fmt)r   r   r   r   r   
feat_ratioN)rd   T)r+   return_indices)	in_pixels
feat_shaper   c                 6    g | ]}|                                 S  )item).0rj   s     rM   
<listcomp>z Eva.__init__.<locals>.<listcomp>  s     JJJAqvvxxJJJrN   c                 X    g | ]&} 	j         
|                    'S ))r'   r(   r)   r*   r   r   r   r   r+   r.   r-   r   r0   r   r+   )r   ir   block_fndprr   r   r   r0   r(   r   r)   r*   r   r   rI   r   s     rM   r   z Eva.__init__.<locals>.<listcomp>  sm     %# %# %#" ! H#!##%#!1"&"8((a&%'  %# %# %#rN   c                 :    g | ]}t          d |           S )zblocks.)modulenum_chs	reductiondict)r   r   r   rs     rM   r   z Eva.__init__.<locals>.<listcomp>  sA     ^ ^ ^KLD!yAFFF^ ^ ^rN   r   {Gz?stdr   )7r6   r7   r   r   num_featureshead_hidden_sizer   r+   r   grad_checkpointingupdater   r   patch_embednum_patcheshasattrr   r9   r?   r@   rA   	cls_token	reg_token	cls_embed	pos_embedrE   pos_dropr   
patch_dropr   r   	grid_sizerO   linspacer   r   
ModuleListrangeblocksfeature_inforF   rG   fc_norm	head_dropr:   headapply_init_weightsr   fix_init_weight
isinstancerR   datamul_r3   )'rI   r   r   r   r   r   r   r   r(   r)   r*   r   r   r   r   r   r   r   r   r   r   r0   r   r   r   r   r   r   r   r   r   r   
embed_argsr   use_fc_normr   r   r   rL   s'   `     ` ```````   `` ``            @@@rM   r7   zEva.__init__h  s   B 	&&ENNND1DN'2"9!!^!K 0"'
 	Nd5VLLLMMM% 
!+
 
 
 
 &2-4T5E|-T-TdD'')))ZdGR\ek!Q	&B&BCCCX\Tblek!^Y&O&OPPPhl$?4)? Q`jK;)??KKM M Mei 	
]333Q*"&"8#  DOO #DO 		:H:TY~666Z^N*Y&#3S449I9S-	  DII DIJJ>5!I!IJJJ'4B##(m %# %# %# %# %# %# %# %# %# %# %# %# %# %# %# %# %# %#" 5\\#%# %# %# $ $$^ ^ ^ ^ ^PUV[P\P\^ ^ ^ &%/%0KBKMMMjj6K6K	0;Nzz),,,I..9DqBIi555bkmm	

4%&&&>%$.c2222>%$.c2222>%$.c2222di++ 	6$)*4444I!&&777IN$$_55555	6 	6rN   c                     d }t          | j                  D ]K\  }} ||j        j        j        j        |dz               ||j        j        j        j        |dz              Ld S )Nc                 Z    |                      t          j        d|z                       d S )Ng       @)div_mathsqrt)paramlayer_ids     rM   rescalez$Eva.fix_init_weight.<locals>.rescale  s(    JJtyx0011111rN   r   )	enumerater   rr   rH   rR   r   r   fc2)rI   r   r   layers       rM   r   zEva.fix_init_weight  s}    	2 	2 	2  )55 	= 	=OHeGEJO*/A>>>GEIM(-x!|<<<<	= 	=rN   c                     t          |t          j                  rCt          |j        d           |j        (t          j                            |j                   d S d S d S )Nr   r   )r   r9   r:   r   rR   r3   initzeros_)rI   ms     rM   r   zEva._init_weights  s_    a## 	'!(,,,,v!qv&&&&&	' 	'!!rN   c                     ddh}|S )Nr   r   r   )rI   nwds     rM   no_weight_decayzEva.no_weight_decay  s    K(
rN   c                     || _         d S N)r   )rI   enables     rM   set_grad_checkpointingzEva.set_grad_checkpointing  s    "(rN   c                 ,    t          dddg          }|S )Nz ^cls_token|pos_embed|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr   r   )rI   coarsematchers      rM   group_matcherzEva.group_matcher  s)    4-/CD
 
 
 rN   returnc                     | j         S r  )r   )rI   s    rM   get_classifierzEva.get_classifier%  s
    yrN   c                     || _         ||| _        |dk    rt          j        | j        |          nt          j                    | _        d S Nr   )r   r   r9   r:   r   rF   r   )rI   r   r   s      rM   reset_classifierzEva.reset_classifier)  sH    &"*D>IAooBIdnk:::SUS^S`S`			rN   c                    | j         rr|j        \  }}}}| j        t          | j        ||f| j                  }nd }|                    |d|          }| j        | j                            ||f          nd }n)| j        }| j        | j                                        nd }| j        =t          j
        | j                            |j        d         dd          |fd          }|||z   }| j        g }| j        :|                    | j                            |j        d         dd                     |                    | j                            |j        d         dd                     t          j
        ||gz   d          }|                     |          }| j        -|                     |          \  }}	||	t!          |||	          }||fS )Nr   rS   )rY   r   r   rV   )r   rY   r   r   r+   viewrO   	get_embedr   r@   rZ   expandr   appendr   r   r   )
rI   rj   rk   HWrm   r   rot_pos_embedto_catkeep_indicess
             rM   
_pos_embedzEva._pos_embed/  s     	UJAq!Q~)2NF&*&<  		 !	q"a  AAEAVDI//q!f/===\`MMI59Y5JDI//111PTM>%	4>00RDDaHaPPPA IA>%F~)dn33AGAJBGGHHHMM$.//
BCCDDD	&A3,A...AMM! ?&"ooa00OA|(\-E 6q- V V-rN   NCHWrj   indicesreturn_prefix_tokensrG   
stop_earlyr   intermediates_onlyc                 f    |dv s
J d            |dk    }g }	t          t           j                  |          \  }
}|j        \  }}}                     |          }                     |          \  }}t          j                                        s|s j        }n j        d|dz            }t          |          D ]B\  }} |||          }||
v r,|	
                    |r                     |          n|           C j        r fd|	D             } fd|	D             }	|r/ j                            ||f          \  fd	|	D             }	t          j                                        s|rt          t          |	|                    }	|r|	S                      |          }||	fS )
a)   Forward features that returns intermediates.
        Args:
            x: Input image tensor
            indices: Take last n blocks if an int, if is a sequence, select by matching indices
            return_prefix_tokens: Return both prefix and spatial intermediate tokens
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        )r%  NLCz>Output format for EVA-ViT features must be one of NCHW or NLC.r%  Nr   rO   c                 6    g | ]}|d d dj         f         S r  r   r   yrI   s     rM   r   z-Eva.forward_intermediates.<locals>.<listcomp>  s/    SSSQqqq!D$:"::;SSSrN   c                 6    g | ]}|d d j         d f         S r  r   r.  s     rM   r   z-Eva.forward_intermediates.<locals>.<listcomp>  s0    RRRqQqqq$"8"9"99:RRRrN   c                     g | ]B}|                     d                               dddd                                          CS )rS   r   r2   r   rT   )r]   r^   
contiguous)r   r/  rk   r  r   s     rM   r   z-Eva.forward_intermediates.<locals>.<listcomp>  sL    lllYZQYYq!Q33;;Aq!QGGRRTTlllrN   )r   lenr   rY   r   r$  r@   rx   is_scriptingr  r  rG   r+   dynamic_feat_sizelistzip)rI   rj   r&  r'  rG   r(  r   r)  r]   intermediatestake_indices	max_index_heightwidthr!  r   r   blkprefix_tokensrk   r  r   s   `                   @@@rM   forward_intermediateszEva.forward_intermediatesV  s   ( _,,,.n,,,&"6s4;7G7G"Q"Qi  g1feQ??1--=9!!## 	1: 	1[FF[)a-0F'' 	B 	BFAsAM***AL  $$T%@TYYq\\\qAAA ! 	SSSSS]SSSMRRRRMRRRM 	m#55vuoFFDAqllllll^klllMy%%'' 	D,@ 	D ]M!B!BCCM 	!  IIaLL-rN   r   
prune_norm
prune_headc                    t          t          | j                  |          \  }}| j        d|dz            | _        |rt          j                    | _        |r.t          j                    | _        |                     dd           |S )z@ Prune layers not required for specified intermediates.
        Nr   r    )r   r3  r   r9   rF   rG   r   r  )rI   r&  rA  rB  r9  r:  s         rM   prune_intermediate_layerszEva.prune_intermediate_layers  s~     #7s4;7G7G"Q"Qik.9q=.1 	&DI 	);==DL!!!R(((rN   c                 (   |                      |          }|                     |          \  }}| j        D ]G}| j        r1t          j                                        st          |||          }: |||          }H|                     |          }|S )Nr,  )	r   r$  r   r   r@   rx   r4  r   rG   )rI   rj   r!  r>  s       rM   forward_featureszEva.forward_features  s    Q??1--=; 	/ 	/C& /uy/E/E/G/G /sAM:::C...IIaLLrN   
pre_logitsc                    | j         r>| j         dk    r'|d d | j        d f                             d          n|d d df         }|                     |          }|                     |          }|r|n|                     |          S )Nr   r   rV   r   )r   r+   meanr   r   r   )rI   rj   rH  s      rM   forward_headzEva.forward_head  s     	d=A=MQV=V=V!!!T+,,,-22q2999\]^_^_^_ab^b\cALLOONN10qqDIIaLL0rN   c                 Z    |                      |          }|                     |          }|S r  )rG  rK  )rI   rj   s     rM   rs   zEva.forward  s-    !!!$$a  rN   )TFr  )NFFFr%  F)r   FT)#ru   rv   rw   r   r   r   r{   r   strrf   rh   r   r   r7   r   r   r@   rx   ignorer
  r  r  r9   Moduler  r  r|   r$  r   r@  rE  rG  rK  rs   r}   r~   s   @rM   r!   r!   `  s         5868#$ !"!$#%*!#%%'$&$&$&#,+/ $"#$($)"'%*$)DH%*A\6 \6CsCx01\6 c5c?23\6 	\6
 \6 \6 \6 \6 \6 \6 \6 \6 \6 \6 #\6  !\6" !#\6$ #%\6& "'\6( ")\6* "+\6, !-\6. "%/\60 1\62  3\64 "5\66 "7\68  9\6: #;\6< "=\6> %U5c?C+?%@A?\6@ #A\6 \6 \6 \6 \6 \6|= = =' ' ' Y   Y) ) ) ) Y    Y	    a aC ahsm a a a a% uU\8EL3I%IJ %  %  %  % T 8<).$$',8  8 |8  eCcN348  #'	8 
 8  8  8  !%8  
tEL!5tEL7I)I#JJ	K8  8  8  8 x ./$#	 3S	>*  	   "	 	 	1 1$ 1 1 1 1      rN   bicubicTc           	         i }|                      d|           } |                      d|           } |                      d|           } |                      d|           } d| v rd}n	d| v rd}nd	}|d
z   | v }|dz   | v }t          |          }|                                 D ]\  }	}
|r!|	                    |          r|	|d         }	n)d|	v r.d|	v rT|j        j        j        j        \  }}}}|
j        d         |k    s|
j        d         |k    rt          |
||f||d          }
nj|	dk    rd|
j        d         |j	        j        d         k    rCt          |dd          rdnt          |dd          }t          |
|j        j        |||d          }
|	                    dd          }	|	                    dd          }	|	                    dd          }	|	                    d d!          }	|	                    d"d#          }	|	                    d$d%          }	|r,|	                    d&d'          }	|	                    d(d)          }	|r)|	d*v r%|	d+k    s|	d,k    r|	                    d-d.          }	n|
||	<   |S )/zJ convert patch embedding weight from manual patchify + linear proj to conv	model_emamodelr   
state_dictzvisual.trunk.pos_embedzvisual.trunk.zvisual.pos_embedzvisual.rD  
mask_tokenzblocks.0.attn.q_proj.weightNrO   zpatch_embed.proj.weightrS   rX   T)interpolation	antialiasverboser   r   no_embed_classFr   r+   )new_sizer+   rW  rX  rY  z
mlp.ffn_lnzmlp.normzattn.inner_attn_lnz	attn.normzmlp.w12zmlp.fc1zmlp.w1z	mlp.fc1_gzmlp.w2z	mlp.fc1_xzmlp.w3zmlp.fc2rB   zq_proj.biasrD   zv_proj.bias)rV  zlm_head.weightzlm_head.biasnorm.weight	norm.biasr\  r]  rG   r   )getr3  items
startswithr   rH   rR   rY   r   r   getattrr   r   replace)rU  rT  rW  rX  out_dictprefixmim_weightsno_qkv
len_prefixro   rp   r;  r  r   r+   s                  rM   checkpoint_filter_fnrh    s    HZ88J44J*55Jj99J:-- 	z	)	)<':5K33zAFVJ  "" 3 31 	||F## jkkNQ;;$))*/6<JAq!Qwr{a172;!#3#3(F"/'    +!'!*0Ea0H"H"H%,U4De%L%L xRYZ_atvwRxRx&*4"3+#  A IIlJ//II*K88IIi++IIh,,IIh,,IIh	** 	3		(M22A		(M22A 	1 lllM!!Q+%5%5IIfi00 OrN   Fc                     |                     dd          }t          t          | |ft          t	          |d          d|}|S )Nout_indicesr2   getter)rj  feature_cls)pretrained_filter_fnfeature_cfg)popr   r!   rh  r   )variant
pretrainedkwargsrj  rT  s        rM   _create_evars    sY    **]A..K Wj1[hGGG  	 E LrN   rD  c                 8    | ddd dddt           t          dddd	|S )
Nr   )r2   r   r   g?rQ  Tzpatch_embed.projr   mit)urlr   
input_size	pool_sizecrop_pctrW  fixed_input_sizerJ  r   
first_conv
classifierlicense)r   r   )rv  rr  s     rM   _cfgr~    s9    =t (  # rN   z"eva_giant_patch14_224.clip_ft_in1kztimm/)	hf_hub_idz"eva_giant_patch14_336.clip_ft_in1k)r2   P  r  g      ?squash)r  rw  ry  	crop_modez(eva_giant_patch14_336.m30m_ft_in22k_in1k)r  rJ  r   rw  ry  r  z(eva_giant_patch14_560.m30m_ft_in22k_in1k)r2   0  r  z.eva02_base_patch14_448.mim_in22k_ft_in22k_in1k)r2     r  z/eva02_large_patch14_448.mim_in22k_ft_in22k_in1kz.eva02_large_patch14_448.mim_m38m_ft_in22k_in1kz(eva02_tiny_patch14_336.mim_in22k_ft_in1k)r  rw  ry  z)eva02_small_patch14_336.mim_in22k_ft_in1kz(eva02_base_patch14_448.mim_in22k_ft_in1kz)eva02_large_patch14_448.mim_in22k_ft_in1kz(eva02_large_patch14_448.mim_m38m_ft_in1kz)eva02_base_patch14_448.mim_in22k_ft_in22kiQU  )r  rw  ry  r  r   z*eva02_large_patch14_448.mim_in22k_ft_in22kz)eva02_large_patch14_448.mim_m38m_ft_in22kz eva02_tiny_patch14_224.mim_in22k)r  r   z!eva02_small_patch14_224.mim_in22kz3timm/eva_giant_patch14_clip_224.laion400m_s11b_b41kzopen_clip_pytorch_model.bin   )r  hf_hub_filenamer   z8timm/eva_giant_patch14_plus_clip_224.merged2b_s11b_b114kz3timm/eva02_base_patch16_clip_224.merged2b_s8b_b131k   z4timm/eva02_large_patch14_clip_224.merged2b_s4b_b131kr   z3timm/eva02_large_patch14_clip_336.merged2b_s6b_b61k)r  r  rw  ry  r   z6timm/eva02_enormous_patch14_clip_224.laion2b_s4b_b115kz;timm/eva02_enormous_patch14_plus_clip_224.laion2b_s9b_b144k)r   )r2      r  gffffff?)      ?r  r  )r  rw  ry  rJ  r   )z eva02_base_patch14_224.mim_in22kz!eva02_large_patch14_224.mim_in22kz eva02_large_patch14_224.mim_m38mz$eva_giant_patch14_clip_224.laion400mz#eva_giant_patch14_clip_224.merged2bz$eva02_base_patch16_clip_224.merged2bz%eva02_large_patch14_clip_224.merged2bz%eva02_large_patch14_clip_336.merged2bz'eva02_enormous_patch14_clip_224.laion2bz,eva02_enormous_patch14_clip_224.laion2b_plusz(eva02_enormous_patch14_clip_224.pretrainz-vit_medium_patch16_rope_reg1_gap_256.sbb_in1kz.vit_mediumd_patch16_rope_reg1_gap_256.sbb_in1kz.vit_betwixt_patch16_rope_reg4_gap_256.sbb_in1kz+vit_base_patch16_rope_reg1_gap_256.sbb_in1kr  c           	      `    t          ddddd          }t          d	d| it          |fi |}|S )
. EVA-g model https://arxiv.org/abs/2211.07636      (   r   tE]t@r   r   r   r(   r   eva_giant_patch14_224rq  )r  r   rs  rq  rr  
model_argsrT  s       rM   r  r    K     t2WbcccJeeJe$zJdJd]cJdJdeeELrN   c           	      `    t          ddddd          }t          d	d| it          |fi |}|S )
r  r  r  r  r   r  r  eva_giant_patch14_336rq  )r  r  r  s       rM   r  r    r  rN   c           	      `    t          ddddd          }t          d	d| it          |fi |}|S )
r  r  r  r  r   r  r  eva_giant_patch14_560rq  )r  r  r  s       rM   r  r    r  rN   c                 h    t          ddddddddd		  	        }t          dd| it          |fi |}|S )Nr   r     r   r2   UUUUUU@Tr   r   	r   r   r   r   r(   r   r   r   r   eva02_tiny_patch14_224rq  )r  r  r  s       rM   r  r    b    
 
 
J ffZf4PZKeKe^dKeKeffELrN   c                 h    t          ddddddddd		  	        }t          dd| it          |fi |}|S )Nr   r    r      r  Tr  r  eva02_small_patch14_224rq  )r  r  r  s       rM   r  r    b    
 
 
J ggjgDQ[LfLf_eLfLfggELrN   c                 l    t          ddddddddddd	          }t          dd| it          |fi |}|S )Nr   r  r   r   Fr  Tr  r   r   r   r   r(   r*   r   r   r   r   r   eva02_base_patch14_224rq  )r  r  r  s       rM   r  r    h      J ffZf4PZKeKe^dKeKeffELrN   c                 l    t          ddddddddddd	
          }t          dd| it          |fi |}|S )Nr   r  r     r   r  FTr  r   r   r   r   r(   r   r*   r   r   r   r   eva02_large_patch14_224rq  )r  r  r  s       rM   r  r  "  h      J ggjgDQ[LfLf_eLfLfggELrN   c                 h    t          ddddddddd		  	        }t          dd| it          |fi |}|S )Nr  r  r  r   r2   r  Tr  r  eva02_tiny_patch14_336rq  )r  r  r  s       rM   r  r  5  r  rN   c                 h    t          ddddddddd		  	        }t          dd| it          |fi |}|S )Nr  r  r  r   r  r  Tr  r  eva02_small_patch14_336rq  )r  r  r  s       rM   r  r  F  r  rN   c                 l    t          ddddddddddd	          }t          dd| it          |fi |}|S )Nr  r  r   r   Fr  Tr  r  eva02_base_patch14_448rq  )r  r  r  s       rM   r  r  W  r  rN   c                 l    t          ddddddddddd	
          }t          dd| it          |fi |}|S )Nr  r  r  r  r   r  FTr  r  eva02_large_patch14_448rq  )r  r  r  s       rM   r  r  j  r  rN   c                     t          ddddd|                    dd                    }t          dd
| it          |fi |}|S )zB EVA-g CLIP model (only difference from non-CLIP is the pooling)  r  r  r  r   r  r   token)r   r   r   r(   r   r   eva_giant_patch14_clip_224rq  )r  r   ro  rs  r  s       rM   r  r  }  se     R2JJ}g668 8 8J jjjtT^OiOibhOiOijjELrN   c                     t          dddddddddddd|                    d	d
                    }t          dd| it          |fi |}|S )zU A EVA-CLIP specific variant that adds additional attn scale layernorm to eva02_base r   r   r   r   Fr  Tr  r   r  )r   r   r   r   r(   r*   r   r   r   r   r   r   r   eva02_base_patch16_clip_224rq  )r  r  r  s       rM   r  r    s     JJ}g66  J kk*kPTU_PjPjciPjPjkkELrN   c                     t          dddddddddddd	|                    d
d                    }t          dd| it          |fi |}|S )V A EVA-CLIP specific variant that adds additional attn scale layernorm to eva02_large r   r  r  r  r   r  FTr  r   r  r   r   r   r   r(   r   r*   r   r   r   r   r   r   eva02_large_patch14_clip_224rq  )r  r  r  s       rM   r  r         JJ}g66  J ll:lQUV`QkQkdjQkQkllELrN   c                     t          dddddddddddd	|                    d
d                    }t          dd| it          |fi |}|S )r  r  r  r  r  r   r  FTr  r   r  r  eva02_large_patch14_clip_336rq  )r  r  r  s       rM   r  r    r  rN   c                     t          ddddddd|                    dd	          
          }t          dd| it          |fi |}|S )zD A EVA-CLIP specific variant that uses residual post-norm in blocks r   r  i   @   r   gI$I$!@Tr   r  )r   r   r   r   r(   r   r   r   eva02_enormous_patch14_clip_224rq  )r  r  r  s       rM   r  r    sp     JJ}g66	 	 	J oojoTXYcTnTngmTnTnooELrN   c                 p    t          dddddddddd	ddd
          }t          dd| it          |fi |}|S )Nr  r   r  r   r%   Th㈵>Fr   r  r   r   r   r   r(   r*   r)   r   r   r   r   r   r   $vit_medium_patch16_rope_reg1_gap_256rq  )r  r  r  s       rM   r  r    so      J tt:tY]^hYsYslrYsYsttELrN   c                 p    t          dddddddddd	ddd
          }t          dd| it          |fi |}|S )Nr  r   r     r%   TFr  r   r  r  %vit_mediumd_patch16_rope_reg1_gap_256rq  )r  r  r  s       rM   r  r    so      J uuJuZ^_iZtZtmsZtZtuuELrN   c                 p    t          dddddddddd	ddd
          }t          dd| it          |fi |}|S )Nr  r   i  r   
   Tr  FrU   r  r  %vit_betwixt_patch16_rope_reg4_gap_256rq  )r  r  r  s       rM   r  r    so      J uuJuZ^_iZtZtmsZtZtuuELrN   c                 p    t          ddddddddddddd	
          }t          dd| it          |fi |}|S )Nr  r   r   r   Tr  Fr   r  r  "vit_base_patch16_rope_reg1_gap_256rq  )r  r  r  s       rM   r  r    so      J rrrW[\fWqWqjpWqWqrrELrN   )rQ  TrM  )rD  )Jr   r   typingr   r   r   r   r   r@   torch.nnr9   torch.nn.functional
functionalr[   torch.utils.checkpointr   	timm.datar	   r
   r   r   timm.layersr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   _builderr   	_featuresr   	_registryr   r    __all__rP  r#   r   r   r!   rh  rs  r~  default_cfgsr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   rN   rM   <module>r     s
   4  9 9 9 9 9 9 9 9 9 9 9 9 9 9                 - - - - - - d d d d d d d d d d d d                                  + * * * * * + + + + + + < < < < < < < <'p p p p p29 p p pf_ _ _ _ _ry _ _ _DY Y Y Y Yry Y Y YxV V V V V") V V Vx
  	L L L L^       %$ t& )$$+ + +t& )$$ 3(+D +D +Dt& /"(< 3(	1D 1D 1Dt&$ /"(< 3(	1D 1D 1D%t&2 5dd 3(7 7 73t&< 6tt 3(8 8 8=t&F 5dd 3(7 7 7Gt&T / 31 1 1Ut&^ 0 32 2 2_t&h / 31 1 1it&r 0 32 2 2st&| / 31 1 1}t&J 0 3(PU2 2 2Kt&T 1$$ 3(PU3 3 3Ut&^ 0 3(PU2 2 2_t&l ') ) )mt&v (* * *wt&@ )-) ) )
 *.* * *
 )-) ) ) -1DG5	- - - ,04L5	, , , -1DG5	- - - .2TH5	. . . .2TG5 3. . . 04tJ5	0 0 0 59DO5	5 5 5 151 1 1
 6:T 4/6 6 6
 7;d 4/7 7 7
 7;d 47 7 7 484 4/4 4 4_t& t& t& t tn                    #       3       #    $  3    $  #       3       #    $  3    $  c      s    *      *      *  3           (      (      (  c      rN   