
    Ngio                        d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
 	 ddlmZ n# e$ r	 ddlmZ Y nw xY wddlZddlmZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZmZmZmZmZ d
dl m!Z! d
dl"m#Z# d
dl$m%Z% d
dl&m'Z'm(Z( d
dl)m*Z* dgZ+ ej,        e-          Z. G d dej/                  Z0 G d dej/                  Z1 G d dej/                  Z2 G d dej/                  Z3 G d dej/                  Z4d<dZ5d=dZ6 e' e6ddd !           e6dd"#           e6d$d%           e6d&d%           e6d'd%           e6d(d%           e6d)d%           e6d*d%           e6             e6d+d%           e6             e6d,d%           e6            d-          Z7e(d<d.e4fd/            Z8e(d<d.e4fd0            Z9e(d<d.e4fd1            Z:e(d<d.e4fd2            Z;e(d<d.e4fd3            Z<e(d<d.e4fd4            Z=e(d<d.e4fd5            Z>e(d<d.e4fd6            Z?e(d<d.e4fd7            Z@e(d<d.e4fd8            ZAe(d<d.e4fd9            ZBe(d<d.e4fd:            ZCe(d<d.e4fd;            ZDdS )>z Relative Position Vision Transformer (ViT) in PyTorch

NOTE: these models are experimental / WIP, expect changes

Hacked together by / Copyright 2022, Ross Wightman
    N)partial)ListOptionalTupleTypeUnion)Literal)Final)
checkpointIMAGENET_INCEPTION_MEANIMAGENET_INCEPTION_STD)
PatchEmbedMlpDropPath	RelPosMlp
RelPosBiasuse_fused_attn	LayerType   )build_model_with_cfg)feature_take_indices)named_apply)generate_default_cfgsregister_model)get_init_weights_vitVisionTransformerRelPosc                   t     e Zd ZU ee         ed<   ddddddej        f fd	Zd	de	e
j                 fdZ xZS )
RelPosAttention
fused_attn   FN        c	                 |   t                                                       ||z  dk    s
J d            || _        ||z  | _        | j        dz  | _        t                      | _        t          j        ||dz  |          | _	        |r || j                  nt          j
                    | _        |r || j                  nt          j
                    | _        |r ||          nd | _        t          j        |          | _        t          j        ||          | _        t          j        |          | _        d S )Nr   z$dim should be divisible by num_headsg         )bias	num_heads)super__init__r'   head_dimscaler   r    nnLinearqkvIdentityq_normk_normrel_posDropout	attn_dropproj	proj_drop)
selfdimr'   qkv_biasqk_normrel_pos_clsr4   r6   
norm_layer	__class__s
            a/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/vision_transformer_relpos.pyr)   zRelPosAttention.__init__&   s     	Y!###%K###"y(]d*
(**9S#'9993:Mjj///3:Mjj///;FP{{Y7777DI..Ic3''	I..    shared_rel_posc                    |j         \  }}}|                     |                              ||d| j        | j                                      ddddd          }|                    d          \  }}}	|                     |          }|                     |          }| j	        re| j
        | j
                                        }
n||}
nd }
t          j        j                            |||	|
| j        r| j        j        nd          }ny|| j        z  }||                    dd	          z  }| j
        | 
                    ||
          }n|||z   }|                    d	          }|                     |          }||	z  }|                    dd                              |||          }|                     |          }|                     |          }|S )Nr$      r   r      r"   )	attn_mask	dropout_pr@   r8   )shaper.   reshaper'   r*   permuteunbindr0   r1   r    r2   get_biastorchr,   
functionalscaled_dot_product_attentiontrainingr4   pr+   	transposesoftmaxr5   r6   )r7   xr@   BNCr.   qkv	attn_biasattns               r>   forwardzRelPosAttention.forward@   s   '1ahhqkk!!!Q4>4=IIQQRSUVXY[\^_``**Q--1aKKNNKKNN? 	|' L1133		+*		 	#@@1a#.2mC$.** A  AA DJAq{{2r***D|'||D|HH+n,<<B<''D>>$''DqAKK1%%aA..IIaLLNN1r?   N)__name__
__module____qualname__r
   bool__annotations__r,   	LayerNormr)   r   rO   Tensorr_   __classcell__r=   s   @r>   r   r   #   s         d
 |/ / / / / /4" "%,)? " " " " " " " "r?   r   c                   &     e Zd Zd fd	Zd Z xZS )
LayerScaleh㈵>Fc                     t                                                       || _        t          j        |t          j        |          z            | _        d S r`   )r(   r)   inplacer,   	ParameterrO   onesgamma)r7   r8   init_valuesrn   r=   s       r>   r)   zLayerScale.__init__f   sB    \+
3"?@@


r?   c                 X    | j         r|                    | j                  n	|| j        z  S r`   )rn   mul_rq   r7   rV   s     r>   r_   zLayerScale.forwardk   s(    %)\Eqvvdj!!!q4:~Er?   )rl   F)ra   rb   rc   r)   r_   rh   ri   s   @r>   rk   rk   e   sY        A A A A A A
F F F F F F Fr?   rk   c            
       l     e Zd Zddddddddej        ej        f
 fd	Zddeej	                 fdZ
 xZS )	RelPosBlock      @FNr"   c           	      n   t                                                        ||          | _        t          ||||||	|          | _        |rt          ||          nt          j                    | _        |
dk    rt          |
          nt          j                    | _
         ||          | _        t          |t          ||z            ||          | _        |rt          ||          nt          j                    | _        |
dk    rt          |
          nt          j                    | _        d S )Nr9   r:   r;   r4   r6   )rr   r"   in_featureshidden_features	act_layerdrop)r(   r)   norm1r   r^   rk   r,   r/   ls1r   
drop_path1norm2r   intmlpls2
drop_path2r7   r8   r'   	mlp_ratior9   r:   r;   rr   r6   r4   	drop_pathr~   r<   r=   s                r>   r)   zRelPosBlock.__init__q   s2    	Z__
##
 
 
	 @K]:c{;;;;PRP[P]P]1:R(9---R[]]Z__
i00	
 
 
 @K]:c{;;;;PRP[P]P]1:R(9---R[]]r?   r@   c           
      N   ||                      |                     |                     |                     |          |                              z   }||                     |                     |                     |                     |                                        z   }|S NrH   )r   r   r^   r   r   r   r   r   r7   rV   r@   s      r>   r_   zRelPosBlock.forward   s|    4::a==Q_)`)` a abbb$**Q--)@)@ A ABBBr?   r`   )ra   rb   rc   r,   GELUrf   r)   r   rO   rg   r_   rh   ri   s   @r>   rw   rw   o   s         g|&S &S &S &S &S &SP %,)?        r?   rw   c            
       r     e Zd Zddddddddej        ej        f
 fd	Zd Zd	dee	j
                 fdZ xZS )
ResPostRelPosBlockrx   FNr"   c           	         t                                                       || _        t          ||||||	|          | _         ||          | _        |
dk    rt          |
          nt          j                    | _	        t          |t          ||z            ||          | _         ||          | _        |
dk    rt          |
          nt          j                    | _        |                                  d S )Nrz   r"   r{   )r(   r)   rr   r   r^   r   r   r,   r/   r   r   r   r   r   r   init_weightsr   s                r>   r)   zResPostRelPosBlock.__init__   s     	&##
 
 
	  Z__
1:R(9---R[]]i00	
 
 
  Z__
1:R(9---R[]]r?   c                     | j         `t          j                            | j        j        | j                    t          j                            | j        j        | j                    d S d S r`   )rr   r,   init	constant_r   weightr   r7   s    r>   r   zResPostRelPosBlock.init_weights   sY    'Gdj/1ABBBGdj/1ABBBBB ('r?   r@   c           	         ||                      |                     |                     ||                              z   }||                     |                     |                     |                              z   }|S r   )r   r   r^   r   r   r   r   s      r>   r_   zResPostRelPosBlock.forward   se    

499Q~9+V+V W WXXX

488A;; 7 7888r?   r`   )ra   rb   rc   r,   r   rf   r)   r   r   rO   rg   r_   rh   ri   s   @r>   r   r      s         g|' ' ' ' ' 'RC C C %,)?        r?   r   c            8       t    e Zd ZdZddddddddd	d
dddddddddddddeddefdeeeeef         f         deeeeef         f         dedede	d         dededede
dededee
         deded ed!ee         d"ed#e
d$e
d%e
d&e
d'e	d(         d)ed*eej                 d+ee         d,ee         d-eej                 f6 fd.ZdId0Zd1 Zej        j        d2             Zej        j        dJd3            Zej        j        dKd4            Zej        j        d5ej        fd6            ZdLdedee         fd7Z	 	 	 	 	 	 dMd9ej        d:eeeee         f                  d;ed<ed=ed>ed?ed5eeej                 eej        eej                 f         f         fd@Z	 	 	 dNd:eeee         f         dBedCefdDZ dE Z!dJdFefdGZ"dH Z# xZ$S )Or   ah   Vision Transformer w/ Relative Position Bias

    Differing from classic vit, this impl
      * uses relative position index (swin v1 / beit) or relative log coord + mlp (swin v2) pos embed
      * defaults to no class token (can be enabled)
      * defaults to global avg pool for head (can be changed)
      * layer-scale (residual branch gain) enabled
          r$     avg      rx   TFư>r   Nr"   skipimg_size
patch_sizein_chansnum_classesglobal_pool) r   tokenmap	embed_dimdepthr'   r   r9   r:   rr   class_tokenfc_normrel_pos_typerel_pos_dimr@   	drop_rateproj_drop_rateattn_drop_ratedrop_path_rateweight_init)r   jaxmocor   fix_initembed_layerr<   r~   block_fnc                   	
  t                                                       |dv sJ |s|dk    sJ pt          t          j        d          pt          j        || _        || _        x| _        x| _	        | _
        |rdnd| _        d| _         ||||          | _        | j        j        }t          | j        d	          r| j                                        n|t#          || j        
          }|                    d          r#|r||d<   d|v rd|d<   t          t&          fi | nt          t(          fi | d| _        |r            | _        d |r-t          j        t/          j        d| j                            nd| _        d t/          j        d||          D             t          j        	
 fdt9          |          D                       | _        fdt9          |          D             | _        |s           nt          j                    | _         |r           nt          j                    | _!        t          j"        |          | _#        |dk    rt          j$        | j
        |          nt          j                    | _%        |dk    r| &                    |           |r| '                                 dS dS )aE  
        Args:
            img_size: input image size
            patch_size: patch size
            in_chans: number of input channels
            num_classes: number of classes for classification head
            global_pool: type of global pooling for final sequence (default: 'avg')
            embed_dim: embedding dimension
            depth: depth of transformer
            num_heads: number of attention heads
            mlp_ratio: ratio of mlp hidden dim to embedding dim
            qkv_bias: enable bias for qkv if True
            qk_norm: Enable normalization of query and key in attention
            init_values: layer-scale init values
            class_token: use class token (default: False)
            fc_norm: use pre classifier norm instead of pre-pool
            rel_pos_type: type of relative position
            shared_rel_pos: share relative pos across all blocks
            drop_rate: dropout rate
            proj_drop_rate: projection dropout rate
            attn_drop_rate: attention dropout rate
            drop_path_rate: stochastic depth rate
            weight_init: weight init scheme
            fix_init: apply weight initialization fix (scaling w/ layer index)
            embed_layer: patch embedding layer
            norm_layer: normalization layer
            act_layer: MLP activation layer
        r   r   r   r   r   )epsr   r   F)r   r   r   r   
feat_ratio)window_sizeprefix_tokensr   
hidden_dimswinmodeNr&   c                 6    g | ]}|                                 S  )item).0rV   s     r>   
<listcomp>z4VisionTransformerRelPos.__init__.<locals>.<listcomp>@  s     JJJAqvvxxJJJr?   c                 J    g | ]} 
|         	            S ))r8   r'   r   r9   r:   r;   rr   r6   r4   r   r<   r~   r   )r   ir~   r   r   dprr   rr   r   r<   r'   r   r:   r9   r;   s     r>   r   z4VisionTransformerRelPos.__init__.<locals>.<listcomp>A  sd     %# %# %#  H##!''((a&%#  %# %# %#r?   c                 :    g | ]}t          d |           S )zblocks.)modulenum_chs	reductiondict)r   r   r   rs     r>   r   z4VisionTransformerRelPos.__init__.<locals>.<listcomp>Q  sA     ^ ^ ^KLD!yAFFF^ ^ ^r?   r   )(r(   r)   r   r,   rf   r   r   r   num_featureshead_hidden_sizer   num_prefix_tokensgrad_checkpointingpatch_embed	grid_sizehasattrr   r   
startswithr   r   r@   ro   rO   zeros	cls_tokenlinspace
ModuleListrangeblocksfeature_infor/   normr   r3   	head_dropr-   headr   fix_init_weight)"r7   r   r   r   r   r   r   r   r'   r   r9   r:   rr   r   r   r   r   r@   r   r   r   r   r   r   r   r<   r~   r   	feat_sizerel_pos_argsr   r   r;   r=   s"         ` `````      ``    ```  @@@r>   r)   z VisionTransformerRelPos.__init__   s   t 	222224kW4444B72<T#B#B#B
(	&&ENNND1DN&1!8q"'&;!	
 
 
 $.	-4T5E|-T-TdD'')))Zd	AWXXX""5)) 	> 9-8\*%%'-V$!)<<|<<KK!*====K" 	"-+	"B"B"BDK\gqek!T5KY&W&WXXXmqJJ>5!I!IJJJm %# %# %# %# %# %# %# %# %# %# %# %# %# %# %# %# 5\\%# %# %# $ $ ^ ^ ^ ^ ^PUV[P\P\^ ^ ^18KJJy)))bkmm	 18Jzz),,,R[]]I..>IAooBIdnk:::SUS^S`S`	&  k*** 	#  """""	# 	#r?   r   c                     |dv sJ | j         &t          j                            | j         d           t	          t          |          |            d S )N)r   r   r   r   )std)r   r,   r   normal_r   r   )r7   r   s     r>   r   z$VisionTransformerRelPos.init_weights_  sU    *****>%GOODNO555(..55555r?   c                     d }t          | j                  D ]K\  }} ||j        j        j        j        |dz               ||j        j        j        j        |dz              Ld S )Nc                 Z    |                      t          j        d|z                       d S )Ng       @)div_mathsqrt)param	_layer_ids     r>   rescalez8VisionTransformerRelPos.fix_init_weight.<locals>.rescalef  s(    JJtyy1122222r?   r   )	enumerater   r^   r5   r   datar   fc2)r7   r   layer_idlayers       r>   r   z'VisionTransformerRelPos.fix_init_weighte  s}    	3 	3 	3  )55 	= 	=OHeGEJO*/A>>>GEIM(-x!|<<<<	= 	=r?   c                     dhS )Nr   r   r   s    r>   no_weight_decayz'VisionTransformerRelPos.no_weight_decaym  s
    }r?   c                 (    t          dddg          S )Nz^cls_token|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr   r   )r7   coarses     r>   group_matcherz%VisionTransformerRelPos.group_matcherq  s%    *-/CD
 
 
 	
r?   c                     || _         d S r`   )r   )r7   enables     r>   set_grad_checkpointingz.VisionTransformerRelPos.set_grad_checkpointingx  s    "(r?   returnc                     | j         S r`   )r   r   s    r>   get_classifierz&VisionTransformerRelPos.get_classifier|  s
    yr?   c                     || _         ||dv sJ || _        |dk    rt          j        | j        |          nt          j                    | _        d S )Nr   r   )r   r   r,   r-   r   r/   r   )r7   r   r   s      r>   reset_classifierz(VisionTransformerRelPos.reset_classifier  sZ    &""66666*D>IAooBIdnk:::SUS^S`S`			r?   NCHWrV   indicesreturn_prefix_tokensr   
stop_early
output_fmtintermediates_onlyc                     |dv s
J d            |dk    }g }	t          t           j                  |          \  }
}|j        \  }}}                     |          } j        =t          j         j                            |j        d         dd          |fd          } j	         j	        
                                nd}t          j                                        s|s j        }n j        d|dz            }t          |          D ]B\  }} |||	          }||
v r,|	                    |r                     |          n|           C j        r fd
|	D             } fd|	D             }	|r/ j                            ||f          \  fd|	D             }	t          j                                        s|rt%          t'          |	|                    }	|r|	S                      |          }||	fS )a=   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            return_prefix_tokens: Return both prefix and spatial intermediate tokens
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r  NLCz)Output format must be one of NCHW or NLC.r  Nr   rG   r   rI   rH   c                 6    g | ]}|d d dj         f         S )Nr   r   r   yr7   s     r>   r   zAVisionTransformerRelPos.forward_intermediates.<locals>.<listcomp>  s/    SSSQqqq!D$:"::;SSSr?   c                 6    g | ]}|d d j         d f         S r`   r  r  s     r>   r   zAVisionTransformerRelPos.forward_intermediates.<locals>.<listcomp>  s0    RRRqQqqq$"8"9"99:RRRr?   c                     g | ]B}|                     d                               dddd                                          CS )rG   r   r$   r   rB   )rK   rL   
contiguous)r   r  rW   HWs     r>   r   zAVisionTransformerRelPos.forward_intermediates.<locals>.<listcomp>  sL    lllYZQYYq!Q33;;Aq!QGGRRTTlllr?   )r   lenr   rJ   r   r   rO   catexpandr@   rN   jitis_scriptingr   appendr   r   dynamic_feat_sizelistzip)r7   rV   r  r  r   r  r	  r
  rK   intermediatestake_indices	max_index_heightwidthr@   r   r   blkr   rW   r  r  s   `                   @@@r>   forward_intermediatesz-VisionTransformerRelPos.forward_intermediates  sU   . _,,,.Y,,,&"6s4;7G7G"Q"Qi  g1feQ>%	4>00RDDaHaPPPA;?;N;Z,55777`d9!!## 	1: 	1[FF[)a-0F'' 	B 	BFAsAn555AL  $$T%@TYYq\\\qAAA ! 	SSSSS]SSSMRRRRMRRRM 	m#55vuoFFDAqllllll^klllMy%%'' 	D,@ 	D ]M!B!BCCM 	!  IIaLL-r?   r   
prune_norm
prune_headc                    t          t          | j                  |          \  }}| j        d|dz            | _        |rt          j                    | _        |r.t          j                    | _        |                     dd           |S )z@ Prune layers not required for specified intermediates.
        Nr   r   r   )r   r  r   r,   r/   r   r   r  )r7   r  r'  r(  r   r!  s         r>   prune_intermediate_layersz1VisionTransformerRelPos.prune_intermediate_layers  s~     #7s4;7G7G"Q"Qik.9q=.1 	&DI 	);==DL!!!R(((r?   c                    |                      |          }| j        =t          j        | j                            |j        d         dd          |fd          }| j        | j                                        nd }| j        D ]G}| j	        r1t          j
                                        st          |||          }: |||          }H|                     |          }|S )Nr   rG   r   rI   rH   )r   r   rO   r  r  rJ   r@   rN   r   r   r  r  r   r   )r7   rV   r@   r%  s       r>   forward_featuresz(VisionTransformerRelPos.forward_features  s    Q>%	4>00RDDaHaPPPA;?;N;Z,55777`d; 	: 	:C& :uy/E/E/G/G :sAnEEEC.999IIaLLr?   
pre_logitsc                    | j         r>| j         dk    r'|d d | j        d f                             d          n|d d df         }|                     |          }|                     |          }|r|n|                     |          S )Nr   r   rI   r   )r   r   meanr   r   r   )r7   rV   r-  s      r>   forward_headz$VisionTransformerRelPos.forward_head  s     	d=A=MQV=V=V!!!T+,,,-22q2999\]^_^_^_ab^b\cALLOONN10qqDIIaLL0r?   c                 Z    |                      |          }|                     |          }|S r`   )r,  r0  ru   s     r>   r_   zVisionTransformerRelPos.forward  s-    !!!$$a  r?   r   F)Tr`   )NFFFr  F)r   FT)%ra   rb   rc   __doc__r   rw   r   r   r   r	   floatrd   r   strr   r,   Moduler   r)   r   r   rO   r  ignorer   r   r   r  r  rg   r   r&  r*  r,  r0  r_   rh   ri   s   @r>   r   r      sL         5868#>C !!!+/ %! %)-#(!$&$&$&>D"+5.2-1(39}# }#CsCx01}# c5c?23}# 	}#
 }# !!:;}# }# }# }# }# }# }# "%}# }# }#  !}#" "##}#$ !%}#& '}#( ")}#* "+}#, "-}#. !!:;/}#0 1}#2 bi3}#4 !+5}#6  	*7}#8 29o9}# }# }# }# }# }#~6 6 6 6= = = Y   Y
 
 
 
 Y) ) ) ) Y	    a aC ahsm a a a a 8<).$$',?  ? |?  eCcN34?  #'	? 
 ?  ?  ?  !%?  
tEL!5tEL7I)I#JJ	K?  ?  ?  ? F ./$#	 3S	>*  	   "  1 1$ 1 1 1 1      r?   Fc                 |    |                     dd          }t          t          | |fdt          |d          i|}|S )Nout_indicesr$   feature_cfggetter)r:  feature_cls)popr   r   r   )variant
pretrainedkwargsr:  models        r>   !_create_vision_transformer_relposrC    sV    **]A..K * [hGGG  E
 Lr?   r   c                 6    | ddd dddt           t          ddd|S )	Nr   )r$   r   r   g?bicubicTzpatch_embed.projr   )urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizer/  r   
first_conv
classifierr   )rF  rA  s     r>   _cfgrN    s7    =t'0F(   r?   zhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_replos_base_patch32_plus_rpn_256-sw-dd486f51.pthztimm/)r$      rO  )rF  	hf_hub_idrG  )r$      rQ  )rF  rG  zhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_small_patch16_224-sw-ec2778b4.pth)rF  rP  zhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_medium_patch16_224-sw-11c174af.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_base_patch16_224-sw-49049aed.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_srelpos_small_patch16_224-sw-6cdb8849.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_srelpos_medium_patch16_224-sw-ad702b8c.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_medium_patch16_cls_224-sw-cfe8e259.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_base_patch16_gapcls_224-sw-1a341d6c.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_medium_patch16_rpn_224-sw-5d2befd8.pth)z,vit_relpos_base_patch32_plus_rpn_256.sw_in1kz*vit_relpos_base_patch16_plus_240.untrainedz$vit_relpos_small_patch16_224.sw_in1kz%vit_relpos_medium_patch16_224.sw_in1kz#vit_relpos_base_patch16_224.sw_in1kz%vit_srelpos_small_patch16_224.sw_in1kz&vit_srelpos_medium_patch16_224.sw_in1kz)vit_relpos_medium_patch16_cls_224.sw_in1kz)vit_relpos_base_patch16_cls_224.untrainedz*vit_relpos_base_patch16_clsgap_224.sw_in1kz*vit_relpos_small_patch16_rpn_224.untrainedz)vit_relpos_medium_patch16_rpn_224.sw_in1kz)vit_relpos_base_patch16_rpn_224.untrainedr   c           	      l    t          ddddt                    }t          	 dd| it          |fi |}|S )	z` ViT-Base (ViT-B/32+) w/ relative log-coord position and residual post-norm, no class token
          r      )r   r   r   r'   r   $vit_relpos_base_patch32_plus_rpn_256r@  )rV  r   r   rC  r@  rA  
model_argsrB  s       r>   rV  rV  1  s_     s"UghhhJ-.e e;EeIMjIcIc\bIcIce eELr?   c           	      `    t          dddd          }t          	 dd| it          |fi |}|S )	zI ViT-Base (ViT-B/16+) w/ relative log-coord position, no class token
    r   rT  r   rU  )r   r   r   r'    vit_relpos_base_patch16_plus_240r@  )r[  r   rC  rX  s       r>   r[  r[  ;  s\     s"KKKJ-*a a7AaEI*E_E_X^E_E_a aELr?   c           	      d    t          dddddd          }t          	 d
d	| it          |fi |}|S )H ViT-Base (ViT-B/16) w/ relative log-coord position, no class token
    r     r      FTr   r   r   r'   r9   r   vit_relpos_small_patch16_224r@  )rb  r\  rX  s       r>   rb  rb  E  sb     s"TYcghhhJ-&] ]3=]AEjA[A[TZA[A[] ]ELr?   c           	      d    t          dddddd          }t          	 d
d	| it          |fi |}|S )r^  r      r   r!   FTra  vit_relpos_medium_patch16_224r@  )re  r\  rX  s       r>   re  re  O  sk     B!eUY[ [ [J-'^ ^4>^BFzB\B\U[B\B\^ ^ELr?   c           	      d    t          dddddd          }t          	 d	d| it          |fi |}|S )
r^  r   r   r   FTra  vit_relpos_base_patch16_224r@  )rg  r\  rX  s       r>   rg  rg  Z  sk     B"uVZ\ \ \J-%\ \2<\@DZ@Z@ZSY@Z@Z\ \ELr?   c           
      h    t          dddddddd          }t          	 d
d	| it          |fi |}|S )O ViT-Base (ViT-B/16) w/ shared relative log-coord position, no class token
    r   r_  r   r`  FTr   r   r   r'   r9   r   r   r@   vit_srelpos_small_patch16_224r@  )rk  r\  rX  s       r>   rk  rk  e  so     B!eUZ. . .J .'^ ^4>^BFzB\B\U[B\B\^ ^ELr?   c           
      h    t          dddddddd          }t          	 d
d	| it          |fi |}|S )ri  r   rd  r   r!   FTrj  vit_srelpos_medium_patch16_224r@  )rm  r\  rX  s       r>   rm  rm  q  so     B!eUZ. . .J .(_ _5?_CG
C]C]V\C]C]_ _ELr?   c                 j    t          ddddddddd		  	        }t          	 dd| it          |fi |}|S )zM ViT-Base (ViT-M/16) w/ relative log-coord position, class token present
    r   rd  r   r!   FrO  Tr   )	r   r   r   r'   r9   r   r   r   r   !vit_relpos_medium_patch16_cls_224r@  )ro  r\  rX  s       r>   ro  ro  }  st     B!eUZTw@ @ @J .+b b8BbFJ:F`F`Y_F`F`b bELr?   c           	      f    t          ddddddd          }t          	 d
d	| it          |fi |}|S )zM ViT-Base (ViT-B/16) w/ relative log-coord position, class token present
    r   r   r   FTr   )r   r   r   r'   r9   r   r   vit_relpos_base_patch16_cls_224r@  )rq  r\  rX  s       r>   rq  rq    sn     B"uZ^lsu u uJ-)` `6@`DHD^D^W]D^D^` `ELr?   c           	      f    t          ddddddd          }t          	 d	d| it          |fi |}|S )
a   ViT-Base (ViT-B/16) w/ relative log-coord position, class token present
    NOTE this config is a bit of a mistake, class token was enabled but global avg-pool w/ fc-norm was not disabled
    Leaving here for comparisons w/ a future re-train as it performs quite well.
    r   r   r   FT)r   r   r   r'   r9   r   r   "vit_relpos_base_patch16_clsgap_224r@  )rs  r\  rX  s       r>   rs  rs    sn     B"uVZhln n nJ-,c c9CcGKJGaGaZ`GaGac cELr?   c           	      n    t          dddddt                    }t          	 d	d| it          |fi |}|S )
_ ViT-Base (ViT-B/16) w/ relative log-coord position and residual post-norm, no class token
    r   r_  r   r`  Fr   r   r   r'   r9   r    vit_relpos_small_patch16_rpn_224r@  )rw  rW  rX  s       r>   rw  rw    sk     B!eVhj j jJ-*a a7AaEI*E_E_X^E_E_a aELr?   c           	      n    t          dddddt                    }t          	 d	d| it          |fi |}|S )
ru  r   rd  r   r!   Frv  !vit_relpos_medium_patch16_rpn_224r@  )ry  rW  rX  s       r>   ry  ry    sk     B!eVhj j jJ-+b b8BbFJ:F`F`Y_F`F`b bELr?   c           	      n    t          dddddt                    }t          	 dd| it          |fi |}|S )	ru  r   r   r   Frv  vit_relpos_base_patch16_rpn_224r@  )r{  rW  rX  s       r>   r{  r{    sk     B"uWik k kJ-)` `6@`DHD^D^W]D^D^` `ELr?   r3  r2  )Er4  loggingr   	functoolsr   typingr   r   r   r   r   r	   ImportErrortyping_extensionsrO   torch.nnr,   	torch.jitr
   torch.utils.checkpointr   	timm.datar   r   timm.layersr   r   r   r   r   r   r   _builderr   	_featuresr   _manipulater   	_registryr   r   vision_transformerr   __all__	getLoggerra   _loggerr7  r   rk   rw   r   r   rC  rN  default_cfgsrV  r[  rb  re  rg  rk  rm  ro  rq  rs  rw  ry  r{  r   r?   r>   <module>r     s            5 5 5 5 5 5 5 5 5 5 5 5 5 5* * * *))))))))*              - - - - - - E E E E E E E E c c c c c c c c c c c c c c c c c c * * * * * * + + + + + + $ $ $ $ $ $ < < < < < < < < 4 4 4 4 4 4$
%
'
H
%
%? ? ? ? ?bi ? ? ?DF F F F F F F F- - - - -") - - -`4 4 4 4 4 4 4 4n[ [ [ [ [bi [ [ [|       %$48D X 5" 5" 5" 37$2-2X2X2X,0D P- - - .2T Q. . . ,04 O, , , .2T Q. . . /3d R/ / / 26 U2 2 2 2626$ V3 3 3 37$&&15 U2 2 2 26I%& %& % %P  H_      D[      @W      AX      ?V      AX      BY      E\      CZ     	 	F] 	 	 	 	  D[      E\      CZ      s   ' 55