
    Ng                     
   d Z ddlZddlmZ ddlmZmZmZmZ ddl	Z	ddl
mZ ddlmZ ddlmZmZ ddlmZmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZ ddl m!Z! ddl"m#Z# dgZ$e G d dej%                              Z&ddZ' G d dej%                  Z( G d dej%                  Z) G d dej%                  Z* G d dej%                  Z+ G d dej%                  Z, G d dej%                  Z-d Z.dd Z/dd"Z0 ei d# e0d$d%&          d' e0d$d(&          d) e0d$d*d+,          d- e0d$d.&          d/ e0d$d0&          d1 e0d$d2d+,          d3 e0d$d4&          d5 e0d$d6&          d7 e0d$d8d+,          d9 e0d$d:&          d; e0d$d<&          d= e0d$d>d+,          d? e0d$d@&          dA e0d$dB&          dC e0d$dDd+,          dE e0d$dF&          dG e0d$dH&          i dI e0d$dJd+,          dK e0d$dL&          dM e0d$dN&          dO e0d$dPd+,          dQ e0d$dR&          dS e0d$dT&          dU e0d$dVd+,          dW e0d$dX&          dY e0d$dZ&          d[ e0d$d\d+,          d] e0d$d^&          d_ e0d$d`&          da e0d$dbd+,          dc e0d$dd&          de e0d$df&          dg e0d$dhd+,          di e0d$dj&           e0d$dk&           e0d$dld+,           e0d$dm&           e0d$dn&           e0d$dod+,           e0d$dp&           e0d$dq&           e0d$drd+,          ds          Z1eddte-fdu            Z2eddte-fdv            Z3eddte-fdw            Z4eddte-fdx            Z5eddte-fdy            Z6eddte-fdz            Z7eddte-fd{            Z8eddte-fd|            Z9eddte-fd}            Z:eddte-fd~            Z;eddte-fd            Z<eddte-fd            Z=eddte-fd            Z>eddte-fd            Z?eddte-fd            Z@eddte-fd            ZAeddte-fd            ZBeddte-fd            ZCeddte-fd            ZDeddte-fd            ZEeddte-fd            ZFeddte-fd            ZGeddte-fd            ZHeddte-fd            ZIeddte-fd            ZJeddte-fd            ZKeddte-fd            ZLeddte-fd            ZM eeNi dd'dd)dd/dd1dd5dd7dd;dd=ddAddCddGddIddMddOddSddUddYd[d_dadedgddddddd           dS )a]   Cross-Covariance Image Transformer (XCiT) in PyTorch

Paper:
    - https://arxiv.org/abs/2106.09681

Same as the official implementation, with some minor adaptations, original copyright below
    - https://github.com/facebookresearch/xcit/blob/master/xcit.py

Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman
    N)partial)ListOptionalTupleUnion)
checkpointIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPathtrunc_normal_	to_2tupleuse_fused_attn   )build_model_with_cfg)feature_take_indices)register_notrace_module)register_modelgenerate_default_cfgsregister_model_deprecations)	ClassAttn)MlpXcitc                   8     e Zd ZdZd
 fd	Zdededefd	Z xZS )PositionalEncodingFourierz
    Positional encoding relying on a fourier kernel matching the one used in the "Attention is all you Need" paper.
    Based on the official XCiT code
        - https://github.com/facebookresearch/xcit/blob/master/xcit.py
           '  c                     t                                                       t          j        |dz  |d          | _        dt
          j        z  | _        || _        || _	        || _
        d| _        d S )N   r   )kernel_sizeư>)super__init__nnConv2dtoken_projectionmathpiscaletemperature
hidden_dimdimeps)selfr,   r-   r+   	__class__s       L/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/xcit.pyr$   z"PositionalEncodingFourier.__init__*   sd     "	*q.#1 M M M[
&$    BHWc           
         | j         j        j        }| j         j        j        }t	          j        d|dz   |                              t          j                                      d          	                    dd|          }t	          j        d|dz   |                              t          j                  	                    d|d          }||d d dd d d f         | j
        z   z  | j        z  }||d d d d dd f         | j
        z   z  | j        z  }t	          j        | j        |                              t          j                  }| j        dt	          j        |dd          z  | j        z  z  }|d d d d d d d f         |z  }	|d d d d d d d f         |z  }
t	          j        |	d d d d d d dd df                                         |	d d d d d d dd df                                         gd	                              d
          }	t	          j        |
d d d d d d dd df                                         |
d d d d d d dd df                                         gd	                              d
          }
t	          j        |
|	fd
	                              dd
dd          }|                      |                    |                    }|	                    |ddd          S )Nr   )devicer    floor)rounding_moder      r-      )r'   weightr7   dtypetorcharangetofloat32	unsqueezerepeatr.   r*   r,   r+   divstacksincosflattencatpermute)r/   r3   r4   r5   r7   r?   y_embedx_embeddim_tpos_xpos_yposs               r1   forwardz!PositionalEncodingFourier.forward3   s    &-4%,2,q!a%777::5=IISSTUVV]]^_abdeff,q!a%777::5=IIPPQRTUWXYYWQQQQQQY/$(:;djHWQQQ233Y/$(:;djHT_V<<<??NN Q5!7)S)S)S%SVZVe%ef111aaa&.111aaa&.U111aaaADqD=15577qqq!!!QQQ1}9M9Q9Q9S9STZ[\\\ddefggU111aaaADqD=15577qqq!!!QQQ1}9M9Q9Q9S9STZ[\\\ddefggiA...66q!QBB##CFF5MM22zz!Q1%%%r2   )r   r   r   )__name__
__module____qualname____doc__r$   intrS   __classcell__r0   s   @r1   r   r   "   sp              & & & & & & & & & & &r2   r   c           
          t           j                            t          j        | |d|dd          t          j        |                    S )z3x3 convolution + batch normr=   r   F)r!   stridepaddingbias)r@   r%   
Sequentialr&   BatchNorm2d)	in_planes
out_planesr\   s      r1   conv3x3rc   E   sD    8
	)ZQvqW\]]]
z""  r2   c                   >     e Zd ZdZddddej        f fd	Zd Z xZS )ConvPatchEmbedz<Image to Patch Embedding using multiple convolutional layers      r=   r   c                    t                                                       t          |          }|d         |z  |d         |z  z  }|| _        || _        || _        |dk    rt          j                            t          ||dz  d           |            t          |dz  |dz  d           |            t          |dz  |dz  d           |            t          |dz  |d                    | _
        d S |dk    rst          j                            t          ||dz  d           |            t          |dz  |dz  d           |            t          |dz  |d                    | _
        d S d)Nr   r   rg      r    r;   z=For convolutional projection, patch size has to be in [8, 16])r#   r$   r   img_size
patch_sizenum_patchesr@   r%   r_   rc   proj)r/   rj   rk   in_chans	embed_dim	act_layerrl   r0   s          r1   r$   zConvPatchEmbed.__init__P   so   X&&{j0Xa[J5NO $&++)q.!44		Q	Q::		Q	Q::		Q	155 DIII 1__++)q.!44		Q	Q::		Q	155 DIII RRr2   c                     |                      |          }|j        d         |j        d         }}|                    d                              dd          }|||ffS )Nr    r=   r   )rm   shaperJ   	transpose)r/   xHpWps       r1   rS   zConvPatchEmbed.forwardm   sS    IIaLLQWQZBIIaLL""1a((2r({r2   )	rT   rU   rV   rW   r%   GELUr$   rS   rY   rZ   s   @r1   re   re   M   sg        FF #Q#Y[Y` S S S S S S:      r2   re   c                   D     e Zd ZdZdej        df fd	ZdedefdZ xZ	S )LPIa  
    Local Patch Interaction module that allows explicit communication between tokens in 3x3 windows to augment the
    implicit communication performed by the block diagonal scatter attention. Implemented using 2 layers of separable
    3x3 convolutions with GeLU and BatchNorm2d
    Nr=   c                 N   t                                                       |p|}|dz  }t          j                            |||||          | _         |            | _        t          j        |          | _        t          j                            |||||          | _	        d S )Nr    )r!   r]   groups)
r#   r$   r@   r%   r&   conv1actr`   bnconv2)r/   in_featuresout_featuresrp   r!   r]   r0   s         r1   r$   zLPI.__init__{   s    #2{"X__+wWb % d d
9;;.--X__;Xd % f f


r2   r4   r5   c                 v   |j         \  }}}|                    ddd                              ||||          }|                     |          }|                     |          }|                     |          }|                     |          }|                    |||                              ddd          }|S )Nr   r    r   )rr   rL   reshaper|   r}   r~   r   )r/   rt   r4   r5   r3   NCs          r1   rS   zLPI.forward   s    '1aIIaA&&q!Q22JJqMMHHQKKGGAJJJJqMMIIaA&&q!Q//r2   )
rT   rU   rV   rW   r%   rw   r$   rX   rS   rY   rZ   s   @r1   ry   ry   t   s{          26VW f f f f f fC C        r2   ry   c            	       P     e Zd ZdZdddddej        ej        ddf	 fd	Zd Z xZ	S )ClassAttentionBlockzAClass Attention Layer as in CaiT https://arxiv.org/abs/2103.17239      @F              ?c                 N   t                                                        |	|          | _        t          |||||          | _        |dk    rt          |          nt          j                    | _         |	|          | _	        t          |t          ||z            ||          | _        |
]t          j        |
t          j        |          z            | _        t          j        |
t          j        |          z            | _        nd\  | _        | _        || _        d S )N	num_headsqkv_bias	attn_drop	proj_dropr   r   hidden_featuresrp   drop)r   r   )r#   r$   norm1r   attnr   r%   Identity	drop_pathnorm2r   rX   mlp	Parameterr@   onesgamma1gamma2tokens_norm)r/   r-   r   	mlp_ratior   r   r   r   rp   
norm_layeretar   r0   s               r1   r$   zClassAttentionBlock.__init__   s	    	Z__
9x9Xac c c	 1:B),,,BKMMZ__
3C)O8L8LXahqrrr?,sUZ__'<==DK,sUZ__'<==DKK'/$DK 'r2   c                 p   |                      |          }t          j        |                     |          |d d dd f         gd          }||                     | j        |z            z   }| j        r|                     |          }nCt          j        |                     |d d ddf                   |d d dd f         gd          }|}|d d ddf         }| j        | 	                    |          z  }t          j        ||d d dd f         gd          }||                     |          z   }|S )Nr   r<   r   )
r   r@   rK   r   r   r   r   r   r   r   )r/   rt   x_norm1x_attnx_res	cls_tokens         r1   rS   zClassAttentionBlock.forward   s;   **Q--DIIg..122?QGGGt{V3444 	D

1AA	4::a1Q3i00!AAAqrrE(;CCCAaaa1fI	K$((9"5"55	Iy!AAAqrrE(+333DNN1%%%r2   )
rT   rU   rV   rW   r%   rw   	LayerNormr$   rS   rY   rZ   s   @r1   r   r      st        KK g|' ' ' ' ' 'B      r2   r   c                   ~     e Zd ZU ej        j        e         ed<   	 d fd	Zd Z	ej        j
        d             Z xZS )	XCA
fused_attnri   Fr   c                    t                                                       || _        t          d          | _        t          j        t          j        |dd                    | _	        t          j
        ||dz  |          | _        t          j        |          | _        t          j
        ||          | _        t          j        |          | _        d S )NT)experimentalr   r=   )r^   )r#   r$   r   r   r   r%   r   r@   r   r+   LinearqkvDropoutr   rm   r   )r/   r-   r   r   r   r   r0   s         r1   r$   zXCA.__init__   s    "(d;;;<
9a(C(CDD9S#'999I..Ic3''	I..r2   c                    |j         \  }}}|                     |                              ||d| j        || j        z                                ddddd          }|                    d          \  }}}| j        r}t          j        j	        
                    |d          | j        z  }t          j        j	        
                    |d          }t          j        j	                            |||d	          }nt          j        j	        
                    |d          }t          j        j	        
                    |d          }||                    d
d          z  | j        z  }	|	                    d          }	|                     |	          }	|	|z  }|                    dddd                              |||          }|                     |          }|                     |          }|S )Nr=   r    r   r;   r   r8   r<   r   )r*   )rr   r   r   r   rL   unbindr   r@   r%   
functional	normalizer+   scaled_dot_product_attentionrs   softmaxr   rm   r   )
r/   rt   r3   r   r   r   qkvr   s
             r1   rS   zXCA.forward   s   '1ahhqkk!!!Q4>1;NOOWWXY[\^_abdeff**Q--1a? 	#--aR-884;KKA#--aR-88A#@@AqPS@TTAA #--aR-88A#--aR-88AB+++t/??D<<B<''D>>$''DqAIIaAq!!))!Q22IIaLLNN1r2   c                     dhS )Nr+    r/   s    r1   no_weight_decayzXCA.no_weight_decay   s
    r2   )ri   Fr   r   )rT   rU   rV   r@   jitFinalbool__annotations__r$   rS   ignorer   rY   rZ   s   @r1   r   r      s         	%%%%
/ / / / / /  0 Y      r2   r   c                   T     e Zd Zdddddej        ej        df fd	ZdedefdZ xZ	S )	XCABlockr   Fr   r   c                    t                                                        |	|          | _        t          |||||          | _        |dk    rt          |          nt          j                    | _         |	|          | _	        t          ||          | _         |	|          | _        t          |t          ||z            ||          | _        t          j        |
t#          j        |          z            | _        t          j        |
t#          j        |          z            | _        t          j        |
t#          j        |          z            | _        d S )Nr   r   )r   rp   r   )r#   r$   r   r   r   r   r%   r   r   norm3ry   local_mpr   r   rX   r   r   r@   r   r   gamma3r   )r/   r-   r   r   r   r   r   r   rp   r   r   r0   s              r1   r$   zXCABlock.__init__   s    	Z__
y8ydmnnn	09B),,,BKMMZ__
yAAAZ__
3C)O8L8LXahqrrrl3C#899l3C#899l3C#899r2   r4   r5   c           	         ||                      | j        |                     |                     |                    z            z   }||                      | j        |                     |                     |          ||          z            z   }||                      | j        |                     | 	                    |                    z            z   }|S N)
r   r   r   r   r   r   r   r   r   r   )r/   rt   r4   r5   s       r1   rS   zXCABlock.forward  s    t{TYYtzz!}}-E-EEFFF t{T]]4::a==!Q-O-OOPPPt{TXXdjjmm-D-DDEEEr2   )
rT   rU   rV   r%   rw   r   r$   rX   rS   rY   rZ   s   @r1   r   r      s}        
 g|: : : : : :8C C        r2   r   c                   8    e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d* fd	Zd Zej        j        d             Z	ej        j        d+d            Z
ej        j        d,d            Zej        j        dej        fd            Zd-dedee         fdZ	 	 	 	 	 d.dej        deeeee         f                  dededed edeeej                 eej        eej                 f         f         fd!Z	 	 	 d/deeee         f         d#ed$efd%Zd& Zd+d'efd(Zd) Z xZS )0r   z
    Based on timm and DeiT code bases
    https://github.com/rwightman/pytorch-image-models/tree/master/timm
    https://github.com/facebookresearch/deit/
    rf   rg   r=     tokenr      r   Tr   Nr    r   Fc                   	
 t                                                       |dv sJ t          |          }|d         |z  dk    r|d         |z  dk    s
J d            pt          t          j        d          pt          j        || _        x| _        x| _	        | _
        || _        d| _        t          |||          | _        |t	          j        t!          j        dd                    | _        |rt'          	          | _        nd
| _        t	          j        |          | _        t	          j        	
f
dt1          |          D                       | _        fdt1          |          D             | _        t	          j        	
f
dt1          |          D                       | _                   | _        t	          j                  | _        |dk    rt	          j        | j        |          nt	          j                    | _         tC          | j        d           | "                    | j#                   d
S )a  
        Args:
            img_size (int, tuple): input image size
            patch_size (int): patch size
            in_chans (int): number of input channels
            num_classes (int): number of classes for classification head
            embed_dim (int): embedding dimension
            depth (int): depth of transformer
            num_heads (int): number of attention heads
            mlp_ratio (int): ratio of mlp hidden dim to embedding dim
            qkv_bias (bool): enable bias for qkv if True
            drop_rate (float): dropout rate after positional embedding, and in XCA/CA projection + MLP
            pos_drop_rate: position embedding dropout rate
            proj_drop_rate (float): projection dropout rate
            attn_drop_rate (float): attention dropout rate
            drop_path_rate (float): stochastic depth rate (constant across all layers)
            norm_layer: (nn.Module): normalization layer
            cls_attn_layers: (int) Depth of Class attention layers
            use_pos_embed: (bool) whether to use positional encoding
            eta: (float) layerscale initialization value
            tokens_norm: (bool) Whether to normalize all tokens or just the cls_token in the CA

        Notes:
            - Although `layer_norm` is user specifiable, there are hard-coded `BatchNorm2d`s in the local patch
              interaction (class LPI) and the patch embedding (class ConvPatchEmbed)
         avgr   r   z2`patch_size` should divide image dimensions evenlyr"   )r.   F)rj   rk   rn   ro   rp   r   r<   N)pc                 B   
 g | ]}t          	
 
  
        S ))
r-   r   r   r   r   r   r   rp   r   r   )r   ).0_rp   attn_drop_ratedrop_path_ratero   r   r   r   r   proj_drop_rater   s     r1   
<listcomp>z!Xcit.__init__.<locals>.<listcomp>q  sX     %# %# %#  ##!(((#%  %# %# %#r2   c                 :    g | ]}t          d |           S )zblocks.)num_chs	reductionmoduledict)r   iro   rs     r1   r   z!Xcit.__init__.<locals>.<listcomp>  s0    nnn\]T)qSTWWWnnnr2   c                 B   
 g | ]}t          	
 
  
        S ))
r-   r   r   r   r   r   rp   r   r   r   )r   )r   r   rp   r   	drop_ratero   r   r   r   r   r   r   s     r1   r   z!Xcit.__init__.<locals>.<listcomp>  sX     .- .- .-   ##!#(#%'  .- .- .-r2   {Gz?std)$r#   r$   r   r   r%   r   rw   num_classesnum_featureshead_hidden_sizero   global_poolgrad_checkpointingre   patch_embedr   r@   zerosr   r   	pos_embedr   pos_drop
ModuleListrangeblocksfeature_infocls_attn_blocksnorm	head_dropr   r   headr   apply_init_weights)r/   rj   rk   rn   r   r   ro   depthr   r   r   r   pos_drop_rater   r   r   rp   r   cls_attn_layersuse_pos_embedr   r   r   r0   s         ` ```` `````  ``@r1   r$   zXcit.__init__"  s   d 	22222X&&j(A--HQK*4LPQ4Q4Q4Q@ 5R4Q4QB72<T#B#B#B
(	&ENNND1DN&"')!
 
 
 ek!Q	&B&BCC 	"69EEEDNN!DN
]333m %# %# %# %# %# %# %# %# %# %# %# %# %# 5\\%# %# %# $ $ onnnnafglamamnnn!} .- .- .- .- .- .- .- .- .- .- .- .- .- ?++.- .- .-  .  .  Jy))	I..ALqBId/===VXVaVcVc	 	dn#....

4%&&&&&r2   c                     t          |t          j                  r^t          |j        d           t          |t          j                  r0|j        +t          j                            |j        d           d S d S d S d S )Nr   r   r   )
isinstancer%   r   r   r>   r^   init	constant_)r/   ms     r1   r   zXcit._init_weights  s    a## 	-!(,,,,!RY'' -AF,>!!!&!,,,,,	- 	-- -,>,>r2   c                 
    ddhS )Nr   r   r   r   s    r1   r   zXcit.no_weight_decay  s    [))r2   c                 *    t          ddddg          S )Nz ^cls_token|pos_embed|patch_embedz^blocks\.(\d+))z^cls_attn_blocks\.(\d+)N)z^norm)i )stemr   r   r   )r/   coarses     r1   group_matcherzXcit.group_matcher  s(    4$?AUV
 
 
 	
r2   c                     || _         d S r   )r   )r/   enables     r1   set_grad_checkpointingzXcit.set_grad_checkpointing  s    "(r2   returnc                     | j         S r   )r   r   s    r1   get_classifierzXcit.get_classifier  s
    yr2   r   r   c                     || _         ||dv sJ || _        |dk    rt          j        | j        |          nt          j                    | _        d S )Nr   r   )r   r   r%   r   r   r   r   )r/   r   r   s      r1   reset_classifierzXcit.reset_classifier  s[    &""66666*DALqBId/===VXVaVcVc			r2   NCHWrt   indicesr   
stop_early
output_fmtintermediates_onlyc                    |dv s
J d            |dk    }g }t          t          | j                  |          \  }	}
|j        \  }}}|                     |          \  }\  | j        Q|                                                   d|j        d                                       ddd          }||z   }|                     |          }t          j
                                        s|s| j        }n| j        d|
dz            }t          |          D ]B\  }} ||          }||	v r,|                    |r|                     |          n|           C|rfd	|D             }|r|S t          j        | j                            dd          |fd
          }| j        D ]} ||          }|                     |          }||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r  NLCz)Output format must be one of NCHW or NLC.r  Nr8   r   r   r    c                     g | ]B}|                     d                               dddd                                          CS )r8   r   r=   r   r    )r   rL   
contiguous)r   yr3   ru   rv   s     r1   r   z.Xcit.forward_intermediates.<locals>.<listcomp>  sL    nnn[\QYYq"b"55==aAqIITTVVnnnr2   r<   )r   lenr   rr   r   r   r   rL   r   r@   r   is_scripting	enumerateappendr   rK   r   expandr   )r/   rt   r  r   r  r  r  r   intermediatestake_indices	max_indexr   heightwidthpos_encodingr   r   blkr3   ru   rv   s                     @@@r1   forward_intermediateszXcit.forward_intermediates  s   * _,,,.Y,,,&"6s4;7G7G"Q"Qi  g1fe&&q))8B>%>>!R44<<QAGAJOOWWXY[\^_``LL AMM!9!!## 	1: 	1[FF[)a-0F'' 	B 	BFAsAr2AL  $$T%@TYYq\\\qAAA  	onnnnnn`mnnnM 	!   It~,,QB77;CCC' 	 	CCFFIIaLL-r2   r   
prune_norm
prune_headc                    t          t          | j                  |          \  }}| j        d|dz            | _        |rt          j                    | _        |r.t          j                    | _        |                     dd           |S )z@ Prune layers not required for specified intermediates.
        Nr   r   r   )	r   r  r   r%   r   r   r   r   r  )r/   r  r(  r)  r!  r"  s         r1   prune_intermediate_layerszXcit.prune_intermediate_layers  s     #7s4;7G7G"Q"Qik.9q=.1 	&DI 	)#%=??D !!!R(((r2   c                    |j         d         }|                     |          \  }\  }}| j        Q|                     |||                              |d|j         d                                       ddd          }||z   }|                     |          }| j        D ]G}| j        r1t          j	        
                                st          ||||          }: ||||          }Ht          j        | j                            |dd          |fd          }| j        D ]C}| j        r/t          j	        
                                st          ||          }8 ||          }D|                     |          }|S )Nr   r8   r   r    r<   )rr   r   r   r   rL   r   r   r   r@   r   r  r   rK   r   r  r   r   )r/   rt   r3   ru   rv   r%  r&  s          r1   forward_featureszXcit.forward_features  sy   GAJ&&q))8B>%>>!R44<<QAGAJOOWWXY[\^_``LL AMM!; 	# 	#C& #uy/E/E/G/G #sAr2..C2rNNIt~,,QB77;CCC' 	 	C& uy/E/E/G/G sA&&CFFIIaLLr2   
pre_logitsc                     | j         r9| j         dk    r"|d d dd f                             d          n|d d df         }|                     |          }|r|n|                     |          S )Nr   r   r<   r   )r   meanr   r   )r/   rt   r.  s      r1   forward_headzXcit.forward_head$  sy     	O(,(8E(A(A!!!QRR%!$$$qAwANN10qqDIIaLL0r2   c                 Z    |                      |          }|                     |          }|S r   )r-  r1  )r/   rt   s     r1   rS   zXcit.forward*  s-    !!!$$a  r2   )rf   rg   r=   r   r   r   r   r   r   Tr   r   r   r   r   NNr    Tr   FF)Tr   )NFFr  F)r   FT)rT   rU   rV   rW   r$   r   r@   r   r   r   r  r  r%   Moduler  rX   r   strr  Tensorr   r   r   r   r'  r+  r-  r1  rS   rY   rZ   s   @r1   r   r     s         -u' u' u' u' u' u'n- - - Y* * * Y
 
 
 
 Y) ) ) ) Y	    d dC dhsm d d d d 8<$$',;  ; |;  eCcN34;  	; 
 ;  ;  !%;  
tEL!5tEL7I)I#JJ	K;  ;  ;  ; ~ ./$#	 3S	>*  	   "  81 1$ 1 1 1 1      r2   c           	         d| v r| d         } t          |dd           d u}d | D             }|D ]4}|r-|                     |          | |                    dd          <   1| |= 5d| v rd|                                v rt	          |j                  }t          |          D ]}|                     d| d	          }|                    d
d|j        d                   }t          d          D ]\  }}	||         | d| d|	 d<   |                     d| dd           }
|
=|
                    d
d          }
t          d          D ]\  }}	|
|         | d| d|	 d<   | S )Nmodelr   c                 <    g | ]}|                     d           |S )r   )
startswith)r   r   s     r1   r   z(checkpoint_filter_fn.<locals>.<listcomp>6  s)    IIIAq||K/H/HIaIIIr2   zpos_embeder.z
pos_embed.z!cls_attn_blocks.0.attn.qkv.weightzcls_attn_blocks.0.attn.q.weightzcls_attn_blocks.z.attn.qkv.weightr=   r8   r   z.attn.z.weightz.attn.qkv.biasz.bias)
getattrpopreplace
state_dictr  r   r   r   rr   r  )r>  r8  r   pos_embed_keysr   num_ca_blocksr   
qkv_weightj	subscriptr   s              r1   checkpoint_filter_fnrD  0  s   *(
 E;55TAMIIIIIN   	BL..QRBSBSJqyy>>??1 +j88=^bgbrbrbtbt=t=tE122}%% 		[ 		[A#(N1(N(N(NOOJ#++Ar:3CB3GHHJ )% 0 0 [ [9MWXY]
IaIIyIIIJJ!~~&J&J&J&JDQQH##++Ar22$-e$4$4 [ [LAyOWXY{JK!KK9KKKLLr2   Fc                     |                     dd          }t          t          | |ft          t	          |d          d|}|S )Nout_indicesr=   getter)rF  feature_cls)pretrained_filter_fnfeature_cfg)r<  r   r   rD  r   )variant
pretraineddefault_cfgkwargsrF  r8  s         r1   _create_xcitrO  M  s]    **]A..K  2[hGGG   E Lr2   r   c                 6    | ddd dddt           t          ddd|S )	Nr   )r=   rf   rf   r   bicubicTzpatch_embed.proj.0.0r   )urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizer0  r   
first_conv
classifierr	   )rR  rN  s     r1   _cfgrZ  Z  s7    =t)%.B,F   r2   zxcit_nano_12_p16_224.fb_in1kztimm/z<https://dl.fbaipublicfiles.com/xcit/xcit_nano_12_p16_224.pth)	hf_hub_idrR  z!xcit_nano_12_p16_224.fb_dist_in1kzAhttps://dl.fbaipublicfiles.com/xcit/xcit_nano_12_p16_224_dist.pthz!xcit_nano_12_p16_384.fb_dist_in1kzAhttps://dl.fbaipublicfiles.com/xcit/xcit_nano_12_p16_384_dist.pth)r=     r\  )r[  rR  rS  zxcit_tiny_12_p16_224.fb_in1kz<https://dl.fbaipublicfiles.com/xcit/xcit_tiny_12_p16_224.pthz!xcit_tiny_12_p16_224.fb_dist_in1kzAhttps://dl.fbaipublicfiles.com/xcit/xcit_tiny_12_p16_224_dist.pthz!xcit_tiny_12_p16_384.fb_dist_in1kzAhttps://dl.fbaipublicfiles.com/xcit/xcit_tiny_12_p16_384_dist.pthzxcit_tiny_24_p16_224.fb_in1kz<https://dl.fbaipublicfiles.com/xcit/xcit_tiny_24_p16_224.pthz!xcit_tiny_24_p16_224.fb_dist_in1kzAhttps://dl.fbaipublicfiles.com/xcit/xcit_tiny_24_p16_224_dist.pthz!xcit_tiny_24_p16_384.fb_dist_in1kzAhttps://dl.fbaipublicfiles.com/xcit/xcit_tiny_24_p16_384_dist.pthzxcit_small_12_p16_224.fb_in1kz=https://dl.fbaipublicfiles.com/xcit/xcit_small_12_p16_224.pthz"xcit_small_12_p16_224.fb_dist_in1kzBhttps://dl.fbaipublicfiles.com/xcit/xcit_small_12_p16_224_dist.pthz"xcit_small_12_p16_384.fb_dist_in1kzBhttps://dl.fbaipublicfiles.com/xcit/xcit_small_12_p16_384_dist.pthzxcit_small_24_p16_224.fb_in1kz=https://dl.fbaipublicfiles.com/xcit/xcit_small_24_p16_224.pthz"xcit_small_24_p16_224.fb_dist_in1kzBhttps://dl.fbaipublicfiles.com/xcit/xcit_small_24_p16_224_dist.pthz"xcit_small_24_p16_384.fb_dist_in1kzBhttps://dl.fbaipublicfiles.com/xcit/xcit_small_24_p16_384_dist.pthzxcit_medium_24_p16_224.fb_in1kz>https://dl.fbaipublicfiles.com/xcit/xcit_medium_24_p16_224.pthz#xcit_medium_24_p16_224.fb_dist_in1kzChttps://dl.fbaipublicfiles.com/xcit/xcit_medium_24_p16_224_dist.pthz#xcit_medium_24_p16_384.fb_dist_in1kzChttps://dl.fbaipublicfiles.com/xcit/xcit_medium_24_p16_384_dist.pthzxcit_large_24_p16_224.fb_in1kz=https://dl.fbaipublicfiles.com/xcit/xcit_large_24_p16_224.pthz"xcit_large_24_p16_224.fb_dist_in1kzBhttps://dl.fbaipublicfiles.com/xcit/xcit_large_24_p16_224_dist.pthz"xcit_large_24_p16_384.fb_dist_in1kzBhttps://dl.fbaipublicfiles.com/xcit/xcit_large_24_p16_384_dist.pthzxcit_nano_12_p8_224.fb_in1kz;https://dl.fbaipublicfiles.com/xcit/xcit_nano_12_p8_224.pthz xcit_nano_12_p8_224.fb_dist_in1kz@https://dl.fbaipublicfiles.com/xcit/xcit_nano_12_p8_224_dist.pthz xcit_nano_12_p8_384.fb_dist_in1kz@https://dl.fbaipublicfiles.com/xcit/xcit_nano_12_p8_384_dist.pthzxcit_tiny_12_p8_224.fb_in1kz;https://dl.fbaipublicfiles.com/xcit/xcit_tiny_12_p8_224.pthz xcit_tiny_12_p8_224.fb_dist_in1kz@https://dl.fbaipublicfiles.com/xcit/xcit_tiny_12_p8_224_dist.pthz xcit_tiny_12_p8_384.fb_dist_in1kz@https://dl.fbaipublicfiles.com/xcit/xcit_tiny_12_p8_384_dist.pthzxcit_tiny_24_p8_224.fb_in1kz;https://dl.fbaipublicfiles.com/xcit/xcit_tiny_24_p8_224.pthz xcit_tiny_24_p8_224.fb_dist_in1kz@https://dl.fbaipublicfiles.com/xcit/xcit_tiny_24_p8_224_dist.pthz xcit_tiny_24_p8_384.fb_dist_in1kz@https://dl.fbaipublicfiles.com/xcit/xcit_tiny_24_p8_384_dist.pthzxcit_small_12_p8_224.fb_in1kz<https://dl.fbaipublicfiles.com/xcit/xcit_small_12_p8_224.pthz!xcit_small_12_p8_224.fb_dist_in1kzAhttps://dl.fbaipublicfiles.com/xcit/xcit_small_12_p8_224_dist.pthz!xcit_small_12_p8_384.fb_dist_in1kzAhttps://dl.fbaipublicfiles.com/xcit/xcit_small_12_p8_384_dist.pthzxcit_small_24_p8_224.fb_in1kz<https://dl.fbaipublicfiles.com/xcit/xcit_small_24_p8_224.pthzAhttps://dl.fbaipublicfiles.com/xcit/xcit_small_24_p8_224_dist.pthzAhttps://dl.fbaipublicfiles.com/xcit/xcit_small_24_p8_384_dist.pthz=https://dl.fbaipublicfiles.com/xcit/xcit_medium_24_p8_224.pthzBhttps://dl.fbaipublicfiles.com/xcit/xcit_medium_24_p8_224_dist.pthzBhttps://dl.fbaipublicfiles.com/xcit/xcit_medium_24_p8_384_dist.pthz<https://dl.fbaipublicfiles.com/xcit/xcit_large_24_p8_224.pthzAhttps://dl.fbaipublicfiles.com/xcit/xcit_large_24_p8_224_dist.pthzAhttps://dl.fbaipublicfiles.com/xcit/xcit_large_24_p8_384_dist.pth)!xcit_small_24_p8_224.fb_dist_in1k!xcit_small_24_p8_384.fb_dist_in1kzxcit_medium_24_p8_224.fb_in1k"xcit_medium_24_p8_224.fb_dist_in1k"xcit_medium_24_p8_384.fb_dist_in1kzxcit_large_24_p8_224.fb_in1k!xcit_large_24_p8_224.fb_dist_in1k!xcit_large_24_p8_384.fb_dist_in1kr  c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg      r   r;   r   Frk   ro   r   r   r   r   xcit_nano_12_p16_224rL  )rf  r   rO  rL  rN  
model_argsr8  s       r1   rf  rf    sU    B!RWY Y YJeeJe$zJdJd]cJdJdeeELr2   c           	      d    t          ddddddd          }t          dd
| it          |fi |}|S )Nrg   rd  r   r;   r   Fr\  )rk   ro   r   r   r   r   rj   xcit_nano_12_p16_384rL  )rk  rg  rh  s       r1   rk  rk    sX    B!RWbeg g gJeeJe$zJdJd]cJdJdeeELr2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg      r   r;   r   Tre  xcit_tiny_12_p16_224rL  )rn  rg  rh  s       r1   rn  rn    U    B!RVX X XJeeJe$zJdJd]cJdJdeeELr2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg   rm  r   r;   r   Tre  xcit_tiny_12_p16_384rL  )rq  rg  rh  s       r1   rq  rq    ro  r2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg   r\  r   ri   r   Tre  xcit_small_12_p16_224rL  )rs  rg  rh  s       r1   rs  rs  
  V    B!RVX X XJffZf4PZKeKe^dKeKeffELr2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg   r\  r   ri   r   Tre  xcit_small_12_p16_384rL  )rv  rg  rh  s       r1   rv  rv    rt  r2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg   rm     r;   h㈵>Tre  xcit_tiny_24_p16_224rL  )rz  rg  rh  s       r1   rz  rz    U    B!SWY Y YJeeJe$zJdJd]cJdJdeeELr2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg   rm  rx  r;   ry  Tre  xcit_tiny_24_p16_384rL  )r}  rg  rh  s       r1   r}  r}  "  r{  r2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg   r\  rx  ri   ry  Tre  xcit_small_24_p16_224rL  )r  rg  rh  s       r1   r  r  *  V    B!SWY Y YJffZf4PZKeKe^dKeKeffELr2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg   r\  rx  ri   ry  Tre  xcit_small_24_p16_384rL  )r  rg  rh  s       r1   r  r  2  r  r2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg      rx  ri   ry  Tre  xcit_medium_24_p16_224rL  )r  rg  rh  s       r1   r  r  :  V    B!SWY Y YJggjgDQ[LfLf_eLfLfggELr2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nrg   r  rx  ri   ry  Tre  xcit_medium_24_p16_384rL  )r  rg  rh  s       r1   r  r  B  r  r2   c           	      b    t          dddddd          }t          d	d| it          |fi |}|S )
Nrg   r   rx  ry  Tre  xcit_large_24_p16_224rL  )r  rg  rh  s       r1   r  r  J  V    B"$TXZ Z ZJffZf4PZKeKe^dKeKeffELr2   c           	      b    t          dddddd          }t          d	d| it          |fi |}|S )
Nrg   r   rx  ry  Tre  xcit_large_24_p16_384rL  )r  rg  rh  s       r1   r  r  R  r  r2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nri   rd  r   r;   r   Fre  xcit_nano_12_p8_224rL  )r  rg  rh  s       r1   r  r  [  U    2QVX X XJdd:djIcIc\bIcIcddELr2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nri   rd  r   r;   r   Fre  xcit_nano_12_p8_384rL  )r  rg  rh  s       r1   r  r  c  r  r2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nri   rm  r   r;   r   Tre  xcit_tiny_12_p8_224rL  )r  rg  rh  s       r1   r  r  k  U    2QUW W WJdd:djIcIc\bIcIcddELr2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nri   rm  r   r;   r   Tre  xcit_tiny_12_p8_384rL  )r  rg  rh  s       r1   r  r  s  r  r2   c           	      b    t          dddddd          }t          d	d| it          |fi |}|S )
Nri   r\  r   r   Tre  xcit_small_12_p8_224rL  )r  rg  rh  s       r1   r  r  {  U    2QUW W WJeeJe$zJdJd]cJdJdeeELr2   c           	      b    t          dddddd          }t          d	d| it          |fi |}|S )
Nri   r\  r   r   Tre  xcit_small_12_p8_384rL  )r  rg  rh  s       r1   r  r    r  r2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nri   rm  rx  r;   ry  Tre  xcit_tiny_24_p8_224rL  )r  rg  rh  s       r1   r  r    U    2RVX X XJdd:djIcIc\bIcIcddELr2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nri   rm  rx  r;   ry  Tre  xcit_tiny_24_p8_384rL  )r  rg  rh  s       r1   r  r    r  r2   c           	      b    t          dddddd          }t          d	d| it          |fi |}|S )
Nri   r\  rx  ry  Tre  xcit_small_24_p8_224rL  )r  rg  rh  s       r1   r  r    U    2RVX X XJeeJe$zJdJd]cJdJdeeELr2   c           	      b    t          dddddd          }t          d	d| it          |fi |}|S )
Nri   r\  rx  ry  Tre  xcit_small_24_p8_384rL  )r  rg  rh  s       r1   r  r    r  r2   c           	      b    t          dddddd          }t          d	d| it          |fi |}|S )
Nri   r  rx  ry  Tre  xcit_medium_24_p8_224rL  )r  rg  rh  s       r1   r  r    V    2RVX X XJffZf4PZKeKe^dKeKeffELr2   c           	      b    t          dddddd          }t          d	d| it          |fi |}|S )
Nri   r  rx  ry  Tre  xcit_medium_24_p8_384rL  )r  rg  rh  s       r1   r  r    r  r2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nri   r   rx  rg   ry  Tre  xcit_large_24_p8_224rL  )r  rg  rh  s       r1   r  r    U    2SWY Y YJeeJe$zJdJd]cJdJdeeELr2   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nri   r   rx  rg   ry  Tre  xcit_large_24_p8_384rL  )r  rg  rh  s       r1   r  r    r  r2   xcit_nano_12_p16_224_distxcit_nano_12_p16_384_distxcit_tiny_12_p16_224_distxcit_tiny_12_p16_384_distxcit_tiny_24_p16_224_distxcit_tiny_24_p16_384_distxcit_small_12_p16_224_distxcit_small_12_p16_384_distxcit_small_24_p16_224_distxcit_small_24_p16_384_distxcit_medium_24_p16_224_distxcit_medium_24_p16_384_distxcit_large_24_p16_224_distxcit_large_24_p16_384_distxcit_nano_12_p8_224_distxcit_nano_12_p8_384_distxcit_tiny_12_p8_224_distr]  r^  r_  r`  ra  rb  )xcit_tiny_12_p8_384_distxcit_tiny_24_p8_224_distxcit_tiny_24_p8_384_distxcit_small_12_p8_224_distxcit_small_12_p8_384_distxcit_small_24_p8_224_distxcit_small_24_p8_384_distxcit_medium_24_p8_224_distxcit_medium_24_p8_384_distxcit_large_24_p8_224_distxcit_large_24_p8_384_dist)r   )FN)r   r3  )OrW   r(   	functoolsr   typingr   r   r   r   r@   torch.nnr%   torch.utils.checkpointr   	timm.datar
   r   timm.layersr   r   r   r   _builderr   	_featuresr   _features_fxr   	_registryr   r   r   caitr   vision_transformerr   __all__r4  r   rc   re   ry   r   r   r   r   rD  rO  rZ  default_cfgsrf  rk  rn  rq  rs  rv  rz  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rT   r   r2   r1   <module>r     s  	 	        / / / / / / / / / / / /        - - - - - - A A A A A A A A J J J J J J J J J J J J * * * * * * + + + + + + 1 1 1 1 1 1 Y Y Y Y Y Y Y Y Y Y       # # # # # #( & & & & &	 & & &D   $ $ $ $ $RY $ $ $N    ")   >1 1 1 1 1") 1 1 1h+ + + + +") + + +\# # # # #ry # # #LR R R R R29 R R Rj  :
 
 
 
    %$ B&"DDJ%L %L %LB&
 (O*Q *Q *QB& (O\i*k *k *kB& #DDJ%L %L %LB& (O*Q *Q *QB&" (O\i*k *k *k#B&( #DDJ%L %L %L)B&. (O*Q *Q *Q/B&4 (O\i*k *k *k5B&: $TTK&M &M &M;B&@ )$$P+R +R +RAB&F )$$P]j+l +l +lGB&L $TTK&M &M &MMB&R )$$P+R +R +RSB&X )$$P]j+l +l +lYB&^ %ddL'N 'N 'N_B&d *44Q,S ,S ,SeB& B&j *44Q^k,m ,m ,mkB&p $TTK&M &M &MqB&v )$$P+R +R +RwB&| )$$P]j+l +l +l}B&F "44I$K $K $KGB&L 'N)P )P )PMB&R 'N[h)j )j )jSB&X "44I$K $K $KYB&^ 'N)P )P )P_B&d 'N[h)j )j )jeB&j "44I$K $K $KkB&p 'N)P )P )PqB&v 'N[h)j )j )jwB&| #DDJ%L %L %L}B&B (O*Q *Q *QCB&H (O\i*k *k *kIB&N #DDJ%L %L %LOB& B&T *.O*Q *Q *Q *.O\i*k *k *k &*TK&M &M &M +/$P+R +R +R +/$P]j+l +l +l %)DJ%L %L %L *.O*Q *Q *Q *.O\i*k *k *kB& B& B& B BJ                                                              $      $                  t      t      t      t                  t      t                                          H  '!D '  !D '  !D	 '
  !D '  !D '  !D ' !"F ' !"F ' !"F ' !"F ' "#H ' "#H ' !"F ' !"F '$  B% '&  B' '(  B) '* !C B B!D!D!D!D"F"F!D!D? '  '  '          r2   