
    Ng;                        d Z 	 ddlmZ ddlZddlmZ ddlmZmZ ddl	m
Z
mZmZmZmZmZ ddlmZ ddlmZ dd	lmZmZ d
gZe G d dej                              Z G d dej                  Z G d dej                  Z G d d
ej                  ZddZddZ e ed           ed           ed          d          Zeddefd            Z eddefd            Z!eddefd            Z"dS )a   ConViT Model

@article{d2021convit,
  title={ConViT: Improving Vision Transformers with Soft Convolutional Inductive Biases},
  author={d'Ascoli, St{'e}phane and Touvron, Hugo and Leavitt, Matthew and Morcos, Ari and Biroli, Giulio and Sagun, Levent},
  journal={arXiv preprint arXiv:2103.10697},
  year={2021}
}

Paper link: https://arxiv.org/abs/2103.10697
Original code: https://github.com/facebookresearch/convit, original copyright below

Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman
    )OptionalNIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPathtrunc_normal_
PatchEmbedMlp	LayerNormHybridEmbed   )build_model_with_cfg)register_notrace_module)register_modelgenerate_default_cfgsConVitc                   ^     e Zd Z	 	 	 	 	 d fd	Zd Zd ZddZd	 Zd
ede	j
        fdZ xZS )GPSA   F              ?c                 j   t                                                       || _        || _        ||z  }|dz  | _        || _        t          j        ||dz  |          | _        t          j        |||          | _	        t          j
        |          | _        t          j        ||          | _        t          j        d|          | _        t          j
        |          | _        t          j        t!          j        | j                            | _        t!          j        dddd          | _        d S )N         bias   r   )super__init__	num_headsdimscalelocality_strengthnnLinearqkvDropout	attn_dropprojpos_proj	proj_drop	Parametertorchonesgating_paramzerosrel_indices)	selfr!   r    qkv_biasr)   r,   r#   head_dim	__class__s	           N/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/convit.pyr   zGPSA.__init__)   s     	")#%
!2)Cqx8883(333I..Ic3''		!Y//I..LDN)C)CDD).Q1a)@)@    c                     |j         \  }}}| j        | j        j         d         |k    r|                     |          | _        |                     |          }|                     |                              ||| j        || j        z                                dddd          }||z                      dd                              |||          }| 	                    |          }| 
                    |          }|S )Nr   r   r   r   )shaper2   get_rel_indicesget_attentionr'   reshaper    permute	transposer*   r,   )r3   xBNCattnr'   s          r7   forwardzGPSA.forwardC   s    '1a#t'7'=a'@A'E'E#33A66D!!!$$FF1IIaDNA4GHHPPQRTUWXZ[\\AX  A&&..q!Q77IIaLLNN1r8   c                 &   |j         \  }}}|                     |                              ||d| j        || j        z                                ddddd          }|d         |d         }}| j                            |ddd          }|                     |                              dddd          }||                    dd          z  | j	        z  }	|	
                    d          }	|
                    d          }| j                            dddd          }
d	t          j        |
          z
  |	z  t          j        |
          |z  z   }||                    d                              d          z  }|                     |          }|S )
Nr   r   r   r      r!   r   )r:   r&   r=   r    r>   r2   expandr+   r?   r"   softmaxr0   viewr.   sigmoidsum	unsqueezer)   )r3   r@   rA   rB   rC   r&   qk	pos_scorepatch_scoregatingrD   s               r7   r<   zGPSA.get_attentionN   s{   '1aWWQZZ1adn9LMMUUVWYZ\]_`bcdd!ube1$++Ar2r::	MM),,44Q1a@@	1;;r2...$*<!))b)11%%"%--	"''2q!44U]6***k9EM&<Q<QT]<]]R  **2...~~d##r8   c                    |                      |                              d          }| j                                        d d d d df         dz  }t	          j        d||f          |                    d          z  }|r||fS |S )Nr   rH         ?	nm,hnm->h)r<   meanr2   squeezer.   einsumsize)r3   r@   
return_mapattn_map	distancesdists         r7   get_attention_mapzGPSA.get_attention_map^   s    %%a((--a00$,,..qqq!!!Rx8B>	|K)X)>??)..QRBSBSS 	>!Kr8   c                    | j         j        j                            t	          j        | j                             d}t          | j        dz            }|dz  dk    r|dz
  dz  n|dz  }t          |          D ]p}t          |          D ]^}|||z  z   }d| j
        j        j        |df<   d||z
  z  |z  | j
        j        j        |df<   d||z
  z  |z  | j
        j        j        |df<   _q| j
        j        xj        | j        z  c_        d S )Nr   rW   r   r   rH   )r'   weightdatacopy_r.   eyer!   intr    ranger+   r#   )r3   locality_distancekernel_sizecenterh1h2positions          r7   
local_initzGPSA.local_initg   s8     48!4!4555$.B.//*5/Q*>*>+/Q&&KSTDT$$ 	_ 	_BK(( _ _b 009;$)(A+69:b6k9JM^9^$)(A+69:b6k9JM^9^$)(A+66	_
 	!!T%;;!!!!r8   num_patchesreturnc                    t          |dz            }t          j        d||d          }t          j        |                              dd          t          j        |                              dd          z
  }|                    ||          }|                    |d                              |d          }|dz  |dz  z   }|                    d          |d d d d d d df<   |                    d          |d d d d d d df<   |                    d          |d d d d d d df<   | j        j	        j
        }|                    |          S )NrW   r   r   rH   r   rJ   r   )rg   r.   r1   arangerM   repeatrepeat_interleaverP   r&   rc   deviceto)	r3   rp   img_sizer2   indindxindyinddrv   s	            r7   r;   zGPSA.get_rel_indicesu   sU   {b())k![+qAAl8$$))!R005<3I3I3N3NrST3U3UUzz(H--$$X1$55GGVWGXXqy419$"&.."3"3AAAqqq!!!QJ"&.."3"3AAAqqq!!!QJ"&.."3"3AAAqqq!!!QJ&~~f%%%r8   )r   Fr   r   r   F)__name__
__module____qualname__r   rE   r<   ra   ro   rg   r.   Tensorr;   __classcell__r6   s   @r7   r   r   '   s        
  A A A A A A4	 	 	      < < <&3 &5< & & & & & & & &r8   r   c                   6     e Zd Z	 	 	 	 d fd	ZddZd Z xZS )	MHSAr   Fr   c                 J   t                                                       || _        ||z  }|dz  | _        t	          j        ||dz  |          | _        t	          j        |          | _        t	          j        ||          | _	        t	          j        |          | _
        d S )Nr   r   r   )r   r   r    r"   r$   r%   qkvr(   r)   r*   r,   )r3   r!   r    r4   r)   r,   r5   r6   s          r7   r   zMHSA.__init__   s     	")#%
9S#'999I..Ic3''	I..r8   c                 ^   |j         \  }}}|                     |                              ||d| j        || j        z                                ddddd          }|d         |d         |d         }	}}||                    dd          z  | j        z  }
|
                    d                              d          }
t          |d	z            }t          j        |                              dd          t          j        |                              dd          z
  }|                    ||          }|                    |d                              |d          }|dz  |dz  z   }|d	z  }|                    |j                  }t          j        d
||
f          |z  }|r||
fS |S )Nr   r   r   r   rG   rI   rH   rJ   rW   rX   )r:   r   r=   r    r>   r?   r"   rL   rY   rg   r.   rs   rM   rt   ru   rw   rv   r[   )r3   r@   r]   rA   rB   rC   r   rQ   rR   r'   r^   rx   ry   rz   r{   r|   r_   r`   s                     r7   ra   zMHSA.get_attention_map   s   '1ahhqkk!!!Q4>1;NOOWWXY[\^_abdeffa&#a&#a&a1B+++tz9###++0033qBw<<l8$$))!R005<3I3I3N3NrST3U3UUzz(H--$$X1$55GGVWGXXqy419$BJ	LL**	|K)X)>??!C 	>!Kr8   c                 6   |j         \  }}}|                     |                              ||d| j        || j        z                                ddddd          }|                    d          \  }}}||                    dd          z  | j        z  }	|	                    d          }	| 	                    |	          }	|	|z                      dd                              |||          }| 
                    |          }|                     |          }|S )	Nr   r   r   r   rG   rI   rH   rJ   )r:   r   r=   r    r>   unbindr?   r"   rL   r)   r*   r,   )
r3   r@   rA   rB   rC   r   rQ   rR   r'   rD   s
             r7   rE   zMHSA.forward   s    '1ahhqkk!!!Q4>1;NOOWWXY[\^_abdeff**Q--1aAKKB'''4:5|||##~~d##AX  A&&..q!Q77IIaLLNN1r8   )r   Fr   r   r}   )r~   r   r   r   ra   rE   r   r   s   @r7   r   r      sn         / / / / / /$   *      r8   r   c            	       B     e Zd Zdddddej        eddf	 fd	Zd Z xZS )Block      @Fr   Tr   c                    t                                                        |	|          | _        |
| _        | j        rt	          ||||||          | _        nt          |||||          | _        |dk    rt          |          nt          j	                    | _
         |	|          | _        t          ||z            }t          ||||          | _        d S )N)r    r4   r)   r,   r#   )r    r4   r)   r,   r   )in_featureshidden_features	act_layerdrop)r   r   norm1use_gpsar   rD   r   r   r$   Identity	drop_pathnorm2rg   r
   mlp)r3   r!   r    	mlp_ratior4   r,   r)   r   r   
norm_layerr   r#   mlp_hidden_dimr6   s                r7   r   zBlock.__init__   s     	Z__
 = 	#!##"3  DII #!##  DI 1:B),,,BKMMZ__
S9_--*	
 
 
r8   c                     ||                      |                     |                     |                              z   }||                      |                     |                     |                              z   }|S N)r   rD   r   r   r   r3   r@   s     r7   rE   zBlock.forward   s_    tyyA77888txx

166777r8   )	r~   r   r   r$   GELUr   r   rE   r   r   s   @r7   r   r      sl         g  *
 *
 *
 *
 *
 *
X      r8   r   c                   F    e Zd ZdZddddddddd	d
ddddddedddf fd	Zd Zej        j	        d             Z
ej        j	        dd            Zej        j	        dd            Zej        j	        dej        fd            Zddedee         fdZd ZddefdZd Z xZS ) r   zI Vision Transformer with support for patch or hybrid CNN input stage
          r     tokeni      r   Fr   Nr   Tc                 
  	
 t                                                       |dv sJ z  || _        || _        | _        x| _        x| _        | _        | _        || _	        |t          |||          | _        nt          |||          | _        | j        j        }|| _        t          j        t!          j        dd                    | _        t          j        |          | _        | j	        rCt          j        t!          j        d|                    | _        t-          | j        d           d t!          j        d	||          D             t          j        	
f
d
t3          |          D                       | _                   | _        t9          d	d          g| _        t          j        |          | _        |d	k    rt          j        |          nt          j                     | _!        t-          | j        d           | "                    | j#                   | $                                D ])\  }}tK          |d          r|&                                 *d S )N) avgr   )rx   in_chans	embed_dim)rx   
patch_sizer   r   r   )p{Gz?stdc                 6    g | ]}|                                 S  )item).0r@   s     r7   
<listcomp>z#ConVit.__init__.<locals>.<listcomp>'  s     JJJAqvvxxJJJr8   r   c                 V   
 g | ]%}t          	
|         |k      
  
        &S ))
r!   r    r   r4   r,   r)   r   r   r   r#   )r   )r   iattn_drop_ratedprr   local_up_to_layerr#   r   r   r    proj_drop_rater4   s     r7   r   z#ConVit.__init__.<locals>.<listcomp>(  sb     %% %% %%  ##!((a&%.."3  %% %% %%r8   head)num_chs	reductionmodulero   )'r   r   num_classesglobal_poolr   num_featureshead_hidden_sizer   r#   use_pos_embedr   patch_embedr	   rp   r$   r-   r.   r1   	cls_tokenr(   pos_drop	pos_embedr   linspace
ModuleListrh   blocksnormdictfeature_info	head_dropr%   r   r   apply_init_weightsnamed_moduleshasattrro   )r3   rx   r   r   r   r   r   depthr    r   r4   	drop_ratepos_drop_rater   r   drop_path_ratehybrid_backboner   r   r#   r   rp   nmr   r6   s         ` ```  ``  ```    @r7   r   zConVit.__init__   s   . 	22222Y	&&!2ENNND1DN!2*&*(XQZ \  \  \D  *!%!#	     D &2&ek!Q	&B&BCC
]333 	3\%+ai*P*PQQDN$.c2222JJ>5!I!IJJJm %% %% %% %% %% %% %% %% %% %% %% %% %% U||%% %% %% & & Jy))	 ")qPPPQI..9DqBIi555bkmm	dn#....

4%&&&&&(( 	 	DAqq,'' 	 	r8   c                    t          |t          j                  rbt          |j        d           t          |t          j                  r.|j        )t          j                            |j        d           d S d S d S t          |t          j                  rLt          j                            |j        d           t          j                            |j        d           d S d S )Nr   r   r   r   )	
isinstancer$   r%   r   rc   r   init	constant_r   )r3   r   s     r7   r   zConVit._init_weightsB  s    a## 	-!(,,,,!RY'' -AF,>!!!&!,,,,,- -,>,>2<(( 	-Gafa(((Gah,,,,,	- 	-r8   c                 
    ddhS )Nr   r   r   r3   s    r7   no_weight_decayzConVit.no_weight_decayK  s    [))r8   c                 (    t          dddg          S )Nz ^cls_token|pos_embed|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr   )r   )r3   coarses     r7   group_matcherzConVit.group_matcherO  s%    4-/CD
 
 
 	
r8   c                     |r
J d            d S )Nz$gradient checkpointing not supportedr   )r3   enables     r7   set_grad_checkpointingzConVit.set_grad_checkpointingV  s    AAAAAAAAr8   rq   c                     | j         S r   )r   r   s    r7   get_classifierzConVit.get_classifierZ  s
    yr8   r   r   c                     || _         ||dv sJ || _        |dk    rt          j        | j        |          nt          j                    | _        d S )N)r   r   r   r   )r   r   r$   r%   r   r   r   )r3   r   r   s      r7   reset_classifierzConVit.reset_classifier^  sZ    &""66666*D>IAooBIdnk:::SUS^S`S`			r8   c                    |                      |          }| j        r
|| j        z   }|                     |          }| j                            |j        d         dd          }t          | j                  D ]3\  }}|| j	        k    rt          j        ||fd          } ||          }4|                     |          }|S )Nr   rH   r   rJ   )r   r   r   r   r   rK   r:   	enumerater   r   r.   catr   )r3   r@   
cls_tokensublks        r7   forward_featureszConVit.forward_featurese  s    Q 	#DN"AMM!^**171:r2>>
,, 	 	FAsD***Iz1o1555AAAIIaLLr8   
pre_logitsc                     | j         r9| j         dk    r"|d d dd f                             d          n|d d df         }|                     |          }|r|n|                     |          S )Nr   r   rJ   r   )r   rY   r   r   )r3   r@   r   s      r7   forward_headzConVit.forward_headr  sy     	O(,(8E(A(A!!!QRR%!$$$qAwANN10qqDIIaLL0r8   c                 Z    |                      |          }|                     |          }|S r   )r   r   r   s     r7   rE   zConVit.forwardx  s-    !!!$$a  r8   r}   )Tr   )r~   r   r   __doc__r   r   r   r.   jitignorer   r   r   r$   Moduler   rg   r   strr   r   boolr   rE   r   r   s   @r7   r   r      s        
    +N N N N N N`- - - Y* * * Y
 
 
 
 YB B B B Y	    a aC ahsm a a a a  1 1$ 1 1 1 1      r8   Fc                 t    |                     dd           rt          d          t          t          | |fi |S )Nfeatures_onlyz<features_only not implemented for Vision Transformer models.)getRuntimeErrorr   r   )variant
pretrainedkwargss      r7   _create_convitr  ~  sB    zz/4(( [YZZZFFvFFFr8   r   c           
      2    | ddd t           t          dddd	|S )Nr   )r   r   r   Tzpatch_embed.projr   )	urlr   
input_size	pool_sizerY   r   fixed_input_size
first_conv
classifierr   )r  r  s     r7   _cfgr    s3    =t%.BX\(	 
  r8   ztimm/)	hf_hub_id)zconvit_tiny.fb_in1kzconvit_small.fb_in1kzconvit_base.fb_in1krq   c           	      `    t          dddd          }t          dd| dt          |fi |}|S )	N
   r   0   rG   r   r#   r   r    convit_tinyr  r  r   r   r  r  r  
model_argsmodels       r7   r  r    sS    rQP P PJf=Zff4PZKeKe^dKeKeffELr8   c           	      `    t          dddd          }t          dd| dt          |fi |}|S )	Nr  r   r  	   r  convit_smallr  r   r  r  s       r7   r  r    sS    rQP P PJg>jggDQ[LfLf_eLfLfggELr8   c           	      `    t          dddd          }t          dd| dt          |fi |}|S )	Nr  r   r  r   r  convit_baser  r   r  r  s       r7   r  r    sS    rRQ Q QJf=Zff4PZKeKe^dKeKeffELr8   r}   )r   )#r   typingr   r.   torch.nnr$   	timm.datar   r   timm.layersr   r   r	   r
   r   r   _builderr   _features_fxr   	_registryr   r   __all__r   r   r   r   r   r  r  default_cfgsr  r  r  r   r8   r7   <module>r&     s   (              A A A A A A A A X X X X X X X X X X X X X X X X * * * * * * 1 1 1 1 1 1 < < < < < < < < * X& X& X& X& X&29 X& X& X&v4 4 4 4 429 4 4 4n1 1 1 1 1BI 1 1 1hM M M M MRY M M M`G G G G    %$4'222 D73334'222	& &    v            v      r8   