
    Ng:                     0   d Z ddlZddlZddlmZ ddlmZmZmZ ddl	Z	ddl	m
Z
 ddlmZmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlmZ dgZ G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Z G d de
j                  Zd Z d)dZ!d*dZ" e e"d           e"d           e"d           e"d           e"dd           e"dd           e"dd           e"dd          d          Z#ed)d efd!            Z$ed)d efd"            Z%ed)d efd#            Z&ed)d efd$            Z'ed)d efd%            Z(ed)d efd&            Z)ed)d efd'            Z*ed)d efd(            Z+dS )+a   Pooling-based Vision Transformer (PiT) in PyTorch

A PyTorch implement of Pooling-based Vision Transformers as described in
'Rethinking Spatial Dimensions of Vision Transformers' - https://arxiv.org/abs/2103.16302

This code was adapted from the original version at https://github.com/naver-ai/pit, original copyright below.

Modifications for timm by / Copyright 2020 Ross Wightman
    N)partial)OptionalSequenceTuple)nnIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)trunc_normal_	to_2tuple   )build_model_with_cfg)register_modelgenerate_default_cfgs)BlockPoolingVisionTransformerc                   z     e Zd ZdZ fdZdeej        ej        f         deej        ej        f         fdZ xZ	S )SequentialTuplezI This module exists to work around torchscript typing issues list -> listc                 B     t          t          |           j        |  d S N)superr   __init__)selfargs	__class__s     K/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/pit.pyr   zSequentialTuple.__init__"   s#    -ot$$-t4444    xreturnc                 &    | D ]} ||          }|S r    )r   r   modules      r   forwardzSequentialTuple.forward%   s%     	 	Fq		AAr   )
__name__
__module____qualname____doc__r   r   torchTensorr#   __classcell__r   s   @r   r   r       s~        SS5 5 5 5 5u|U\9: uU\SXS_E_?`        r   r   c                        e Zd Z	 	 	 	 	 d fd	Zdeej        ej        f         deej        ej        f         fdZ xZS )TransformerN        c
                 &  
 t          t          |                                            |z  
|| _        |	r |	
          nt	          j                    | _        t	          j        
fdt          |          D              | _	        d S )Nc                 |    g | ]8}t          d |         t          t          j        d                    9S )Tư>eps)dim	num_heads	mlp_ratioqkv_bias	proj_drop	attn_drop	drop_path
norm_layer)r   r   r   	LayerNorm).0ir9   drop_path_prob	embed_dimheadsr6   r8   s     r   
<listcomp>z(Transformer.__init__.<locals>.<listcomp>=   sf     &# &# &#  ###(+"2<T:::	 	 	&# &# &#r   )
r   r-   r   poolr   Identitynorm
Sequentialrangeblocks)r   base_dimdepthrA   r6   rC   r8   r9   r?   r;   r@   r   s      `` ``` @r   r   zTransformer.__init__,   s     	k4  ))+++u$		-7JJJy)))R[]]	m &# &# &# &# &# &# &# &# &# 5\\&# &# &# $r   r   r   c                    |\  }}|j         d         }| j        |                     ||          \  }}|j         \  }}}}|                    d                              dd          }t	          j        ||fd          }|                     |          }|                     |          }|d d d |f         }|d d |d f         }|                    dd                              ||||          }||fS )Nr      )r4   )	shaperC   flatten	transposer(   catrE   rH   reshape)r   r   
cls_tokenstoken_lengthBCHWs           r   r#   zTransformer.forwardJ   s    :!'*9  IIa44MAzW
1aIIaLL""1a((Iz1o1---IIaLLKKNNqqq-<-'(
aaaKK1%%aAq11*}r   )Nr.   r.   NN	r$   r%   r&   r   r   r(   r)   r#   r*   r+   s   @r   r-   r-   +   s         $ $ $ $ $ $<u|U\9: uU\SXS_E_?`        r   r-   c                   P     e Zd Zd fd	Zdeej        ej        f         fdZ xZS )Poolingzerosc           	          t          t          |                                            t          j        |||dz   |dz  |||          | _        t          j        ||          | _        d S )Nr   rL   )kernel_sizepaddingstridepadding_modegroups)r   rZ   r   r   Conv2dconvLinearfc)r   
in_featureout_featurer_   r`   r   s        r   r   zPooling.__init___   sm    gt%%'''I
aK%
 
 
	 )J44r   r   c                 ^    |                      |          }|                     |          }||fS r   )rc   re   )r   r   	cls_tokens      r   r#   zPooling.forwardm   s,    IIaLLGGI&&	)|r   )r[   rX   r+   s   @r   rZ   rZ   ^   sf        5 5 5 5 5 5uU\5<-G'H        r   rZ   c            	       @     e Zd Z	 	 	 	 ddedededef fd	Zd
 Z xZS )ConvEmbedding         r   img_size
patch_sizer_   r^   c                    t          t          |                                            |}t          |          | _        t          |          | _        t          j        | j        d         d|z  z   | j        d         z
  |z  dz             | _        t          j        | j        d         d|z  z   | j        d         z
  |z  dz             | _	        | j        | j	        f| _
        t          j        |||||d          | _        d S )Nr   rL   r   T)r]   r_   r^   bias)r   rk   r   r   ro   rp   mathfloorheightwidth	grid_sizer   rb   rc   )r   in_channelsout_channelsro   rp   r_   r^   r   s          r   r   zConvEmbedding.__init__t   s     	mT""++---!(++#J//j$-"2Q["@4?STCU"UY_!_bc!cddZq!1AK!?$/RSBT!TX^ ^ab bcc
+tz2I:77 7 7			r   c                 0    |                      |          }|S r   )rc   r   r   s     r   r#   zConvEmbedding.forward   s    IIaLLr   )rl   rm   rn   r   )r$   r%   r&   intr   r#   r*   r+   s   @r   rk   rk   s   s        
   7 7 	7
 7 7 7 7 7 7 7 7*      r   rk   c                   p    e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'dededededee         dee         dee         def fdZd Z	e
j        j        d             Ze
j        j        d(d            Ze
j        j        d(d            Zdej        fdZd)d ed!ee         fd"Zd# Zd*d$ede
j        fd%Zd& Z xZS )+r   z Pooling-based Vision Transformer

    A PyTorch implement of 'Rethinking Spatial Dimensions of Vision Transformers'
        - https://arxiv.org/abs/2103.16302
    rl   rm   rn   overlap0   r   r   rL         rL   r   rn   r        tokenFr.   ro   rp   r_   	stem_type	base_dimsrJ   rA   r6   c                 n   t          t          |                                            |dv sJ || _        || _        |d         |d         z  }|	| _        || _        |rdnd| _        g | _        t          |
||||          | _
        t          j        t          j        d|| j
        j        | j
        j                            | _        t          j        t          j        d| j        |                    | _        t          j        |          | _        g }d t          j        d|t-          |                                        |          D             }|}t1          t3          |                    D ]}d }||         ||         z  }|dk    rt5          ||d          }|t7          ||         ||         ||         ||||||                   gz  }|}| xj        t9          ||dz
  d|z  z  d	| 
          gz  c_        t;          | | _        t          j        |d         |d         z  d          | _         |x| _!        x| _"        | _#        t          j        |          | _$        |	dk    rt          j%        | j#        |	          nt          j&                    | _'        d | _(        |r=|	dk    rt          j%        | j#        | j                  nt          j&                    | _(        d| _)        tU          | j        d           tU          | j        d           | +                    | j,                   d S )N)r   r   rL   r   )pc                 6    g | ]}|                                 S r!   )tolist)r=   r   s     r   rB   z5PoolingVisionTransformer.__init__.<locals>.<listcomp>   s     ^^^aqxxzz^^^r   )r_   )rC   r8   r9   r?   transformers.)num_chs	reductionr"   r1   r2   Fg{Gz?)std)-r   r   r   r   rA   num_classesglobal_pool
num_tokensfeature_infork   patch_embedr   	Parameterr(   randnru   rv   	pos_embedri   Dropoutpos_droplinspacesumsplitrG   lenrZ   r-   dictr   transformersr<   rE   num_featureshead_hidden_sizer@   	head_droprd   rD   head	head_distdistilled_trainingr   apply_init_weights)r   ro   rp   r_   r   r   rJ   rA   r6   r   in_chansr   	distilled	drop_ratepos_drop_drateproj_drop_rateattn_drop_ratedrop_path_rater@   r   dprprev_dimr>   rC   r   s                           r   r   z!PoolingVisionTransformer.__init__   s:   ( 	&--66888j(((("
aL58+	&&(/!!a(9h
TZ[[ek!Y@P@WY]YiYo&p&pqqek!T_i&P&PQQ
^444^^5>!^SZZ#P#P#V#VW\#]#]^^^s5zz"" 	u 	uAD!!uQx/I1uu  
 [!aa(("1v	 	 	 	 	L !H$xFQJRSUVRVCV_rop_r_r"s"s"s!tt+\:L2r!:EEE	ENNND1DN I..>IAooBIdnk:::SUS^S`S`	 	oLWZ[OORYt~t7GHHHacalananDN"'dn#....dn#....

4%&&&&&r   c                     t          |t          j                  rLt          j                            |j        d           t          j                            |j        d           d S d S )Nr   g      ?)
isinstancer   r<   init	constant_rr   weight)r   ms     r   r   z&PoolingVisionTransformer._init_weights   sZ    a&& 	-Gafa(((Gah,,,,,	- 	-r   c                 
    ddhS )Nr   ri   r!   r   s    r   no_weight_decayz(PoolingVisionTransformer.no_weight_decay   s    [))r   Tc                     || _         d S r   )r   r   enables     r   set_distilled_trainingz/PoolingVisionTransformer.set_distilled_training   s    "(r   c                     |r
J d            d S )Nz$gradient checkpointing not supportedr!   r   s     r   set_grad_checkpointingz/PoolingVisionTransformer.set_grad_checkpointing   s    AAAAAAAAr   r   c                 :    | j         | j        | j         fS | j        S r   )r   r   r   s    r   get_classifierz'PoolingVisionTransformer.get_classifier   s!    >%9dn,,9r   Nr   r   c                 "   || _         ||| _        |dk    rt          j        | j        |          nt          j                    | _        | j        ?|dk    rt          j        | j        | j                   nt          j                    | _        d S d S )Nr   )r   r   r   rd   r@   rD   r   r   )r   r   r   s      r   reset_classifierz)PoolingVisionTransformer.reset_classifier   s    &"*D>IAooBIdnk:::SUS^S`S`	>%LWZ[OORYt~t7GHHHacalananDNNN &%r   c                    |                      |          }|                     || j        z             }| j                            |j        d         dd          }|                     ||f          \  }}|                     |          }|S )Nr   r   )r   r   r   ri   expandrM   r   rE   )r   r   rR   s      r   forward_featuresz)PoolingVisionTransformer.forward_features  s~    QMM!dn,--^**171:r2>>
))1j/:::YYz**
r   
pre_logitsc                     | j         | j        dk    sJ |d d df         |d d df         }}|                     |          }|                     |          }|s*|                     |          }|                      |          }| j        r)| j        r"t          j                                        s||fS ||z   dz  S | j        dk    r|d d df         }|                     |          }|s|                     |          }|S )Nr   r   r   rL   )	r   r   r   r   r   trainingr(   jitis_scripting)r   r   r   x_dists       r   forward_headz%PoolingVisionTransformer.forward_head	  s   >%#w....!!!Q$111a4vAq!!A^^A&&F 0IIaLL//& (4= (AWAWAYAY (&y  F
a''7**aaadGq!!A !IIaLLHr   c                 Z    |                      |          }|                     |          }|S r   )r   r   r{   s     r   r#   z PoolingVisionTransformer.forward   s-    !!!$$a  r   )rl   rm   rn   r~   r   r   r   r   r   r   r   Fr.   r.   r.   r.   r.   )Tr   F)r$   r%   r&   r'   r|   strr   floatr   r   r(   r   ignorer   r   r   r   Moduler   r   r   r   boolr)   r   r#   r*   r+   s   @r   r   r      s           &'3#,#, %L' L'L' L' 	L'
 L'  }L' C=L' C=L' L' L' L' L' L' L'\- - -
 Y* * * Y) ) ) ) YB B B B	    o oC ohsm o o o o   $ 5<    .      r   c                     i }t          j        d          }|                                 D ]!\  }}|                    d |          }|||<   "|S )z preprocess checkpoints zpools\.(\d)\.c                 T    dt          |                     d                    dz    dS )Nr   r   z.pool.)r|   group)exps    r   <lambda>z&checkpoint_filter_fn.<locals>.<lambda>/  s)    %RS15F5F5J%R%R%R r   )recompileitemssub)
state_dictmodelout_dictp_blockskvs         r   checkpoint_filter_fnr   &  s`    Hz*++H  ""  1
 LLRRTUVVOr   Fc           	          t          t          d                    }|                    d|          }t          t          | |ft
          t          dd|          d|}|S )Nr   out_indiceshookT)feature_cls
no_rewriter   )pretrained_filter_fnfeature_cfg)tuplerG   popr   r   r   r   )variant
pretrainedkwargsdefault_out_indicesr   r   s         r   _create_pitr   4  sr    a//**],?@@K   2V+VVV   E Lr    c                 6    | ddd dddt           t          ddd|S )	Nr   )r   rl   rl   g?bicubicTzpatch_embed.convr   )urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizemeanr   
first_conv
classifierr   )r   r   s     r   _cfgr  C  s7    =t%.B(   r   ztimm/)	hf_hub_id)r   r   )r  r  )zpit_ti_224.in1kzpit_xs_224.in1kzpit_s_224.in1kzpit_b_224.in1kzpit_ti_distilled_224.in1kzpit_xs_distilled_224.in1kzpit_s_distilled_224.in1kzpit_b_distilled_224.in1kr   c           	      j    t          ddg dg dg dd          }t          d| fi t          |fi |S )	N      @   r  r  r   r   r   r   rn   rm   r   rp   r_   r   rJ   rA   r6   	pit_b_224r   r   r   r   
model_argss      r   r  r  c  ^    ,,iijj  J {JMM$z2L2LV2L2LMMMr   c           	      j    t          ddg dg dg dd          }t          d| fi t          |fi |S )	Nrm   rn   r   r   r   r      r   r  	pit_s_224r  r  s      r   r  r  p  r  r   c           	      j    t          ddg dg dg dd          }t          d| fi t          |fi |S )	Nrm   rn   r   r   r   r   r  
pit_xs_224r  r  s      r   r  r  }  ^    ,,iiii  J |ZNN4
3M3Mf3M3MNNNr   c           	      j    t          ddg dg dg dd          }t          d| fi t          |fi |S )	Nrm   rn       r  r  r   r   r   r  
pit_ti_224r  r  s      r   r  r    r  r   c           	      l    t          ddg dg dg ddd          }t          d	| fi t          |fi |S )
Nr  r  r  r	  r
  r   Trp   r_   r   rJ   rA   r6   r   pit_b_distilled_224r  r  s      r   r  r    b    ,,iijj  J ,jWWD<V<Vv<V<VWWWr   c           	      l    t          ddg dg dg ddd          }t          d	| fi t          |fi |S )
Nrm   rn   r   r   r  r   Tr  pit_s_distilled_224r  r  s      r   r!  r!    r  r   c           	      l    t          ddg dg dg ddd          }t          d	| fi t          |fi |S )
Nrm   rn   r   r   r   r   Tr  pit_xs_distilled_224r  r  s      r   r#  r#    c    ,,iiii  J -zXXT*=W=WPV=W=WXXXr   c           	      l    t          ddg dg dg ddd          }t          d	| fi t          |fi |S )
Nrm   rn   r  r   r   r   Tr  pit_ti_distilled_224r  r  s      r   r&  r&    r$  r   r   )r   ),r'   rs   r   	functoolsr   typingr   r   r   r(   r   	timm.datar	   r
   timm.layersr   r   _builderr   	_registryr   r   vision_transformerr   __all__rF   r   r   r-   rZ   rk   r   r   r   r  default_cfgsr  r  r  r  r  r!  r#  r&  r!   r   r   <module>r0     sq     				       , , , , , , , , , ,        A A A A A A A A 0 0 0 0 0 0 0 0 * * * * * * < < < < < < < < % % % % % % &
&    bm   0 0 0 0 0") 0 0 0f    bi   *    BI   6U U U U Ury U U Up         %$tg...tg...dW---dW---!%("* "* "* "&("* "* "* !%(!* !* !* !%(!* !* !*& &  * 	N 	N-E 	N 	N 	N 	N 	N 	N-E 	N 	N 	N 	N 	O 	O.F 	O 	O 	O 	O 	O 	O.F 	O 	O 	O 	O 
X 
X7O 
X 
X 
X 
X 
X 
X7O 
X 
X 
X 
X 
Y 
Y8P 
Y 
Y 
Y 
Y 
Y 
Y8P 
Y 
Y 
Y 
Y 
Y 
Yr   