
    Ng4                        d Z ddlZddlmZ ddlZddlmZ ddlmZ ddl	m
Z
mZ ddlmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZ dgZd dZ eddd           edd          dZ G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Zd Z d!dZ!ed!defd            Z"ed!defd            Z#dS )"a   Transformer in Transformer (TNT) in PyTorch

A PyTorch implement of TNT as described in
'Transformer in Transformer' - https://arxiv.org/abs/2103.00112

The official mindspore code is released and available at
https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/TNT
    N)Optional)
checkpointIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)MlpDropPathtrunc_normal__assert	to_2tuple   )build_model_with_cfg)register_model)resize_pos_embedTNT c                 6    | ddd dddt           t          ddd|S )	N  )      r   g?bicubicTzpixel_embed.projhead)urlnum_classes
input_size	pool_sizecrop_pctinterpolationfixed_input_sizemeanstd
first_conv
classifierr   )r   kwargss     K/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/tnt.py_cfgr&      s7    =t%.B(       zchttps://github.com/contrastive/pytorch-image-models/releases/download/TNT/tnt_s_patch16_224.pth.tar)      ?r(   r(   )r   r    r!   )r    r!   )tnt_s_patch16_224tnt_b_patch16_224c                   *     e Zd ZdZd fd	Zd Z xZS )	Attentionz Multi-Head Attention
       F        c                    t                                                       || _        || _        ||z  }|| _        |dz  | _        t          j        ||dz  |          | _        t          j        |||          | _	        t          j
        |d          | _        t          j        ||          | _        t          j
        |d          | _        d S )Ng         biasT)inplace)super__init__
hidden_dim	num_headshead_dimscalennLinearqkvDropout	attn_dropproj	proj_drop)	selfdimr6   r7   qkv_biasr?   rA   r8   	__class__s	           r%   r5   zAttention.__init__2   s    $"* %
)Cah???3(333It<<<Ic3''	It<<<r'   c                    |j         \  }}}|                     |                              ||d| j        | j                                      ddddd          }|                    d          \  }}|                     |                              ||| j        d                              dddd          }||                    dd          z  | j	        z  }	|	
                    d          }	|                     |	          }	|	|z                      dd                              ||d          }|                     |          }|                     |          }|S )	Nr0   r   r   r      rC   )shaper<   reshaper7   r8   permuteunbindr=   	transposer9   softmaxr?   r@   rA   )
rB   xBNCr<   qkr=   attns
             r%   forwardzAttention.forward@   s2   '1aWWQZZ1aGGOOPQSTVWYZ\]^^yy||1FF1IIaDNB77??1aKKAKKB'''4:5|||##~~d##AX  A&&..q!R88IIaLLNN1r'   )r-   Fr.   r.   __name__
__module____qualname____doc__r5   rX   __classcell__rE   s   @r%   r,   r,   /   sV         = = = = = =      r'   r,   c            	       P     e Zd ZdZdddddddej        ej        f	 fd	Zd Z xZ	S )	Blockz TNT Block
    rG            @Fr.   c                    t                                                        ||          | _        t          |||||	|          | _         ||          | _        t          |t          |dz            |||          | _         ||          | _	        t          j        ||z  |d          | _         ||          | _        t          |||||	|          | _        |
dk    rt          |
          nt          j                    | _         ||          | _        t          |t          ||z            |||          | _        d S )N)r7   rD   r?   rA   rG   )in_featureshidden_featuresout_features	act_layerdropTr1   r.   )r4   r5   norm_inr,   attn_innorm_mlp_inr   intmlp_in
norm1_projr:   r;   r@   norm_outattn_outr	   Identity	drop_pathnorm_mlpmlp)rB   rC   dim_out	num_pixelnum_heads_innum_heads_out	mlp_ratiorD   rA   r?   rs   rh   
norm_layerrE   s                r%   r5   zBlock.__init__S   sf    	!z# "
 
 
 &:c??aLL
 
 
 %*S//IcIowTBBB	 #
7++!#
 
 
 1:B),,,BKMM"
7++) 344 
 
 
r'   c                    ||                      |                     |                     |                              z   }||                      |                     |                     |                              z   }|                                \  }}}t          j        |d d ddf         |d d dd f         |                     | 	                    |          
                    ||dz
  d                    z   gd          }||                      |                     |                     |                              z   }||                      |                     |                     |                              z   }||fS )Nr   r   rH   rJ   )rs   rk   rj   rn   rl   sizetorchcatr@   ro   rL   rq   rp   ru   rt   )rB   pixel_embedpatch_embedrR   rS   rT   s         r%   rX   zBlock.forward   s]   !DNN4<<[@Y@Y3Z3Z$[$[[!DNN4;;t?O?OP[?\?\3]3]$^$^^""$$1aiAaC +aaae"4tyyQ\A]A]AeAefgijmninprAsAs7t7t"tu   "DNN4=={A[A[3\3\$]$]]!DNN488DMM+<V<V3W3W$X$XXK''r'   )
rZ   r[   r\   r]   r:   GELU	LayerNormr5   rX   r^   r_   s   @r%   ra   ra   P   sx          g|:
 :
 :
 :
 :
 :
x( ( ( ( ( ( (r'   ra   c                   *     e Zd ZdZd	 fd	Zd Z xZS )

PixelEmbedz Image to Pixel Embedding
    r      r   0   rG   c                    t                                                       t          |          }t          |          }|d         |d         z  |d         |d         z  f| _        | j        d         | j        d         z  }|| _        || _        || _        fd|D             }|| _        t          j	        || j        dd          | _
        t          j        ||          | _        d S )Nr   r   c                 >    g | ]}t          j        |z            S  )mathceil).0psstrides     r%   
<listcomp>z'PixelEmbed.__init__.<locals>.<listcomp>   s'    FFFR$)BK00FFFr'      r   )kernel_sizepaddingr   )r   r   )r4   r5   r   	grid_sizeimg_sizenum_patchesin_dimnew_patch_sizer:   Conv2dr@   Unfoldunfold)	rB   r   
patch_sizein_chansr   r   r   r   rE   s	        `  r%   r5   zPixelEmbed.__init__   s    X&&z**
"1+A6zRS}8TU~a(T^A->? &FFFF:FFF,IhAV\]]]	iN>RRRr'   c                    |j         \  }}}}t          || j        d         k    d| d| d| j        d          d| j        d          d	           t          || j        d         k    d| d| d| j        d          d| j        d          d	           |                     |          }|                     |          }|                    dd                              || j        z  | j        | j	        d         | j	        d                   }||z   }|                    || j        z  | j        d                              dd          }|S )	Nr   zInput image size (*z) doesn't match model (r   z).r0   rH   )
rK   r   r   r@   r   rO   rL   r   r   r   )rB   rQ   	pixel_posrR   rT   HWs          r%   rX   zPixelEmbed.forward   sd   W
1aT]1%%fffQfft}Q?OffRVR_`aRbfff	h 	h 	hT]1%%fffQfft}Q?OffRVR_`aRbfff	h 	h 	hIIaLLKKNNKK1%%a$*:&:DKI\]^I_aeatuvawxx	MIIa$**DK<<FFq!LLr'   )r   r   r   r   rG   rY   r_   s   @r%   r   r      s\         S S S S S S       r'   r   c                   N    e Zd ZdZdddddddd	d
d	dddddddej        d
f fd	Zd Zej	        j
        d             Zej	        j
        dd            Zej	        j
        dd            Zej	        j
        dej        fd            Zd dedee         fdZd ZddefdZd Z xZS )!r   zC Transformer in Transformer - https://arxiv.org/abs/2103.00112
    r   r   r   r   tokeni   r   rb   rG   rc   Fr.   c                    t                                                       |dv sJ || _        || _        |x| _        x| _        | _        d| _        t          |||||          | _	        | j	        j
        }|| _
        | j	        j        }|d         |d         z  } |||z            | _        t          j        ||z  |          | _         ||          | _        t          j        t%          j        dd|                    | _        t          j        t%          j        d|dz   |                    | _        t          j        t%          j        d||d         |d                             | _        t          j        |          | _        d t%          j        d||          D             }g }t5          |          D ]5}|                    t9          ||||	|
||||||         |                     6t          j        |          | _         ||          | _        t          j        |          | _         |dk    rt          j        ||          nt          j!                    | _"        tG          | j        d	
           tG          | j        d	
           tG          | j        d	
           | $                    | j%                   d S )Nr   r   avgF)r   r   r   r   r   r   r   )pc                 6    g | ]}|                                 S r   )item)r   rQ   s     r%   r   z TNT.__init__.<locals>.<listcomp>   s     JJJAqvvxxJJJr'   )rC   rv   rw   rx   ry   rz   rD   rA   r?   rs   r{   {Gz?r!   )&r4   r5   r   global_poolnum_featureshead_hidden_size	embed_dimgrad_checkpointingr   r   r   r   ro   r:   r;   r@   
norm2_proj	Parameterr~   zeros	cls_token	patch_posr   r>   pos_droplinspacerangeappendra   
ModuleListblocksnorm	head_droprr   r   r
   apply_init_weights)rB   r   r   r   r   r   r   	inner_dimdepthnum_heads_innernum_heads_outerrz   rD   	drop_ratepos_drop_rateproj_drop_rateattn_drop_ratedrop_path_rater{   first_strider   r   rw   dprr   irE   s                             r%   r5   zTNT.__init__   s   , 	22222&&ENNND1DN"'%!
 
 
 &2&)8"1%q(99	$*Y%:;;Ii)3Y??	$*Y//ek!Q	&B&BCCek![1_i&P&PQQek!Yq@QSabcSd&e&eff
]333JJ>5!I!IJJJu 	 	AMM%!#,-#!((a&%       mF++Jy))	I..9DqBIi555bkmm	dn#....dn#....dn#....

4%&&&&&r'   c                    t          |t          j                  rbt          |j        d           t          |t          j                  r.|j        )t          j                            |j        d           d S d S d S t          |t          j                  rLt          j                            |j        d           t          j                            |j        d           d S d S )Nr   r   r   g      ?)	
isinstancer:   r;   r
   weightr2   init	constant_r   )rB   ms     r%   r   zTNT._init_weights  s    a## 	-!(,,,,!RY'' -AF,>!!!&!,,,,,- -,>,>2<(( 	-Gafa(((Gah,,,,,	- 	-r'   c                 
    h dS )N>   r   r   r   r   rB   s    r%   no_weight_decayzTNT.no_weight_decay  s    6666r'   c                 ,    t          dddg          }|S )Nz=^cls_token|patch_pos|pixel_pos|pixel_embed|norm[12]_proj|proj)z^blocks\.(\d+)N)z^norm)i )stemr   )dict)rB   coarsematchers      r%   group_matcherzTNT.group_matcher  s+    Q)$
 
 
 r'   Tc                     || _         d S N)r   )rB   enables     r%   set_grad_checkpointingzTNT.set_grad_checkpointing&  s    "(r'   returnc                     | j         S r   )r   r   s    r%   get_classifierzTNT.get_classifier*  s
    yr'   Nr   r   c                     || _         ||dv sJ || _        |dk    rt          j        | j        |          nt          j                    | _        d S )Nr   r   )r   r   r:   r;   r   rr   r   )rB   r   r   s      r%   reset_classifierzTNT.reset_classifier.  sZ    &""66666*D>IAooBIdnk:::SUS^S`S`			r'   c                    |j         d         }|                     || j                  }|                     |                     |                     |                    || j        d                                        }t          j	        | j
                            |dd          |fd          }|| j        z   }|                     |          }| j        r=t          j                                        s| j        D ]}t%          |||          \  }}n| j        D ]} |||          \  }}|                     |          }|S )Nr   rH   r   rJ   )rK   r   r   r   r@   ro   rL   r   r~   r   r   expandr   r   r   jitis_scriptingr   r   r   )rB   rQ   rR   r   r   blks         r%   forward_featureszTNT.forward_features5  sG   GAJ&&q$.99oodii@S@STUW[Wgik@l@l0m0m&n&nooi!6!6q"b!A!A; OUVWWW!DN2mmK00" 	I59+A+A+C+C 	I{ U U+5c;+T+T([[U { I I+.3{K+H+H([[ii,,r'   
pre_logitsc                     | j         r9| j         dk    r"|d d dd f                             d          n|d d df         }|                     |          }|r|n|                     |          S )Nr   r   rJ   r   )r   r    r   r   )rB   rQ   r   s      r%   forward_headzTNT.forward_headH  sy     	O(,(8E(A(A!!!QRR%!$$$qAwANN10qqDIIaLL0r'   c                 Z    |                      |          }|                     |          }|S r   )r   r   )rB   rQ   s     r%   rX   zTNT.forwardN  s-    !!!$$a  r'   F)Tr   )rZ   r[   r\   r]   r:   r   r5   r   r~   r   ignorer   r   r   Moduler   rm   r   strr   r   boolr   rX   r^   r_   s   @r%   r   r      s         |)K' K' K' K' K' K'Z- - - Y7 7 7 Y    Y) ) ) ) Y	    a aC ahsm a a a a  &1 1$ 1 1 1 1      r'   c           	          | d         j         |j        j         k    r9t          | d         |j        t          |dd          |j        j                  | d<   | S )zJ convert patch embedding weight from manual patchify + linear proj to convr   
num_tokensr   )rK   r   r   getattrr   r   )
state_dictmodels     r%   checkpoint_filter_fnr   T  sX    +$(==="2:k3JOWUL!<<e>O>Y#[ #[
;r'   Fc                     |                     dd           rt          d          t          t          | |fdt          i|}|S )Nfeatures_onlyz<features_only not implemented for Vision Transformer models.pretrained_filter_fn)getRuntimeErrorr   r   r   )variant
pretrainedr$   r   s       r%   _create_tntr  \  s\    zz/4(( [YZZZ Wj 1  E Lr'   r   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nr   i     rb      Fr   r   r   r   r   rD   r)   r   )r)   r   r  r   r$   	model_cfgr   s       r%   r)   r)   g  sT    "a  I ``
`d9F_F_X^F_F_``ELr'   c           	      b    t          dddddd          }t          d
d	| it          |fi |}|S )Nr   i  (   rb   
   Fr  r*   r   )r*   r  r  s       r%   r*   r*   p  sT    "b  I ``
`d9F_F_X^F_F_``ELr'   )r   r   )$r]   r   typingr   r~   torch.nnr:   torch.utils.checkpointr   	timm.datar   r   timm.layersr   r	   r
   r   r   _builderr   	_registryr   vision_transformerr   __all__r&   default_cfgsr   r,   ra   r   r   r   r  r)   r*   r   r'   r%   <module>r     sz                  - - - - - - A A A A A A A A H H H H H H H H H H H H H H * * * * * * % % % % % % 0 0 0 0 0 0'    q/   /       	   BJ( J( J( J( J(BI J( J( J(Z       BS S S S S") S S Sl       S      S      r'   