
    Ng                        d Z ddlZddlmZmZmZmZ ddlZddl	Z	ddl
mZ ddlmc mZ ddlmZ ddlmZmZ ddlmZmZmZmZmZmZ ddlmZ dd	lmZ dd
lm Z m!Z! dgZ" G d dej#                  Z$ G d dej#                  Z% G d dej#                  Z& G d dej#                  Z' G d dej#                  Z( G d dej#                  Z)d Z*dJdZ+ G d dej#                  Z, G d dej#                  Z-	 	 	 	 	 	 	 	 dKd#Z.	 	 	 	 dLd$Z/ G d% dej#                  Z0dMd&Z1dNd(Z2 e! e2d)d*d+,           e2d)d-d.d/0           e2d)d1d+,           e2d)d2d.d/0           e2d)d3d+,           e2d)d4d.d50           e2d)d6d+,           e2d)d7d8d50           e2d)d9d+,           e2d)d:d8d50           e2d)d;d8d<0          d=          Z3e dMd>e0fd?            Z4e dMd>e0fd@            Z5e dMd>e0fdA            Z6e dMd>e0fdB            Z7e dMd>e0fdC            Z8e dMd>e0fdD            Z9e dMd>e0fdE            Z:e dMd>e0fdF            Z;e dMd>e0fdG            Z<e dMd>e0fdH            Z=e dMd>e0fdI            Z>dS )Oa5   Vision OutLOoker (VOLO) implementation

Paper: `VOLO: Vision Outlooker for Visual Recognition` - https://arxiv.org/abs/2106.13112

Code adapted from official impl at https://github.com/sail-sg/volo, original copyright in comment below

Modifications and additions for timm by / Copyright 2022, Ross Wightman
    N)ListOptionalTupleUnion)
checkpointIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPathMlp	to_2tuple	to_ntupletrunc_normal_use_fused_attn   )build_model_with_cfg)feature_take_indices)register_modelgenerate_default_cfgsVOLOc                   2     e Zd Z	 	 	 	 	 	 d fd	Zd Z xZS )OutlookAttention   r   F        c	                    t                                                       ||z  }	|| _        || _        || _        || _        |	dz  | _        t          j        |||          | _	        t          j        ||dz  |z            | _
        t          j        |          | _        t          j        ||          | _        t          j        |          | _        t          j        |||          | _        t          j        ||d          | _        d S )N      ࿩bias   )kernel_sizepaddingstrideT)r    r"   	ceil_mode)super__init__	num_headsr    r!   r"   scalennLinearvattnDropout	attn_dropproj	proj_dropUnfoldunfold	AvgPool2dpool)selfdimr&   r    r!   r"   qkv_biasr-   r/   head_dim	__class__s             L/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/volo.pyr%   zOutlookAttention.__init__*   s     	)#"&%
3(333Ic;!#3i#?@@	I..Ic3''	I..iKQWXXXLVFdSSS			    c                    |j         \  }}}}|                     |                              dddd          }t          j        || j        z            t          j        || j        z            }}|                     |                              || j        || j        z  | j	        | j	        z  ||z                                ddddd          }| 
                    |                    dddd                                        dddd          }	|                     |	                              |||z  | j        | j	        | j	        z  | j	        | j	        z                                ddddd          }	|	| j        z  }	|	                    d          }	|                     |	          }	|	|z                      ddddd                              ||| j	        z  | j	        z  ||z            }t          j        |||f| j	        | j        | j                  }|                     |                    dddd                    }|                     |          }|S )	Nr   r   r      r   r5   )output_sizer    r!   r"   )shaper*   permutemathceilr"   r1   reshaper&   r    r3   r+   r'   softmaxr-   Ffoldr!   r.   r/   )
r4   xBHWCr*   hwr+   s
             r9   forwardzOutlookAttention.forwardG   s-   W
1aFF1IIaAq))yT[))49Q_+E+E1KKNN""t~qDN2t//Q8 88?1aA8N8N 	
 yy1aA..//771aCCyy&&q1udnd&69I&It//1 118Aq!Q1G1G 	 dj |||##~~d##AXq!Q1--55aT=M9MPTP`9`bcfgbghhF11a&d6FPTP\eiepqqqIIaii1a++,,NN1r:   )r   r   r   Fr   r   __name__
__module____qualname__r%   rO   __classcell__r8   s   @r9   r   r   (   sf         T T T T T T:      r:   r   c                   J     e Zd Zdddddej        ej        df fd	Zd Z xZS )	Outlookerr         @r   Fc           	         t                                                        |
|          | _        t          |||||||          | _        |dk    rt          |          nt          j                    | _         |
|          | _	        t          |t          ||z            |	          | _        |dk    rt          |          nt          j                    | _        d S )N)r    r!   r"   r6   r-   r   in_featureshidden_features	act_layer)r$   r%   norm1r   r+   r   r(   Identity
drop_path1norm2r   intmlp
drop_path2)r4   r5   r    r!   r"   r&   	mlp_ratior-   	drop_pathr]   
norm_layerr6   r8   s               r9   r%   zOutlooker.__init__c   s     	Z__
$#
 
 
	 2;R(9---R[]]Z__
i00
 
 

 2;R(9---R[]]r:   c                     ||                      |                     |                     |                              z   }||                     |                     |                     |                              z   }|S Nr`   r+   r^   rd   rc   ra   r4   rH   s     r9   rO   zOutlooker.forward   _    		$**Q-- 8 8999A 7 7888r:   	rQ   rR   rS   r(   GELU	LayerNormr%   rO   rT   rU   s   @r9   rW   rW   b   sq         g|!S !S !S !S !S !SF      r:   rW   c                   Z     e Zd ZU ej        j        e         ed<   	 	 	 	 d fd	Zd Z	 xZ
S )	Attention
fused_attn   Fr   c                 p   t                                                       || _        ||z  }|dz  | _        t	                      | _        t          j        ||dz  |          | _        t          j	        |          | _
        t          j        ||          | _        t          j	        |          | _        d S )Nr   r   r   )r$   r%   r&   r'   r   rr   r(   r)   qkvr,   r-   r.   r/   )r4   r5   r&   r6   r-   r/   r7   r8   s          r9   r%   zAttention.__init__   s     	")#%
(**9S#'999I..Ic3''	I..r:   c                    |j         \  }}}}|                     |                              |||z  d| j        || j        z                                ddddd          }|                    d          \  }}}	| j        r,t          j        |||	| j	        r| j
        j        nd          }nS|| j        z  }||                    dd	          z  }
|
                    d	
          }
| 
                    |
          }
|
|	z  }|                    dd                              ||||          }|                     |          }|                     |          }|S )Nr   r<   r   r   r   r   )	dropout_pr=   r>   )r@   ru   rD   r&   rA   unbindrr   rF   scaled_dot_product_attentiontrainingr-   pr'   	transposerE   r.   r/   )r4   rH   rI   rJ   rK   rL   ru   qkr*   r+   s              r9   rO   zAttention.forward   sO   W
1ahhqkk!!!QUAt~qDN?RSS[[\]_`bcefhijj**Q--1a? 
	.1a.2mC$.**  AA
 DJAq{{2r***D<<B<''D>>$''DqAKK1%%aAq11IIaLLNN1r:   )rs   Fr   r   )rQ   rR   rS   torchjitFinalbool__annotations__r%   rO   rT   rU   s   @r9   rq   rq      sq         	%%%%
 / / / / / /&      r:   rq   c                   F     e Zd Zddddej        ej        f fd	Zd Z xZS )Transformer      @Fr   c	                    t                                                        ||          | _        t          ||||          | _        |dk    rt          |          nt          j                    | _         ||          | _	        t          |t          ||z            |          | _        |dk    rt          |          nt          j                    | _        d S )N)r&   r6   r-   r   rZ   )r$   r%   r^   rq   r+   r   r(   r_   r`   ra   r   rb   rc   rd   )
r4   r5   r&   re   r6   r-   rf   r]   rg   r8   s
            r9   r%   zTransformer.__init__   s     	Z__
cYU^___	1:R(9---R[]]Z__
3C)O8L8LXabbb1:R(9---R[]]r:   c                     ||                      |                     |                     |                              z   }||                     |                     |                     |                              z   }|S ri   rj   rk   s     r9   rO   zTransformer.forward   rl   r:   rm   rU   s   @r9   r   r      sj         g|S S S S S S(      r:   r   c                   0     e Zd Z	 	 	 	 	 d fd	Zd Z xZS )ClassAttentionrs   NFr   c                    t                                                       || _        ||| _        n||z  }|| _        |dz  | _        t          j        || j        | j        z  dz  |          | _        t          j        || j        | j        z  |          | _        t          j	        |          | _
        t          j        | j        | j        z  |          | _        t          j	        |          | _        d S )Nr   r<   r   )r$   r%   r&   r7   r'   r(   r)   kvr~   r,   r-   r.   r/   )r4   r5   r&   r7   r6   r-   r/   r8   s          r9   r%   zClassAttention.__init__   s     	"$DMMi'H$DM%
)C!?!!C(SSS3 >XNNNI..Idmdn<cBB	I..r:   c                    |j         \  }}}|                     |                              ||d| j        | j                                      ddddd          }|                    d          \  }}|                     |d d d dd d f                                       || j        d| j                  | j        z  }||	                    dd          z  }	|	
                    d          }	|                     |	          }	|	|z  	                    dd                              |d| j        | j        z            }
|                     |
          }
|                     |
          }
|
S )	Nr<   r   r   r   r   rx   r=   r>   )r@   r   rD   r&   r7   rA   ry   r~   r'   r}   rE   r-   r.   r/   )r4   rH   rI   NrL   r   r   r*   r~   r+   	cls_embeds              r9   rO   zClassAttention.forward   sD   '1aWWQZZ1aGGOOPQSTVWYZ\]^^yy||1FF1QQQAAAX;''4>1dmLLtzY1;;r2&&&|||##~~d##AX((A..66q!T]T^=[\\	IIi((	NN9--	r:   )rs   NFr   r   rP   rU   s   @r9   r   r      s]        
 / / / / / /0      r:   r   c                   J     e Zd Zddddddej        ej        f fd	Zd Z xZS )
ClassBlockNr   Fr   c                    t                                                        |
|          | _        t          ||||||          | _        |dk    rt          |          nt          j                    | _         |
|          | _	        t          |t          ||z            |	|          | _        |dk    rt          |          nt          j                    | _        d S )N)r&   r7   r6   r-   r/   r   )r[   r\   r]   drop)r$   r%   r^   r   r+   r   r(   r_   r`   ra   r   rb   rc   rd   )r4   r5   r&   r7   re   r6   r   r-   rf   r]   rg   r8   s              r9   r%   zClassBlock.__init__  s     	Z__
"
 
 
	 2;R(9---R[]]Z__
i00	
 
 
 2;R(9---R[]]r:   c                 ^   |d d d df         }||                      |                     |                     |                              z   }||                     |                     |                     |                              z   }t          j        ||d d dd f         gd          S )Nr   r>   )r`   r+   r^   rd   rc   ra   r   cat)r4   rH   r   s      r9   rO   zClassBlock.forward&  s    aaa!eH			$**Q--0H0H I II	I9N9N0O0O P PP	y)Qqqq!""uX.A6666r:   rm   rU   s   @r9   r   r     sq         g| S  S  S  S  S  SD7 7 7 7 7 7 7r:   r   c                 *    | dk    rt          di |S d S )Nca )r   )
block_typekargss     r9   	get_blockr   -  s)    T""E""" r:   c                 8   | d         |z  }| d         |z  }t          j        d|z
            }||z                      t                    }||z                      t                    }t           j                            |          }t           j                            |          }	t          j        ||dz  z
  d|          }
t          j        |	|dz  z
  d|          }t          j        ||dz  z   d|          }t          j        |	|dz  z   d|          }|
|||fS )zt
    get bounding box as token labeling (https://github.com/zihangJiang/TokenLabeling)
    return: bounding box
    r   r<         ?r   )npsqrtastyperb   randomrandintclip)sizelamr'   rK   rJ   cut_ratcut_wcut_hcxcybbx1bby1bbx2bby2s                 r9   	rand_bboxr   2  s   
 	Q5AQ5Agb3hG[  %%E[  %%E 
		1		B			1		B72
?Aq))D72
?Aq))D72
?Aq))D72
?Aq))DtT!!r:   c                   8     e Zd ZdZ	 	 	 	 	 	 	 d fd		Zd
 Z xZS )
PatchEmbedzs Image to Patch Embedding.
    Different with ViT use 1 conv layer, we use 4 conv layers to do patch embedding
       Fr   rs   r   @     c                 ~   t                                                       |dv sJ |rt          j        t          j        ||d|dd          t          j        |          t          j        d          t          j        ||dddd          t          j        |          t          j        d          t          j        ||dddd          t          j        |          t          j        d          	  	        | _        nd | _        t          j        ||||z  ||z  	          | _        ||z  ||z  z  | _	        d S )
N)r   rs         r   F)r    r"   r!   r   T)inplacer   r    r"   )
r$   r%   r(   
SequentialConv2dBatchNorm2dReLUconvr.   num_patches)	r4   img_size	stem_convstem_stride
patch_sizein_chans
hidden_dim	embed_dimr8   s	           r9   r%   zPatchEmbed.__init__N  sJ    	Z'''' 		(JAk[\chiiiz**%%%	*jaST[`aaaz**%%%	*jaST[`aaaz**%%%
 
DII DII	z[/HQ[_jQjl l l	$
2x:7MNr:   c                 h    | j         |                      |          }|                     |          }|S ri   )r   r.   rk   s     r9   rO   zPatchEmbed.forwardm  s-    9 		!AIIaLLr:   )r   Fr   rs   r   r   r   rQ   rR   rS   __doc__r%   rO   rT   rU   s   @r9   r   r   I  ss          O O O O O O>      r:   r   c                   *     e Zd ZdZd fd	Zd Z xZS )
DownsamplezF Image to Patch Embedding, downsampling between stage1 and stage2
    r<   c                     t                                                       t          j        ||||          | _        d S )Nr   )r$   r%   r(   r   r.   )r4   in_embed_dimout_embed_dimr   r8   s       r9   r%   zDownsample.__init__x  s8    IlMzZdeee			r:   c                     |                     dddd          }|                     |          }|                     dddd          }|S )Nr   r   r   r<   )rA   r.   rk   s     r9   rO   zDownsample.forward|  sD    IIaAq!!IIaLLIIaAq!!r:   )r<   r   rU   s   @r9   r   r   t  s\         f f f f f f      r:   r   r   r<   rX   Fr   c                    g }t          ||                   D ]Y}||t          |d|                   z   z  t          |          dz
  z  }|                     | |||||||	|
|	  	                   Zt          j        | }|S )zI
    generate outlooker layer in stage1
    return: outlooker layers
    Nr   )r    r!   r"   r&   re   r6   r-   rf   rangesumappendr(   r   )block_fnindexr5   layersr&   r    r!   r"   re   r6   r-   drop_path_ratekwargsblocks	block_idx	block_dprs                   r9   outlooker_blocksr     s    & F6%=))  	"i#fVeVn2E2E&EF#f++XY/Z	hh#

 

 

 
	 
	 
	 
	 ]F#FMr:   c	                     g }
t          ||                   D ]V}||t          |d|                   z   z  t          |          dz
  z  }|
                     | ||||||                     Wt          j        |
 }
|
S )zN
    generate transformer layers in stage2
    return: transformer layers
    Nr   )re   r6   r-   rf   r   )r   r   r5   r   r&   re   r6   r-   r   r   r   r   r   s                r9   transformer_blocksr     s      F6%=)) 	 		"i#fVeVn2E2E&EF#f++XY/Z	hh
 
 
 	 	 	 	 ]F#FMr:   c                   T    e Zd ZdZddddddddd	d	d
dddddej        ddddf fd	Zd Zej	        j
        d             Zej	        j
        d-d            Zej	        j
        d.d            Zej	        j
        dej        fd            Zd/dedee         fdZd Zd Zd Z	 	 	 	 	 d0dej        deeeee         f                  d ed!ed"ed#edeeej                 eej        eej                 f         f         fd$Z	 	 	 d1deeee         f         d&ed'efd(Zd) Zd-d*efd+Zd, Z  xZ!S )2r   z7
    Vision Outlooker, the main class of our model
    r   r     tokenrs   r   N)TFFFrX   Fr   )r   r   Tr<   c                 0  	 t                                                       t          |          } t          |                    t	          |          }|| _        || _        || _        || _        d         x| _	        | _
        |rd| _        |dk    s
J d            d| _        t          dd|||d         	          | _        |}|d         |z  |z  |d
         |z  |z  f}t          j        t#          j        d
|d         |d
         d                             | _        t          j        |          | _        g | _        g | _        g }d}t1          t          |                    D ]}||         r0t3          t4          ||         |	|         |         	  	        }n0t7          t8          ||         |	|         |         |
  
        }|                    |           | j                            |           | j                            t=          |         |d|                      |d
z  }|
|         r=|                    t?          |         |d
z            d                     |dz  }|d
z  }t          j         |          | _!        d | _"        t          j         	fdt1          t                              D                       | _"        t          j        t#          j        d
d
d                             | _#        tI          | j#        d           |r9|dk    rt          j%        | j	        |          nt          j&                    | _'        nd | _'         | j	                  | _(        t          j        |          | _)        |dk    rt          j%        | j	        |          nt          j&                    | _*        tI          | j        d           | +                    | j,                   d S )Nr=   r   r   z)return all tokens if mix_token is enabledFTr<   r   )r   r   r   r   r   r   r   )r|   )re   r6   r-   rg   )re   r6   r   r-   rg   znetwork.)num_chs	reductionmodulec                 n    g | ]1}t          |         d          d          d          d          2S )r=   r   )r5   r&   re   r6   r-   rf   rg   )r   )	.0iattn_drop_rate
embed_dimsre   rg   r&   post_layersr6   s	     r9   
<listcomp>z!VOLO.__init__.<locals>.<listcomp>3  sb     / / /  N"2'm'm%, )+ + +/ / /r:   {Gz?std)-r$   r%   lenr   r   num_classesglobal_pool	mix_tokenpooling_scalenum_featureshead_hidden_sizebetagrad_checkpointingr   patch_embedr(   	Parameterr   zeros	pos_embedr,   pos_drop
stage_endsfeature_infor   r   rW   r   r   r   dictr   
ModuleListnetworkpost_network	cls_tokenr   r)   r_   aux_headnorm	head_dropheadapply_init_weights)r4   r   r   r   r   r   r   stem_hidden_dimr   r&   downsamplesoutlook_attentionre   r6   	drop_ratepos_drop_rater   r   rg   r   use_aux_headuse_mix_tokenr   
num_layersr
patch_gridr  r   r   stager8   s           ``  ``  ` ``          r9   r%   zVOLO.__init__  sU   2 	[[
)Ij)))44	X&&&&&*4>rNBD1 	WDI')))+V)))"'%!& m
 
 
  qkZ/=@(1+Q[B[_lBlm
ek!Z]JqMS]^`Sa&b&bcc
]333 	s6{{## $	 $	A # (qMaL'l%,)
 
 
 +qMaL'l%#1,)   NN5!!!O""9---$$T*Q-1Uk`iUkUk%l%l%lmmmNI1~ z*Q-AE9JANNOOOQQ	}W-- !" " / / / / / / / / / / s;//00/ / / ! !D  \%+aJrN*K*KLLDN$.c2222  	!ITWXBId&7EEE^`^i^k^kDMM DMJt011	 I..ALqBId/===VXVaVcVc	dn#....

4%&&&&&r:   c                     t          |t          j                  r^t          |j        d           t          |t          j                  r0|j        +t          j                            |j        d           d S d S d S d S )Nr   r   r   )
isinstancer(   r)   r   weightr   init	constant_)r4   ms     r9   r  zVOLO._init_weightsP  s    a## 	-!(,,,,!RY'' -AF,>!!!&!,,,,,	- 	-- -,>,>r:   c                 
    ddhS )Nr   r  r   r4   s    r9   no_weight_decayzVOLO.no_weight_decayV  s    [))r:   c                 .    t          dddgg d          S )Nz ^cls_token|pos_embed|patch_embed)z^network\.(\d+)\.(\d+)N)z^network\.(\d+)r   ))z
^cls_tokenr"  )z^post_network\.(\d+)N)z^norm)i )stemr   blocks2)r  )r4   coarses     r9   group_matcherzVOLO.group_matcherZ  s6    41*  
 
 
 	
r:   c                     || _         d S ri   )r   )r4   enables     r9   set_grad_checkpointingzVOLO.set_grad_checkpointingi  s    "(r:   returnc                     | j         S ri   )r
  r  s    r9   get_classifierzVOLO.get_classifierm  s
    yr:   r   r   c                    || _         ||| _        |dk    rt          j        | j        |          nt          j                    | _        | j        :|dk    rt          j        | j        |          nt          j                    | _        d S d S )Nr   )r   r   r(   r)   r   r_   r
  r  )r4   r   r   s      r9   reset_classifierzVOLO.reset_classifierq  s    &"*DALqBId/===VXVaVcVc	=$ITWXBId&7EEE^`^i^k^kDMMM %$r:   c                 L   t          | j                  D ]k\  }}|dk    r|| j        z   }|                     |          }| j        r/t
          j                                        st          ||          }` ||          }l|j	        \  }}}}|
                    |d|          }|S )Nr<   r=   )	enumerater  r   r   r   r   r   is_scriptingr   r@   rD   )r4   rH   idxblockrI   rJ   rK   rL   s           r9   forward_tokenszVOLO.forward_tokensy  s    #DL11 	 	JCaxx&MM!$$& uy/E/E/G/G ua((E!HHW
1aIIaQr:   c                    |j         \  }}}| j                            |dd          }t          j        ||gd          }| j        D ]C}| j        r/t          j                                        st          ||          }8 ||          }D|S )Nr=   r   r>   )
r@   r  expandr   r   r  r   r   r1  r   )r4   rH   rI   r   rL   
cls_tokensr3  s          r9   forward_clszVOLO.forward_cls  s    '1a^**1b"55
Iz1o1---& 	 	E& uy/E/E/G/G ua((E!HHr:   c                    |                      |          }|                    dddd          }| j        r| j        rt          j                            | j        | j                  }|j        d         | j        z  |j        d         | j        z  }}t          |
                                || j                  \  }}}}|                                }	| j        |z  | j        |z  }}
| j        |z  | j        |z  }}|                    d          dd|
|||ddf         |	dd|
|||ddf<   |	}nd\  }}}}|                     |          }| j        |                     |          }|                     |          }| j        dk    r|                    d	          }n| j        d
k    r|dddf         }n|}| j        |S |                     |ddddf                   }| j        s!|d|                    d          d         z  z   S | j        r| j        r|                    |j        d         |||j        d                   }|                                }	|                    d          dd||||ddf         |	dd||||ddf<   |	}|                    |j        d         ||z  |j        d                   }||||||ffS )z A separate forward fn for training with mix_token (if a train script supports).
        Combining multiple modes in as single forward with different return types is torchscript hell.
        r   r<   r   r   )r'   N)r   r   r   r   avgr>   r         ?r=   )r   rA   r   r{   r   r   r   r@   r   r   r   cloneflipr4  r  r8  r  r   meanr  maxrD   )r4   rH   r   patch_hpatch_wr   r   r   r   temp_xsbbx1sbby1sbbx2sbby2x_clsx_auxs                   r9   forward_trainzVOLO.forward_train  s    QIIaAq!! > 
	0dm 
	0)..DI66C wqzT-??tOaAaWG%.qvvxxDDV%W%W%W"D$dWWYYF-4d6H46O5E-4d6H46O5E56VVAYYqqq%+uUZ{\]\]\]?]5^F111eEk5;12AA%/"D$d "" (  ##AIIaLLu$$FFqFMMEE((aaadGEEE= La122h''} 	131a000> 	Vdm 	VMM%+a.'7EKPROTTE[[]]F16Aqqq$t)TRVYXYXYXY?Y1ZF111d4idAAA-.EMM%+a.'G2CU[QS_UUE edD$555r:   NCHWrH   indicesr  
stop_early
output_fmtintermediates_onlyc           	      f    |dv s
J d            g }t          t           j                  |          \  }}	 fd|D             } j        |	         }	|j        \  }
}}}                     |                              dddd          }t          j                                        s|s j	        }n j	        d|	dz            }t          |          D ]\  }}|dk    r| j        z   }                     |          } ||          }||v rK|r|dk    r                     |          }n|}|                    |                    dddd                     |r|S |j        \  }
}}}|                    |
d	|          } j                             |          }                     |          }||fS )
a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )rJ  zOutput format must be NCHW.c                 *    g | ]}j         |         S r   )r   )r   r   r4   s     r9   r   z.VOLO.forward_intermediates.<locals>.<listcomp>  s     AAAq*AAAr:   r   r<   r   r   Nr=   )r   r   r   r@   r   rA   r   r   r1  r  r0  r   r   r  r   rD   r  r8  )r4   rH   rK  r  rL  rM  rN  intermediatestake_indices	max_indexrI   _heightwidthr  r2  r3  x_interrJ   rK   rL   s   `                    r9   forward_intermediateszVOLO.forward_intermediates  s   * Y&&&(E&&&"6s4?7K7KW"U"UiAAAALAAAOI.	  g1feQ''1a33 9!!## 	3: 	3lGGl>IM>2G#G,, 	B 	BJCaxx&MM!$$aAl""  C1HH"iillGGG$$W__Q1a%@%@AAA 	!   W
1aIIaQ(  ##AIIaLL-r:   r   
prune_norm
prune_headc                 ,   t          t          | j                  |          \  }}| j        |         }| j        d|dz            | _        |rt	          j                    | _        |r.t	          j                    | _        | 	                    dd           |S )z@ Prune layers not required for specified intermediates.
        Nr   r    )
r   r   r   r  r(   r_   r  r  r  r.  )r4   rK  rY  rZ  rR  rS  s         r9   prune_intermediate_layerszVOLO.prune_intermediate_layers  s     #7s4?7K7KW"U"UiOI.	|NY]N3 	&DI 	) "D!!!R(((r:   c                     |                      |                              dddd          }|                     |          }| j        |                     |          }|                     |          }|S )Nr   r<   r   r   )r   rA   r4  r  r8  r  rk   s     r9   forward_featureszVOLO.forward_features  sp    Q''1a33 "" (  ##AIIaLLr:   
pre_logitsc                 l   | j         dk    r|                    d          }n| j         dk    r|d d df         }n|}|                     |          }|r|S |                     |          }| j        B|                     |d d dd f                   }|d|                    d          d         z  z   }|S )Nr:  r   r>   r   r   r;  )r   r>  r	  r
  r  r?  )r4   rH   r`  outauxs        r9   forward_headzVOLO.forward_head#  s    u$$&&Q&--CC((AAAqD'CCCNN1 	Jiinn=$--!!!QRR%))Ccggajjm++C
r:   c                 Z    |                      |          }|                     |          }|S )z1 simplified forward (without mix token training) )r_  rd  rk   s     r9   rO   zVOLO.forward4  s-    !!!$$a  r:   F)Tri   )NFFrJ  F)r   FT)"rQ   rR   rS   r   r(   ro   r%   r  r   r   ignorer   r&  r)  Moduler,  rb   r   strr.  r4  r8  rI  Tensorr   r   r   r   rX  r]  r_  rd  rO   rT   rU   s   @r9   r   r     s         39|$/B' B' B' B' B' B'H- - - Y* * * Y
 
 
 
 Y) ) ) ) Y	    l lC lhsm l l l l  	 	 	26 26 26n 8<$$',<  < |<  eCcN34<  	< 
 <  <  !%<  
tEL!5tEL7I)I#JJ	K<  <  <  < @ ./$#	 3S	>*  	   $
 
 
 $    "      r:   c                 x    |                     dd          }t          t          | |fdt          |d          i|S )Nout_indicesr   feature_cfggetter)rl  feature_cls)popr   r   r  )variant
pretrainedr   rl  s       r9   _create_volors  ;  sV    **]A..K  [hGGG	
   r:   r\  c                 6    | ddd dddt           t          ddd|S )	Nr   )r   r   r   Q?bicubicTzpatch_embed.conv.0)r
  r  )urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizer>  r   
first_conv
classifierr   )rw  r   s     r9   _cfgr  F  s8    =t)%.B*:N   r:   ztimm/zLhttps://github.com/sail-sg/volo/releases/download/volo_1/d1_224_84.2.pth.tarru  )	hf_hub_idrw  rz  zLhttps://github.com/sail-sg/volo/releases/download/volo_1/d1_384_85.2.pth.tarr   )r   r   r   )r  rw  rz  rx  zLhttps://github.com/sail-sg/volo/releases/download/volo_1/d2_224_85.2.pth.tarzLhttps://github.com/sail-sg/volo/releases/download/volo_1/d2_384_86.0.pth.tarzLhttps://github.com/sail-sg/volo/releases/download/volo_1/d3_224_85.4.pth.tarzLhttps://github.com/sail-sg/volo/releases/download/volo_1/d3_448_86.3.pth.tar)r     r  zLhttps://github.com/sail-sg/volo/releases/download/volo_1/d4_224_85.7.pth.tarzMhttps://github.com/sail-sg/volo/releases/download/volo_1/d4_448_86.79.pth.targffffff?zMhttps://github.com/sail-sg/volo/releases/download/volo_1/d5_224_86.10.pth.tarzLhttps://github.com/sail-sg/volo/releases/download/volo_1/d5_448_87.0.pth.tarzMhttps://github.com/sail-sg/volo/releases/download/volo_1/d5_512_87.07.pth.tar)r      r  )zvolo_d1_224.sail_in1kzvolo_d1_384.sail_in1kzvolo_d2_224.sail_in1kzvolo_d2_384.sail_in1kzvolo_d3_224.sail_in1kzvolo_d3_448.sail_in1kzvolo_d4_224.sail_in1kzvolo_d4_448.sail_in1kzvolo_d5_224.sail_in1kzvolo_d5_448.sail_in1kzvolo_d5_512.sail_in1kr*  c                 B    t          ddddd|}t          dd| i|}|S )	 VOLO-D1 model, Params: 27M r   r   rs   r<      r   r   r         r  r  r   r   r&   volo_d1_224rr  r   )r  r  rs  rr  r   
model_argsmodels       r9   r  r    @     p\6JVeppioppJLL:LLLELr:   c                 B    t          ddddd|}t          dd| i|}|S )	r  r  r  r  r  volo_d1_384rr  r   )r  r  r  s       r9   r  r    r  r:   c                 B    t          ddddd|}t          dd| i|}|S )	 VOLO-D2 model, Params: 59M r  r   
   r      r  r  r  rs   r   r   r   r  volo_d2_224rr  r   )r  r  r  s       r9   r  r    @     q]7KWfqqjpqqJLL:LLLELr:   c                 B    t          ddddd|}t          dd| i|}|S )	r  r  r  r  r  volo_d2_384rr  r   )r  r  r  s       r9   r  r    r  r:   c                 B    t          ddddd|}t          dd| i|}|S )	 VOLO-D3 model, Params: 86M rs   rs   r   r   r  r  r  volo_d3_224rr  r   )r  r  r  s       r9   r  r    r  r:   c                 B    t          ddddd|}t          dd| i|}|S )	r  r  r  r  r  volo_d3_448rr  r   )r  r  r  s       r9   r  r    r  r:   c                 B    t          ddddd|}t          dd| i|}|S )	 VOLO-D4 model, Params: 193M r  r      r  r  r  r   r   r   r  volo_d4_224rr  r   )r  r  r  s       r9   r  r    @     r]7KWgrrkqrrJLL:LLLELr:   c                 B    t          ddddd|}t          dd| i|}|S )	r  r  r  r  r  volo_d4_448rr  r   )r  r  r  s       r9   r  r    r  r:   c           	      F    t          d	dddddd|}t          d
d| i|}|S )h VOLO-D5 model, Params: 296M
    stem_hidden_dim=128, the dim in patch embedding is 128 for VOLO-D5
    r  r     r   r  r  r      r   r   r&   re   r  volo_d5_224rr  r   )r  r  r  s       r9   r  r    R    
  4+?K[S4 4,24 4J LL:LLLELr:   c           	      F    t          d	dddddd|}t          d
d| i|}|S )r  r  r  r  r   r  r  volo_d5_448rr  r   )r  r  r  s       r9   r  r    r  r:   c           	      F    t          d	dddddd|}t          d
d| i|}|S )r  r  r  r  r   r  r  volo_d5_512rr  r   )r  r  r  s       r9   r  r    r  r:   )r   )r   r   r   r<   rX   Fr   r   )rX   Fr   r   rf  )r\  )?r   rB   typingr   r   r   r   numpyr   r   torch.nnr(   torch.nn.functional
functionalrF   torch.utils.checkpointr   	timm.datar	   r
   timm.layersr   r   r   r   r   r   _builderr   	_featuresr   	_registryr   r   __all__rh  r   rW   rq   r   r   r   r   r   r   r   r   r   r   rs  r  default_cfgsr  r  r  r  r  r  r  r  r  r  r  r   r:   r9   <module>r     s   *  / / / / / / / / / / / /                     - - - - - - A A A A A A A A Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z * * * * * * + + + + + + < < < < < < < <(7 7 7 7 7ry 7 7 7t' ' ' ' '	 ' ' 'T, , , , ,	 , , ,^    ")   8( ( ( ( (RY ( ( (V(7 (7 (7 (7 (7 (7 (7 (7V# # #
" " " ".( ( ( ( ( ( ( (V       ( " " " "V    >q q q q q29 q q qh       %$!TZ   "TZ0 0 0 "TZ   "TZ0 0 0 "TZ   "TZ0 0 0 "TZ   "T[-1 1 1 "T[   "TZ-1 1 1 "T[-1 1 1S-& -& - -`  t      t      t      t      t      t      t      t      t      t      t      r:   