
    Ng2              
       |   d Z ddlZddlmZ ddlmZmZmZmZm	Z	m
Z
mZ ddlZddlmZ ddlmc mZ ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZm Z m!Z! dd	l"m#Z#m$Z$ dd
l%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, dgZ-de.de
ej/                 fdZ0e*dee.         dej1        dej1        fd            Z2dej1        dee.         dee.         dej1        fdZ3 G d dej/                  Z4 G d dej/                  Z5 G d dej/                  Z6 G d d ej/                  Z7 G d! d"ej/                  Z8 G d# dej/                  Z9dNd%Z: e#i d& e:d'd()          d* e:d'd(d+          d, e:d'd()          d- e:d'd(d+          d. e:d'd()          d/ e:d'd(d+          d0 e:d'd()          d1 e:d'd(d+          d2 e:d'd()          d3 e:d'd(d+          d4 e:d'd()          d5 e:d'd(d+          d6 e:d'd7d89          d: e:d'd7d89          d; e:d'd<d7d8=          d> e:d'd<d7d8=          d? e:d7d8@                    Z;dOdAZ<dPdCe=dDe>de9fdEZ?e$dPdF            Z@e$dPdG            ZAe$dPdH            ZBe$dPdI            ZCe$dPdJ            ZDe$dPdK            ZEe$dPdL            ZFe$dPdM            ZGdS )Qzr An PyTorch implementation of Hiera

Adapted for timm from originals at https://github.com/facebookresearch/hiera
    N)partial)CallableDictListOptionalTupleTypeUnion)
checkpointIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)
DropPathMlp
LayerScaleClNormMlpClassifierHeaduse_fused_attn_assertget_norm_layer	to_2tupleinit_weight_vitinit_weight_jax   )generate_default_cfgsregister_model)build_model_with_cfg)feature_take_indices)register_notrace_function)named_applyHieranreturnc                 j    t           j        t           j        t           j        t           j        g|          S )z
    Returns a conv with nd (e.g., Conv2d for n=2). Work up to n=3.
    If you wanted a 4d Hiera, you could probably just implement this for n=4. (no promises)
    )nnIdentityConv1dConv2dConv3d)r!   s    M/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/models/hiera.pyconv_ndr*   0   s     
 KBIry9!<<    target_sizemaskc                     ||S t          t          |j        dd                    t          |           k    d           |j        dd          | k    r(t          j        |                                |           S |S )N   z.mask spatial shape and target_size must match.)size)r   lenshapeFinterpolatefloatr,   r-   s     r)   get_resized_maskr7   8   su     |C
1223{#3#335efffz!""~$$}TZZ\\<<<<Kr+   xr2   mu_shapec                    t          |          }| j        d         | j        d         }}d t          ||          D             } | j        |g|||R  } dgt	          d t          t          dd|z             t          d|z   dd|z  z                       D             g           z   t          | j                  dz
  gz   } |                     |          j        |g||R  } | S )a  
    Restore spatial organization by undoing windowed organization of mask units.

    Args:
        x: organized by mask units windows, e.g. in 2d [B, #MUy*#MUx, MUy, MUx, C]
        shape: current spatial shape, if it were not organized into mask unit
            windows, e.g. in 2d [B, #MUy*MUy, #MUx*MUx, C].
        mu_shape: current mask unit shape, e.g. in 2d [MUy, MUx]
    Returns:
        x: e.g. in 2d, [B, #MUy*MUy, #MUx*MUx, C]
    r   c                     g | ]
\  }}||z  S  r=   ).0smus      r)   
<listcomp>z"undo_windowing.<locals>.<listcomp>X   s     99951bqBw999r+   c                 ,    g | ]}t          |          S r=   listr>   ps     r)   rA   z"undo_windowing.<locals>.<listcomp>^   s    NNN1tAwwNNNr+   r   r/   )r1   r2   zipviewsumrangepermutereshape)r8   r2   r9   DBCnum_MUsrK   s           r)   undo_windowingrQ   E   s	     	E

A71:qwr{qA99Cx$8$8999Gq)7)X)q)))A 

NNE!QUOOU1q5!a!e)5L5L M MNNNPR
S
S	Tqw<<!
	 
 	#		'"10u0a000AHr+   c            	            e Zd ZdZdeedf         deedf         deeedf                  f fdZdej	        dej	        fd	Z
 xZS )
Unrolla>  
    Reorders the tokens such that patches are contiguous in memory.
    E.g., given [B, (H, W), C] and stride of (Sy, Sx), this will re-order the tokens as
                           [B, (Sy, Sx, H // Sy, W // Sx), C]

    This allows operations like Max2d to be computed as x.view(B, Sx*Sy, -1, C).max(dim=1).
    Not only is this faster, but it also makes it easy to support inputs of arbitrary
    dimensions in addition to patch-wise sparsity.

    Performing this operation multiple times in sequence puts entire windows as contiguous
    in memory. For instance, if you applied the stride (2, 2) 3 times, entire windows of
    size 8x8 would be contiguous in memory, allowing operations like mask unit attention
    computed easily and efficiently, while also allowing max to be applied sequentially.

    Note: This means that intermediate values of the model are not in HxW order, so they
    need to be re-rolled if you want to use the intermediate values as a HxW feature map.
    The last block of the network is fine though, since by then the strides are all consumed.
    
input_size.patch_strideunroll_schedulec                     t                                                       d t          ||          D             | _        || _        d S )Nc                     g | ]
\  }}||z  S r=   r=   r>   ir?   s      r)   rA   z#Unroll.__init__.<locals>.<listcomp>        FFF1Q!VFFFr+   )super__init__rG   r0   schedule)selfrT   rU   rV   	__class__s       r)   r]   zUnroll.__init__z   sE     	FFJ(E(EFFF	'r+   r8   r"   c           	         |j         \  }}}| j        } |j        |g|z   |gz    }| j        D ]}d t	          ||          D             }|gt          d t	          ||          D             g           z   |gz   }|                    |          }t          |          }dgt          t          d|dz
  d                    z   t          t          d|dz
  d                    z   |dz
  gz   }	|	                    |	          }|
                    dt          |                    }|t          j        |          z  }|                    dt          j        | j                  |          }|S )z
        Input: Flattened patch embeddings [B, N, C]
        Output: Patch embeddings [B, N, C] permuted such that [B, 4, N//4, C].max(1) etc. performs MaxPoolNd
        c                     g | ]
\  }}||z  S r=   r=   rY   s      r)   rA   z"Unroll.forward.<locals>.<listcomp>   s     BBB41aQBBBr+   c                     g | ]	\  }}||g
S r=   r=   rY   s      r)   rA   z"Unroll.forward.<locals>.<listcomp>   s     "M"M"MdaAq6"M"M"Mr+   r   r/   r   r;   )r2   r0   rH   r^   rG   rI   r1   rD   rJ   rK   flattenmathprodrL   )
r_   r8   rN   _rO   cur_sizestrides	new_shapeLrK   s
             r)   forwardzUnroll.forward   sr   
 '1a9AFaS8^qc)+} 	$ 	$G
 CB3x+A+ABBBHc"M"Mc(G6L6L"M"M"MrRRRVWUXXIy!!A IAcDq!a%!3!3444tE!QUA<N<N7O7OOSTWXSXRYYG		'""A 		!S\\**A7###AAIIb$)DI..22r+   __name__
__module____qualname____doc__r   intr   r]   torchTensorrl   __classcell__r`   s   @r)   rS   rS   f   s         &(c3h(  S/( "%S/2	( ( ( ( ( ( %,        r+   rS   c            
            e Zd ZdZdeedf         deedf         deeedf                  dee         def
 fdZ	 dd
ej	        dedej	        dej	        fdZ
 xZS )RerollzQ
    Undos the "unroll" operation so that you can use intermediate features.
    rT   .rU   rV   
stage_endsq_poolc                    t                                                       d t          ||          D             | _        i | _        | j        }t          |d         dz             D ]W}||f| j        |<   ||d |         v r=t          |          dk    r d t          ||d                   D             }|dd          }Xd S )Nc                     g | ]
\  }}||z  S r=   r=   rY   s      r)   rA   z#Reroll.__init__.<locals>.<listcomp>   r[   r+   r;   r   r   c                     g | ]
\  }}||z  S r=   r=   )r>   r!   r?   s      r)   rA   z#Reroll.__init__.<locals>.<listcomp>   s     MMMtq!AFMMMr+   )r\   r]   rG   r0   r^   rJ   r1   )	r_   rT   rU   rV   ry   rz   r0   rZ   r`   s	           r)   r]   zReroll.__init__   s     	FFJ(E(EFFF	 yz"~)** 	6 	6A.4DM!Jww'''''!++MMs49K/L/LMMMD"1!"""5	6 	6r+   Nr8   	block_idxr-   r"   c                    | j         |         \  }}|j        \  }}}t          |          }	dg|	z  }
|D ]} |j        |g||t	          j        |          z  |
|R  }t          |j                  }dd|	z   gt          d t          t          dd|	z             t          d|	z   dz   |dz
                      D             g           z   |dz
  gz   }|	                    |          }t          |	          D ]}|
|xx         ||         z  cc<    |j
        |dg|
|R  }|j        d         } |j        ||g|
|R  }||S t          |||
          }|S )a&  
        Roll the given tensor back up to spatial order assuming it's from the given block.

        If no mask is provided:
            - Returns [B, H, W, C] for 2d, [B, T, H, W, C] for 3d, etc.
        If a mask is provided:
            - Returns [B, #MUs, MUy, MUx, C] for 2d, etc.
        r   r   c                 ,    g | ]}t          |          S r=   rC   rE   s     r)   rA   z"Reroll.forward.<locals>.<listcomp>   s    VVV1tAwwVVVr+   r;   )r^   r2   r1   rH   re   rf   rI   rG   rJ   rK   rL   rQ   )r_   r8   r~   r-   r^   r0   rN   NrO   rM   cur_mu_shaperi   rk   rK   rZ   s                  r)   rl   zReroll.forward   s    y1$'1aIIsQw 	 	GqN7NA7););$;NlNANNNA AGAAE
VVE!QUOOU1q519aRSe=T=T(U(UVVVXZ[[\q5' 
 		'""A 1XX . .Q71:-	!R2,2222A
AA AF1a*,**** H 1dL11r+   Nrm   rv   s   @r)   rx   rx      s         6c3h6  S/6 "%S/2	6
 S	6 6 6 6 6 6 66 "&	2 2|2 2 ,	2
 
2 2 2 2 2 2 2 2r+   rx   c                        e Zd ZU dZej        j        e         ed<   	 	 	 dde	de	de	d	e	d
e	def fdZ
dej        dej        fdZ xZS )MaskUnitAttentionz
    Computes either Mask Unit or Global Attention. Also is able to perform q pooling.

    Note: this assumes the tokens have already been flattened and unrolled into mask units.
    See `Unroll` for more details.
    
fused_attnr   r   Fdimdim_outheadsq_stridewindow_sizeuse_mask_unit_attnc                 b   t                                                       || _        || _        || _        || _        ||z  | _        | j        dz  | _        t                      | _	        t          j        |d|z            | _        t          j        ||          | _        || _        || _        dS )a  
        Args:
        - dim, dim_out: The input and output feature dimensions.
        - heads: The number of attention heads.
        - q_stride: If greater than 1, pool q with this stride. The stride should be flattened (e.g., 2x2 = 4).
        - window_size: The current (flattened) size of a mask unit *after* pooling (if any).
        - use_mask_unit_attn: Use Mask Unit or Global Attention.
        g         N)r\   r]   r   r   r   r   head_dimscaler   r   r$   Linearqkvprojr   r   )r_   r   r   r   r   r   r   r`   s          r)   r]   zMaskUnitAttention.__init__   s    " 	
 5(]d*
(**9S!g+..Igw//	&"4r+   r8   r"   c                    |j         \  }}}| j        r|| j        | j        z  z  nd}|                     |                              |d|d| j        | j                                      dddddd          }|	                    d          \  }}}	| j        dk    r=|
                    || j        || j        d| j                                      d          }| j        rt          j        |||	          }n<|| j        z  |                    dd	          z  }
|
                    d          }
|
|	z  }|                    dd                              |d| j                  }|                     |          }|S )
z5 Input should be of shape [batch, tokens, channels]. r   r;   r   r      r/      r   )r2   r   r   r   r   rL   r   r   rK   unbindrH   amaxr   r3   scaled_dot_product_attentionr   	transposesoftmaxr   r   )r_   r8   rN   r   rg   num_windowsr   qkvattns              r)   rl   zMaskUnitAttention.forward  sd   '1aCGCZaqT]T-==>>`ahhqkk!!!RaT]SS[[\]_`bcefhiklmm**Q--1a=1q$*k4="dmTTYY^_Y``A? 	.q!Q77AA
Nakk"b&9&99D<<B<''DqAKK1%%aT\::IIaLLr+   )r   r   F)rn   ro   rp   rq   rs   jitFinalbool__annotations__rr   r]   rt   rl   ru   rv   s   @r)   r   r      s           	%%%%  ',5 55 5 	5
 5 5 !%5 5 5 5 5 5B %,        r+   r   c                        e Zd Zdddej        ej        ddddf	ded	ed
edededee         dej	        dej	        dedede
de
f fdZdej        dej        fdZ xZS )
HieraBlock      @        Nr   r   TFr   r   r   	mlp_ratio	drop_pathinit_values
norm_layer	act_layerr   r   use_expand_projr   c                    t                                                       || _        || _         ||          | _        ||k    r7d| _        |rt          j        ||          | _        n!||dz  k    sJ d | _        nd| _        d | _        t          ||||	|
|          | _
        |t          ||          nt          j                    | _        |dk    rt          |          nt          j                    | _         ||          | _        t#          |t%          ||z            |          | _        |t          ||          nt          j                    | _        |dk    rt          |          nt          j                    | _        d S )NTr/   F)r   r   )r   )r\   r]   r   r   norm1	do_expandr$   r   r   r   r   r   r%   ls1r   
drop_path1norm2r   rr   mlpls2
drop_path2)r_   r   r   r   r   r   r   r   r   r   r   r   r   r`   s                r)   r]   zHieraBlock.__init__8  s    	Z__
'>>!DN !Ic733		#')))) 		"DNDI%
 
	 DOCZ:g;????`b`k`m`m1:Q(9---BKMMZ((
wGi$7 8 8INNNCNCZ:g;????`b`k`m`m1:Q(9---BKMMr+   r8   r"   c           
         |                      |          }| j        r| j        b|                     |          }|                    |j        d         | j        j        d|j        d                                       d          }nt          j	        |                    |j        d         | j        j        d|j        d                                       d          |                    |j        d         | j        j        d|j        d                   
                    d          gd          }||                     |                     |                     |                              z   }||                     |                     |                     |                     |                                        z   }|S )Nr   r;   r   r   )r   r   r   rH   r2   r   r   r   rs   catmeanr   r   r   r   r   r   )r_   r8   x_norms      r)   rl   zHieraBlock.forwardf  so   A> 
	y$IIf%%FF171:ty'92qwr{KKPPUVPWWIFF171:ty'92qwr{KKPPUVPWWFF171:ty'92qwr{KKPPUVPWW 	   6):): ; ;<<< $**Q--)@)@ A ABBBr+   )rn   ro   rp   r$   	LayerNormGELUrr   r5   r   Moduler   r]   rs   rt   rl   ru   rv   s   @r)   r   r   7  s         #"+/$&L#%7 $(',,R ,R,R ,R 	,R
 ,R ,R "%,R 	,R y,R ,R ,R ",R !%,R ,R ,R ,R ,R ,R\ %,        r+   r   c                        e Zd ZdZ	 ddededeedf         deedf         deedf         d	ef fd
Z	 ddej	        de
ej	                 dej	        fdZ xZS )
PatchEmbedzHPatch embed that supports any number of spatial dimensions (1d, 2d, 3d).Tdim_inr   kernel.stridepaddingrL   c                     t                                                       t          |          | _        || _         t          | j                  |||||          | _        d S )N)kernel_sizer   r   )r\   r]   r1   spatial_dimsrL   r*   r   )r_   r   r   r   r   r   rL   r`   s          r)   r]   zPatchEmbed.__init__~  sh     	  KK.GD-..
 
 
			r+   Nr8   r-   r"   c                 l   |Tt          |j        dd          |          }|                     ||                    t          j                  z            }n|                     |          }| j        rA|                    |j        d         |j        d         d                              dd          }|S )Nr/   r6   r   r   r;   )r7   r2   r   tors   r   rL   r   r_   r8   r-   s      r)   rl   zPatchEmbed.forward  s    
 #$GGGD		!dggej11122AA		!A< 	F		!'!*agaj"55??1EEAr+   Tr   )rn   ro   rp   rq   rr   r   r   r]   rs   rt   r   rl   ru   rv   s   @r)   r   r   {  s        RR !
 

 
 #s(O	

 #s(O
 38_
 
 
 
 
 
 
2 ,0 | 5<( 
	       r+   r   c            =       j    e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dUdeedf         dedededededeedf         d ed!eedf         d"eedf         d#eedf         d$ed%ed&ed'eedf         d(eedf         d)eedf         d*ed+ed,ee         d-ed.ed/e	ee
j        f         d0ed1ed2ed3ed4ed5eeef         f: fd6Zd7 Zej        j        d8             Zej        j        dVd9ed:efd;            Zej        j        dWd<ed:dfd=            Zej        j        d>             ZdXdedee         d?efd@ZdAej        dBed:ej        fdCZd:ej        fdDZ	 	 	 	 	 	 	 dYdAej        dFeej                 dGee	eee         f                  dHedIedJedKed9ed:e	eej                 eej        eej                 f         f         fdLZ	 	 	 	 dZdGe	eee         f         dMedNed9efdOZ	 	 dXdAej        dFeej                 dPed:ej        fdQZdVdRed:ej        fdSZ	 d[dAej        dFeej                 d:ej        fdTZ xZ S )\r       r   r   `   r     avgr/   r      r   r/   r/      r   TTFFT       @   r   r   r   r   r   r   r   N r   MbP?F   r   img_size.in_chans	embed_dim	num_headsnum_classesglobal_poolstagesrz   r   mask_unit_sizemask_unit_attnr   dim_mulhead_mulpatch_kernelrU   patch_paddingr   drop_path_rater   fix_initweight_initr   	drop_ratepatch_drop_ratehead_init_scalesep_pos_embedabs_win_pos_embedglobal_pos_sizec                    t                                                       || _        d| _        t	          |          }t          |t                    rt          |          }|| _        d t          ||          D             | _
        t          j        | j
                  }t          j        |
          }t          j        |	          } |t                    k     sJ ||	c| _        | _        ||
c| _        | _        d t          | j
        | j                  D             | _        fdt'          dt                    dz             D             | _        || _        t-          |||||          | _        d | _        d | _        d | _        d | _        |rt9          j        t=          j        d| j
        d         | j
        d         z  |                    | _        t9          j        t=          j        d| j
        d         |                    | _        n|rUt9          j        t=          j        d|g|R            | _        t9          j        t=          j        d|g|
R            | _        n-t9          j        t=          j        d||                    | _        tA          |||	gt          | j        d d                   z            | _!        tE          |||	gt          | j        d d                   z  | j        |          | _#        d	 | j        d |         D             }!d}"tI                    }#d
 t=          j%        d||#          D             }$t9          j&                    | _'        g | _(        t'          |#          D ]}%|}&||"         }'|%dz
  | j        v r2t          ||z            }&t          ||z            }|"dz  }"|%|!v r|| z  }tS          ||&|||$|%         |||%|!v r| nd|||'          }(|&}|%| j        v r5| xj(        tU          |&d|"dz   z  d| j        |"                    gz  c_(        | j'        +                    |(           |x| _,        | _-        t]          |||||d          | _/        |rMt8          j0        1                    | j        d           t8          j0        1                    | j        d           nZ| j        &t8          j0        1                    | j        d           | j        &t8          j0        1                    | j        d           |dk    r5|dk    rtd          ntf          })ti          |)d          })tk          |)|            |r| 6                                 t          | j/        j7        t8          j8                  rT| j/        j7        j9        j:        ;                    |           | j/        j7        j<        j:        ;                    |           d S d S )NFc                     g | ]
\  }}||z  S r=   r=   rY   s      r)   rA   z"Hiera.__init__.<locals>.<listcomp>  s     $T$T$T1Q!V$T$T$Tr+   c                     g | ]
\  }}||z  S r=   r=   rY   s      r)   rA   z"Hiera.__init__.<locals>.<listcomp>  s     "j"j"jda16"j"j"jr+   c                 D    g | ]}t          d |                   dz
  S )Nr   )rI   )r>   rZ   r   s     r)   rA   z"Hiera.__init__.<locals>.<listcomp>  s,    RRR13vbqbz??Q.RRRr+   r   r/   r   r;   c                     g | ]}|d z   S )r   r=   r>   r8   s     r)   rA   z"Hiera.__init__.<locals>.<listcomp>  s    AAA1QAAAr+   c                 6    g | ]}|                                 S r=   )itemr   s     r)   rA   z"Hiera.__init__.<locals>.<listcomp>  s     JJJAqvvxxJJJr+   )r   r   r   r   r   r   r   r   r   r   r   zblocks.)num_chs	reductionmoduleNLC)	pool_typer   r   	input_fmtg{Gz?)stdskipjaxhead.fc)classifier_name)=r\   r]   r   grad_checkpointingr   
isinstancerr   r   rU   rG   tokens_spatial_shapere   rf   r1   rz   r   mu_sizer   mask_spatial_shaperJ   ry   r   r   patch_embed	pos_embedpos_embed_winpos_embed_spatialpos_embed_temporalr$   	Parameterrs   zerosrS   unrollrx   rerollrI   linspace
ModuleListblocksfeature_infor   dictappendnum_featureshead_hidden_sizer   headinittrunc_normal_r   r   r   r   fix_init_weightfcr   weightdatamul_bias)+r_   r   r   r   r   r   r   r   rz   r   r   r   r   r   r   r   rU   r   r   r   r   r   r   r   r   r   r   r   r   r   
num_tokensflat_mu_sizeflat_q_strideq_pool_blocks	cur_stagedepthdprrZ   r   r   blockinit_fnr`   s+          `                                  r)   r]   zHiera.__init__  s   B 	&"'#J//
h$$ 	+ **H($T$THl8S8S$T$T$T!Yt899
y00	(++F####%+X"T],8.)d)"j"jc$:SUYUh6i6i"j"j"jRRRRaVq8Q8QRRR.%
 
 26599=:> 	U%'\At8;d>WXY>ZZ\eff& &D" ')lAt8;YGG' 'D## ! U!#ek!Y.Y.Y.Y.Y!Z!Z%'\%+a2\^2\2\2\%]%]""!#ek!Z.S.S!T!T JT_SbS1222
 

 JT_SbS1222O
 
 BA(@AAA 	FJJ>5!I!IJJJmoou 	& 	&AG "0	!:1u''i'122	H 455	Q	%% ]2L#a&'%+,+=+=--1( /#5  E  IDO##!!A	!4DMsW[WfgpWqMsMsttt&v v!!Ku%%%%4==D1+!!
 
 
	  	DG!!$"8d!CCCG!!$"9t!DDDD~)%%dn$%???!-%%d&8d%CCC&  )4)=)=oo?GgyAAAG&&& 	#  """dilBI.. 	9IL$))/:::IL"''88888	9 	9r+   c                     d }t          | j                  D ]K\  }} ||j        j        j        j        |dz               ||j        j        j        j        |dz              Ld S )Nc                 Z    |                      t          j        d|z                       d S )Nr   )div_re   sqrt)param	_layer_ids     r)   rescalez&Hiera.fix_init_weight.<locals>.rescaleJ  s(    JJtyy1122222r+   r   )	enumerater  r   r   r'  r(  r   fc2)r_   r:  layer_idlayers       r)   r%  zHiera.fix_init_weightI  s}    	3 	3 	3  )55 	= 	=OHeGEJO*/A>>>GEIM(-x!|<<<<	= 	=r+   c                 4    | j         dgS | j        ddgS ddgS )Nr  pos_embed_absr  r  r  )r  r@  r_   s    r)   no_weight_decayzHiera.no_weight_decayQ  s2    >%= +#_55')=>>r+   coarser"   c                 (    t          dddg          S )NzW^pos_embed|pos_embed_spatial|pos_embed_temporal|pos_embed_abs|pos_embed_win|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr  )r  )r_   rC  s     r)   group_matcherzHiera.group_matcherZ  s%    k-/CD
 
 
 	
r+   enablec                     || _         d S r   )r  )r_   rG  s     r)   set_grad_checkpointingzHiera.set_grad_checkpointinga  s    "(r+   c                     | j         j        S r   )r"  r&  rA  s    r)   get_classifierzHiera.get_classifiere  s    y|r+   reset_otherc                 N    || _         | j                            |||           d S )NrL  )r   r"  reset)r_   r   r   rL  s       r)   reset_classifierzHiera.reset_classifieri  s*    &	[kJJJJJr+   r8   
mask_ratioc                    |j         d         }t          j        | j                  }t	          |d|z
  z            }t          j        |||j                  }t          j        |d          }t          j        |d          }t          j	        ||g|j                  }	d|	ddd|f<   t          j
        |	d|          }	|	                                S )z
        Generates a random mask, mask_ratio fraction are dropped.
        1 is *keep*, 0 is *remove*. Useful for MAE, FLIP, etc.
        r   r   )devicer   N)r   index)r2   re   rf   r  rr   rs   randrS  argsortr  gatherr   )
r_   r8   rQ  rN   r   len_keepnoiseids_shuffleids_restorer-   s
             r)   get_random_maskzHiera.get_random_maskm  s    
 GAJi 788{a*n566
1k!(;;; mEq111mKQ777 {A{+AH===QQQ		\|Da{;;;yy{{r+   c                    | j         x| j                             | j                  }t          j        | j        |j        dd          dd          }||z   }|                    d                              dd          }nl| j        | j        }n]| j	        
                    d| j        d         d          t          j        | j        | j        d         | j        d         z  d          z   }||z   }|S )	Nr   bicubicT)r0   mode	antialiasr/   r   r   r   )r  tiler  r3   r4   r  r2   rd   r   r  repeatr  rs   repeat_interleaver  )r_   r8   r  r  s       r)   
_pos_embedzHiera._pos_embed  s
   ) !.33D4KLLM"(-	  I "M1I!))!,,66q!<<II^'II &--a1J11MqQQ'+-a043LQ3OO    	Mr+   NCHWr-   indicesnorm
stop_early
output_fmtintermediates_onlyc	           	          |r
J d            |dv s
J d            |rAt          t           j                  |          \  }	}
 fd|	D             }	 j        |
         }
n%t          t           j                  |          \  }	}
|  |j        |j        d         dg j        R  }nd}                     ||          }                     |          } 	                    |          }|^||d	         
                    d j        |j        d
                                                |j        d         d|j        d                   }g }t          j                                        s|s j        }n j        d|
dz            }t          |          D ]_\  }} ||          }||	v rK                     |||          }|                    |dk    r|                    dddd
          n|           `|r|S ||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        z'normalization of features not supported)re  NHWCz(Output format must be one of NCHW, NHWC.c                 *    g | ]}j         |         S r=   )ry   )r>   rZ   r_   s     r)   rA   z/Hiera.forward_intermediates.<locals>.<listcomp>  s     EEE1DOA.EEEr+   Nr   r   r-   .Nr/   r;   re  r   )r   r1   ry   r  rH   r2   r  r  rd  r  ra  r  rs   r   is_scriptingr;  r  r  rK   )r_   r8   r-   rf  rg  rh  ri  rj  rC  take_indices	max_index
patch_maskintermediatesr  rZ   blkx_ints   `                r)   forward_intermediateszHiera.forward_intermediates  s;   . BBBBBB----/Y--- 	V&:3t;O;OQX&Y&Y#L)EEEEEEEL	2II&:3t{;K;KW&U&U#L)"171:qK43JKKKJJJQZ00OOAKKNN $y/&&q$,
CCDII!'RS*VXZ[ZabdZeffA9!!## 	1: 	1[FF[)a-0F'' 	c 	cFAsAAL  Aqt44$$*PVBVBVU]]1aA%>%>%>\abbb 	!  -r+   
prune_norm
prune_headc                 $   |r3t          t          | j                  |          \  }}| j        |         }n%t          t          | j                  |          \  }}| j        d|dz            | _        |r| j                            dd           |S )z@ Prune layers not required for specified intermediates.
        Nr   r   TrN  )r   r1   ry   r  r"  rO  )r_   rf  rx  ry  rC  rq  rr  s          r)   prune_intermediate_layerszHiera.prune_intermediate_layers  s      	V&:3t;O;OQX&Y&Y#L)	2II&:3t{;K;KW&U&U#L)k.9q=.1 	1IOOA4O000r+   return_intermediatesc                 &   | j         r+| j        dk    r |J |                     || j                  }|  |j        |j        d         dg| j        R  }nd}|                     ||          }|                     |          }|                     |          }|^||d         	                    d| j
        |j        d                                                |j        d         d|j        d                   }g }t          | j                  D ]|\  }}| j        r/t          j                                        st#          ||          }n ||          }|r4|| j        v r+|                    |                     |||                     }|r||fS |S )	z
        mask should be a boolean tensor of shape [B, #MUt*#MUy*#MUx] where #MU are the number of mask units in that dim.
        Note: 1 in mask is *keep*, 0 is *remove*; mask.sum(dim=-1) should be the same across the batch.
        r   N)rQ  r   rn  ro  r/   r;   )trainingr   r\  rH   r2   r  r  rd  r  ra  r  r;  r  r  rs   r   rp  r   ry   r  r  )r_   r8   r-   r|  rs  rt  rZ   ru  s           r)   forward_featureszHiera.forward_features  s    = 	LT1A55<<<''d6J'KKD"171:qK43JKKKJJJQZ00OOAKKNN $y/&&q$,
CCDII!'RS*VXZ[ZabdZeffA,, 	C 	CFAs& uy/E/E/G/G sA&&CFF# CT_(<(<$$T[[AD[%A%ABBB   	$m##r+   
pre_logitsc                 b    |r|                      ||          n|                      |          }|S )N)r  )r"  )r_   r8   r  s      r)   forward_headzHiera.forward_head  s0    3=ODIIaJI///499Q<<r+   c                 b    |                      ||          }||                     |          }|S )Nrn  )r  r  r   s      r)   rl   zHiera.forward!  s9    
 !!!$!//<!!!$$Ar+   )r   r   r   r   r   r   r   r   r   r   r   Tr   r   r   r   r   r   r   NTr   r   r   r   r   FFr   Fr   )NF)NNFTre  FT)r   FTTr   )!rn   ro   rp   r   rr   strr   r5   r   r
   r$   r   r]   r%  rs   r   ignorerB  r   rF  rI  rK  rP  rt   r\  rd  r   rw  r{  r  r  rl   ru   rv   s   @r)   r    r      sc        )3#$&3(..4/I$( !,2,2-3"$'+/!!0;"%(%*"'&+/7?b9 b9CHob9 b9 	b9
 b9 b9 b9 #s(Ob9 b9 CHob9 "#s(Ob9 "$),b9 "b9 b9  !b9"  S/#b9$  S/%b9& !c?'b9( )b9* "+b9, "%-b9. /b90 1b92 c29n-3b94 5b96 #7b98 #9b9:  ;b9<  $=b9> #38_?b9 b9 b9 b9 b9 b9H= = = Y? ? ? Y
 
D 
T 
 
 
 
 Y) )T )T ) ) ) ) Y  K KC Khsm Kae K K K K 5 U\    0u|    > ,07;#$',:  : |:  5<(:  eCcN34	: 
 :  :  :  !%:  :  
tEL!5tEL7I)I#JJ	K:  :  :  : | ./$# 3S	>*  	
    , ,0).	+ +|+ 5<(+ #'	+
 
+ + + +Z $ 5<     ,0 | 5<( 
	       r+   r   c                 6    | ddd dddt           t          ddd|S )	Nr   )r   r   r   g?r^  Tzpatch_embed.projr
  )urlr   rT   	pool_sizecrop_pctinterpolationfixed_input_sizer   r  
first_conv
classifierr   )r  kwargss     r)   _cfgr  ,  s7    =t%.B(	   r+   zhiera_tiny_224.mae_in1k_ft_in1kztimm/zcc-by-nc-4.0)	hf_hub_idlicensezhiera_tiny_224.mae)r  r  r   z hiera_small_224.mae_in1k_ft_in1kzhiera_small_224.maezhiera_base_224.mae_in1k_ft_in1kzhiera_base_224.maez$hiera_base_plus_224.mae_in1k_ft_in1kzhiera_base_plus_224.maez hiera_large_224.mae_in1k_ft_in1kzhiera_large_224.maezhiera_huge_224.mae_in1k_ft_in1kzhiera_huge_224.maez.hiera_small_abswin_256.sbb2_e200_in12k_ft_in1k)r      r  gffffff?)r  rT   r  z1hiera_small_abswin_256.sbb2_pd_e200_in12k_ft_in1kz&hiera_small_abswin_256.sbb2_e200_in12ki-.  )r  r   rT   r  z)hiera_small_abswin_256.sbb2_pd_e200_in12kzhiera_base_abswin_256.untrained)rT   r  c                 f   |                      d|           } i }|                                 D ]\  }}d|v r|                    dd          }|                    d          r|                    dd          }n+|                    d          r|                    dd          }|dk    rd}|||<   |S )	Nmodel_statezhead.projection.zhead.fc.zencoder_norm.z
head.norm.znorm.r@  r  )getitemsreplace
startswith)
state_dictmodeloutputr   r   s        r)   checkpoint_filter_fnr    s    z::JF  ""  1 ""		,j99A<<(( 	1		/<88AA\\'"" 	1		'<00AAq		Mr+   Fvariant
pretrainedc                     |                     dd          }t          t          | |ft          t	          |d          d|S )Nout_indicesr   getter)r  feature_cls)pretrained_filter_fnfeature_cfg)popr   r    r  r  )r  r  r  r  s       r)   _create_hierar    sY    **]A..K 2[hGGG    r+   c           	      X    t          ddd          }t          dd| it          |fi |S )Nr   r   )r   r/   r   r/   r   r   r   hiera_tiny_224r  )r  r  r  r  r  
model_argss      r)   r  r    s@    aEEEJ__j_DD^D^W]D^D^___r+   c           	      X    t          ddd          }t          dd| it          |fi |S )Nr   r   r   r/      r/   r  hiera_small_224r  )r  r  r  s      r)   r  r    s@    aFFFJ``z`T*E_E_X^E_E_```r+   c           	      X    t          ddd          }t          dd| it          |fi |S )Nr   r   r   r  hiera_base_224r  )r  r  r  s      r)   r  r    s@    aFFFJ__j_DD^D^W]D^D^___r+   c           	      X    t          ddd          }t          dd| it          |fi |S )Np   r/   r   r  hiera_base_plus_224r  )r  r  r  s      r)   r  r    s@    qGGGJdd:djIcIc\bIcIcdddr+   c           	      X    t          ddd          }t          dd| it          |fi |S )N   r/   r/      $   r   r  hiera_large_224r  )r  r  r  s      r)   r  r    s@    qGGGJ``z`T*E_E_X^E_E_```r+   c           	      X    t          ddd          }t          dd| it          |fi |S )Nr  r   r  r  hiera_huge_224r  )r  r  r  s      r)   r  r    s@    qGGGJ__j_DD^D^W]D^D^___r+   c           
      b    t          dddddddd	          }t          dd| it          |fi |S )Nr   r   r  T)r   r   h㈵>r	  F)r   r   r   r   r   r   r   r   hiera_small_abswin_256r  )r  r  r  s      r)   r  r    sV    -4aieU  J ggjgDQ[LfLf_eLfLfgggr+   c           	      ^    t          dddddd          }t          d
d	| it          |fi |S )Nr   r   r   Tr  r	  )r   r   r   r   r   r   hiera_base_abswin_256r  )r  r  r  s      r)   r  r    sS    -4]aotv v vJffZf4PZKeKe^dKeKefffr+   )r   r   r  )Hrq   re   	functoolsr   typingr   r   r   r   r   r	   r
   rs   torch.nnr$   torch.nn.functional
functionalr3   torch.utils.checkpointr   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   r   	_registryr   r   _builderr   	_featuresr   _features_fxr   _manipulater   __all__rr   r   r*   rt   r7   rQ   rS   rx   r   r   r   r    r  default_cfgsr  r  r   r  r  r  r  r  r  r  r  r  r=   r+   r)   <module>r     s   0        E E E E E E E E E E E E E E E E E E                 - - - - - - A A A A A A A AI I I I I I I I I I I I I I I I I I I I I I I I = < < < < < < < * * * * * * + + + + + + 3 3 3 3 3 3 $ $ $ $ $ $ )=s =tBI = = = = 	$s) 	5< 	EL 	 	 	 	<Cy s) \	   B; ; ; ; ;RY ; ; ;|N N N N NRY N N Nb? ? ? ? ?	 ? ? ?DA A A A A A A AH% % % % % % % %PF F F F FBI F F FR    %$ S&%tt( ( (S&
 $$  S& ') ) )S& 44  S&* &tt( ( (+S&2 $$  3S&> +DD- - -?S&F tt     GS&R ') ) )SS&Z 44  [S&f &tt( ( (gS&n $$  oS&z 5dd 47 7 7{S&B 8 4: : :CS&J -dd 4/ / /KS&T 0 42 2 2US&^ &tt 4( ( (_S& S Sl   2
 
3 
D 
u 
 
 
 
 ` ` ` `
 a a a a
 ` ` ` `
 e e e e
 a a a a
 ` ` ` `
 h h h h g g g g g gr+   