
    Ng,              	          d Z ddlZddlZddlmZmZmZmZmZ ddl	Z	ddl	m
Z
 ddlm
c mZ ddlmZmZ ddlmZ ddlmZ  ej        e          Z G d	 d
e
j                  Z G d de          Z	 	 	 ddee         dededefdZdS )a?   Image to Patch Embedding using Conv2d

A convolution based approach to patchifying a 2D image w/ embedding projection.

Based on code in:
  * https://github.com/google-research/vision_transformer
  * https://github.com/google-research/big_vision/tree/main/big_vision

Hacked together by / Copyright 2020 Ross Wightman
    N)CallableListOptionalTupleUnion)nn   )Formatnchw_to)	to_2tuple)_assertc                       e Zd ZU dZeed<   ej        j        e	         ed<   	 	 	 	 	 	 	 	 	 	 dde
e         dededede
e         de	de
e         de	de	de	f fdZdeeeeef         f         fdZ	 	 dde
eeeeef         f                  de
eeeeef         f                  fdZddeeeef         ef         fdZdeeef         deeef         fdZd Z xZS )
PatchEmbed! 2D Image to Patch Embedding
    
output_fmtdynamic_img_pad            NTFimg_size
patch_sizein_chans	embed_dim
norm_layerflattenbiasstrict_img_sizec                    t                                                       t          |          | _        |                     |          \  | _        | _        | _        |d| _        t          |          | _
        n|| _        t          j        | _
        |	| _        |
| _        t          j        |||||          | _        |r ||          nt          j                    | _        d S )NFkernel_sizestrider   )super__init__r   r   _init_img_sizer   	grid_sizenum_patchesr   r
   r   NCHWr   r   r   Conv2dprojIdentitynorm)selfr   r   r   r   r   r   r   r   r   r   	__class__s              S/var/www/html/ai-engine/env/lib/python3.11/site-packages/timm/layers/patch_embed.pyr$   zPatchEmbed.__init__    s     	#J//:>:M:Mh:W:W7t~t'7! DL$Z00DOO #DL$kDO..Ih	zR\cghhh	-7JJJy)))R[]]			    c                     | j         sJ |dS t          |          }t          d t          || j                   D                       }|d         |d         z  }|||fS )N)NNNc                     g | ]
\  }}||z  S  r3   ).0sps      r/   
<listcomp>z-PatchEmbed._init_img_size.<locals>.<listcomp>C   s     MMMda16MMMr0   r   r	   )r   r   tuplezip)r-   r   r&   r'   s       r/   r%   zPatchEmbed._init_img_size>   sp    ##X&&MMc(DO.L.LMMMNN	lYq\1K//r0   c           	      r   d }|t          |          }||| j        k    rt          j                    5  t	          j        | j        j        | j        j        ||| j        j	        d u          }|j
                            t          | j        j
        |d                     | j        j	        $|j	                            | j        j	                   || _        d d d            n# 1 swxY w Y   || _        |p| j        }|| j        k    s|*|                     |          \  | _        | _        | _        d S d S )Nr    T)verbose)r   r   torchno_gradr   r)   r*   in_channelsout_channelsr   weightcopy_resample_patch_embedr   r%   r&   r'   )r-   r   r   new_patch_sizenew_projs        r/   set_input_sizezPatchEmbed.set_input_sizeG   sx   
 !&z22N%.DO*K*K % %9I)I* .)t3   %%&:49;K^ei&j&j&jkkk9>-M''	777$	% % % % % % % % % % % % % % % -DO,t}t}$$(B>B>Q>QRZ>[>[;DM4>4+;+;+; )C(Bs   B%C%%C),C)returnc                 <    |rt          | j                  S | j        S )N)maxr   )r-   	as_scalars     r/   
feat_ratiozPatchEmbed.feat_ratioa   s"     	#t'''?"r0   c                    | j         rPt          j        |d         | j        d         z            t          j        |d         | j        d         z            fS |d         | j        d         z  |d         | j        d         z  fS )z Get grid (feature) size for given image size taking account of dynamic padding.
        NOTE: must be torchscript compatible so using fixed tuple indexing
        r   r	   )r   mathceilr   )r-   r   s     r/   dynamic_feat_sizezPatchEmbed.dynamic_feat_sizeg   s      	X9Xa[4?1+==>>	(ST+X\XghiXjJj@k@kkkA;$/!"44hqkT_UVEW6WWWr0   c                    |j         \  }}}}| j        | j        rct          || j        d         k    d| d| j        d          d           t          || j        d         k    d| d| j        d          d           no| j        sht          || j        d         z  dk    d| d| j        d          d           t          || j        d         z  dk    d| d| j        d          d           | j        rq| j        d         || j        d         z  z
  | j        d         z  }| j        d         || j        d         z  z
  | j        d         z  }t          j        |d|d|f          }|                     |          }| j	        r*|	                    d          
                    dd          }n*| j        t          j        k    rt          || j                  }|                     |          }|S )	Nr   zInput height (z) doesn't match model ().r	   zInput width (z%) should be divisible by patch size (   )shaper   r   r   r   r   Fpadr*   r   	transposer   r
   r(   r   r,   )r-   xBCHWpad_hpad_ws           r/   forwardzPatchEmbed.forwardp   s   W
1a=$# T]1--/n/n/nZ^ZghiZj/n/n/noooT]1--/mq/m/mY]YfghYi/m/m/mnnnn) **a/cQccT_]^M_ccc   **a/bAbbDO\]L^bbb    	/_Q'!doa.@*@@DOTUDVVE_Q'!doa.@*@@DOTUDVVEa!UAu-..AIIaLL< 	,		!&&q!,,AA_++4?++AIIaLLr0   )
r   r   r   r   NTNTTF)NN)T)__name__
__module____qualname____doc__r
   __annotations__r<   jitFinalboolr   intr   strr$   r   r   r%   rE   rJ   rN   r]   __classcell__r.   s   @r/   r   r      s         Y_T**** '*  -1 (,$($)K KsmK K 	K
 K !*K K !K K "K "K K K K K K<0uS%S/-A'B 0 0 0 0 ?C@D\ \uS%S/%9:;\ !sE#s(O';!<=\ \ \ \4# #E%S/32F,G # # # #X%S/ XeCHo X X X X      r0   r   c                        e Zd ZU dZeed<   	 	 	 	 	 	 	 	 dd	ee         d
edededee         de	dee
         de	f fdZdeej        ee         f         fdZ xZS )PatchEmbedWithSizer   r   r   r   r   r   NTr   r   r   r   r   r   r   c	           
      Z    t                                          ||||||||           d S )N)r   r   r   r   r   r   r   r   )r#   r$   )
r-   r   r   r   r   r   r   r   r   r.   s
            r/   r$   zPatchEmbedWithSize.__init__   sH     	!!! 	 		
 		
 		
 		
 		
r0   rF   c                 (   |j         \  }}}}| j        ht          || j        d         z  dk    d| d| j        d          d           t          || j        d         z  dk    d| d| j        d          d           |                     |          }|j         dd          }| j        r*|                    d                              dd          }n*| j        t          j	        k    rt          || j                  }|                     |          }||fS )	Nr   zInput image height (z#) must be divisible by patch size (rP   r	   zInput image width (rQ   )rR   r   r   r   r*   r   rU   r   r
   r(   r   r,   )r-   rV   rW   rX   rY   rZ   	feat_sizes          r/   r]   zPatchEmbedWithSize.forward   sS   W
1a=$A**a/  2E  2E  2Enrn}~  oA  2E  2E  2E  F  F  FA**a/  2Dq  2D  2Dmqm|}~m  2D  2D  2D  E  E  EIIaLLGBCCL	< 	,		!&&q!,,AA_++4?++AIIaLL)|r0   )r   r   r   r   NTNT)r^   r_   r`   ra   r
   rb   r   rf   r   re   rg   r$   r   r<   Tensorr   r]   rh   ri   s   @r/   rk   rk      s           '*  -1 (,
 
sm
 
 	

 
 !*
 
 !
 
 
 
 
 
 
,E%,S	"9:        r0   rk   bicubicTFnew_sizeinterpolation	antialiasr;   c           	         ddl 	 ddlm} n# t          $ r	 ddlm} Y nw xY wt          | j                  dk    s
J d            t                    dk    s
J d            | j        dd         }t          |          t                    k    r| S |r)t          	                    d	| j         d
 d d           fdfd} ||          }t          j
        j                            |j                  | j                  fd}	 | ||	dd          dd          }
| j        }|                                 }  |
|           } |                     |          } | S )a/  Resample the weights of the patch embedding kernel to target resolution.
    We resample the patch embedding kernel by approximately inverting the effect
    of patch resizing.

    Code based on:
      https://github.com/google-research/big_vision/blob/b00544b81f8694488d5f36295aeb7972f3755ffe/big_vision/models/proj/flexi/vit.py

    With this resizing, we can for example load a B/8 filter into a B/16 model
    and, on 2x larger input image, the result will match.

    Args:
        patch_embed: original parameter to be resized.
        new_size (tuple(int, int): target shape (height, width)-only.
        interpolation (str): interpolation for resize
        antialias (bool): use anti-aliasing filter in resize
        verbose (bool): log operation
    Returns:
        Resized patch embedding kernel.
    r   N)vmap   zFour dimensions expectedrQ   zNew shape should only be hwrn   zResize patch embedding z to z, w/ z interpolation.c                     t          j        |           d         }t          j        ||          d                                         }|S )N)NN.)sizemodert   )r   r   .)r<   rp   rS   interpolatenumpy)x_np	_new_sizex_tfx_upsampledrt   rs   s       r/   resizez$resample_patch_embed.<locals>.resize   sW    |D!!/2my}	K K KKTVV[V[V]V] 	r0   c                 F   g }t                              |                     D ]b}                    |           }d|                    ||           <   |                     ||                              d                     c                    |          j        S )Ng      ?)rangeprodzerosunravel_indexappendreshapestackT)	_old_sizer~   mati	basis_vecnpr   s        r/   get_resize_matz,resample_patch_embed.<locals>.get_resize_mat   s    rwwy))** 	A 	AA++I8:Ib&&q)445JJvvi33;;B??@@@@xx}}r0   )devicec                 ^    |                      d          z  }|                               S )Nr   )r   )kernelresampled_kernelrr   resize_mat_pinvs     r/   resample_kernelz-resample_patch_embed.<locals>.resample_kernel   s.    *V^^B-?-??''111r0   r	   )r|   r<   rv   ImportError	functorchlenrR   r8   _loggerinfotensorlinalgpinvr   r   dtypefloatto)patch_embedrr   rs   rt   r;   rv   old_sizer   
resize_matr   v_resample_kernel
orig_dtyper   r   r   s    ```        @@@r/   rB   rB      s   4 # # # #""""""""# { !!Q&&&(B&&&x==A< %HX%//)) us{/@sshssUbsssttt            (33Jl29>>*,#?#?HZ[[[O2 2 2 2 2 2 TT/1a88!Q??"J##%%K##K00K..,,Ks    &&)rq   TF)ra   loggingrL   typingr   r   r   r   r   r<   r   torch.nn.functional
functionalrS   formatr
   r   helpersr   trace_utilsr   	getLoggerr^   r   Moduler   rk   rf   rg   re   rB   r3   r0   r/   <module>r      s  	 	   9 9 9 9 9 9 9 9 9 9 9 9 9 9                 # # # # # # # #                  
'
H
%
%o o o o o o o od( ( ( ( ( ( ( (\ 'C Cs)C C 	C
 C C C C C Cr0   