
    gr                        d Z ddlZddlmZ ddlmZmZmZ ddlZddl	Zddlm
Z
 ddlmZmZmZ ddlmZmZmZmZ dd	lmZ dd
lmZmZmZmZ ddlmZ  ej        e          ZdZ dZ!g dZ"dZ#dZ$e G d de                      Z% G d de
j&                  Z' G d de
j&                  Z( G d de
j&                  Z) G d de
j&                  Z* G d de
j&                  Z+ G d de
j&                  Z, G d d e
j&                  Z- G d! d"e
j&                  Z. G d# d$e
j&                  Z/ G d% d&e
j&                  Z0 G d' d(e
j&                  Z1 G d) d*e          Z2d+Z3d,Z4 ed-e3           G d. d/e2                      Z5 ed0e3           G d1 d2e2                      Z6 ed3e3           G d4 d5e2                      Z7dS )6zPyTorch LeViT model.    N)	dataclass)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttentionModelOutput)PreTrainedModel)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardlogging   )LevitConfigr   zfacebook/levit-128S)r      i  ztabby, tabby catc                       e Zd ZU dZdZej        ed<   dZej        ed<   dZ	ej        ed<   dZ
eeej                          ed<   dS ),LevitForImageClassificationWithTeacherOutputa  
    Output type of [`LevitForImageClassificationWithTeacher`].

    Args:
        logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
            Prediction scores as the average of the `cls_logits` and `distillation_logits`.
        cls_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
            Prediction scores of the classification head (i.e. the linear layer on top of the final hidden state of the
            class token).
        distillation_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
            Prediction scores of the distillation head (i.e. the linear layer on top of the final hidden state of the
            distillation token).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer
            plus the initial embedding outputs.
    Nlogits
cls_logitsdistillation_logitshidden_states)__name__
__module____qualname____doc__r   torchFloatTensor__annotations__r   r   r   r   r        d/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/levit/modeling_levit.pyr   r   3   su          $ !%FE$$$$(J!(((-1*1118<M8E%"345<<<<<r&   r   c                   ,     e Zd ZdZ	 d fd	Zd Z xZS )LevitConvEmbeddingsz[
    LeViT Conv Embeddings with Batch Norm, used in the initial patch embedding layer.
    r   c	           
          t                                                       t          j        |||||||d          | _        t          j        |          | _        d S )NF)dilationgroupsbias)super__init__r   Conv2dconvolutionBatchNorm2d
batch_norm)
selfin_channelsout_channelskernel_sizestridepaddingr+   r,   bn_weight_init	__class__s
            r'   r/   zLevitConvEmbeddings.__init__R   s^     	9{FGh_elq
 
 
 .66r&   c                 Z    |                      |          }|                     |          }|S N)r1   r3   )r4   
embeddingss     r'   forwardzLevitConvEmbeddings.forward[   s,    %%j11
__Z00
r&   )r   r   r   r   r   r    r!   r/   r?   __classcell__r;   s   @r'   r)   r)   M   s]         
 mn7 7 7 7 7 7      r&   r)   c                   (     e Zd ZdZ fdZd Z xZS )LevitPatchEmbeddingsz
    LeViT patch embeddings, for final embeddings to be passed to transformer blocks. It consists of multiple
    `LevitConvEmbeddings`.
    c                    t                                                       t          |j        |j        d         dz  |j        |j        |j                  | _        t          j
                    | _        t          |j        d         dz  |j        d         dz  |j        |j        |j                  | _        t          j
                    | _        t          |j        d         dz  |j        d         dz  |j        |j        |j                  | _        t          j
                    | _        t          |j        d         dz  |j        d         |j        |j        |j                  | _        |j        | _        d S )Nr            )r.   r/   r)   num_channelshidden_sizesr7   r8   r9   embedding_layer_1r   	Hardswishactivation_layer_1embedding_layer_2activation_layer_2embedding_layer_3activation_layer_3embedding_layer_4r4   configr;   s     r'   r/   zLevitPatchEmbeddings.__init__g   sU   !4!4Q!71!<f>PRXR_agao"
 "
 #%,..!4"a')<Q)?1)DfFXZ`Zgioiw"
 "
 #%,..!4"a')<Q)?1)DfFXZ`Zgioiw"
 "
 #%,..!4"a')<Q)?ASU[Ubdjdr"
 "
 #/r&   c                    |j         d         }|| j        k    rt          d          |                     |          }|                     |          }|                     |          }|                     |          }|                     |          }|                     |          }| 	                    |          }|
                    d                              dd          S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rH   )shaperI   
ValueErrorrK   rM   rN   rO   rP   rQ   rR   flatten	transpose)r4   pixel_valuesrI   r>   s       r'   r?   zLevitPatchEmbeddings.forward}   s    #)!,4,,,w   ++L99
,,Z88
++J77
,,Z88
++J77
,,Z88
++J77
!!!$$..q!444r&   r@   rB   s   @r'   rD   rD   a   sQ         
0 0 0 0 0,5 5 5 5 5 5 5r&   rD   c                   &     e Zd Zd fd	Zd Z xZS )MLPLayerWithBNr   c                     t                                                       t          j        ||d          | _        t          j        |          | _        d S )NF)in_featuresout_featuresr-   )r.   r/   r   LinearlinearBatchNorm1dr3   )r4   	input_dim
output_dimr:   r;   s       r'   r/   zMLPLayerWithBN.__init__   sG    iIJUZ[[[.44r&   c                     |                      |          }|                     |                    dd                                        |          }|S )Nr   r   )ra   r3   rX   
reshape_asr4   hidden_states     r'   r?   zMLPLayerWithBN.forward   sH    {{<00|';';Aq'A'ABBMMl[[r&   )r   r   r   r    r/   r?   rA   rB   s   @r'   r\   r\      sL        5 5 5 5 5 5
      r&   r\   c                   $     e Zd Z fdZd Z xZS )LevitSubsamplec                 d    t                                                       || _        || _        d S r=   )r.   r/   r8   
resolution)r4   r8   rm   r;   s      r'   r/   zLevitSubsample.__init__   s+    $r&   c                     |j         \  }}}|                    || j        | j        |          d d d d | j        d d | j        f                             |d|          }|S )N)rV   viewrm   r8   reshape)r4   rh   
batch_size_channelss        r'   r?   zLevitSubsample.forward   sn    "."4
Ax#((T_doW_``AA~~$+~~~$+~-

'*b(
+
+ 	 r&   ri   rB   s   @r'   rk   rk      sG        % % % % %
      r&   rk   c                   `     e Zd Z fdZ ej                    d fd	            Zd Zd Z xZ	S )LevitAttentionc                    t                                                       || _        |dz  | _        || _        || _        ||z  |z  ||z  dz  z   | _        ||z  |z  | _        t          || j                  | _	        t          j                    | _        t          | j        |d          | _        t          t          j        t#          |          t#          |                              }t%          |          }i g }	}|D ]t}
|D ]o}t'          |
d         |d         z
            t'          |
d         |d         z
            f}||vrt%          |          ||<   |	                    ||                    pui | _        t,          j
                            t-          j        |t%          |                              | _        |                     dt-          j        |	                              ||          d           d S )	N      rH   r   )r:   r   attention_bias_idxsF
persistent)r.   r/   num_attention_headsscalekey_dimattention_ratioout_dim_keys_valuesout_dim_projectionr\   queries_keys_valuesr   rL   
activation
projectionlist	itertoolsproductrangelenabsappendattention_bias_cacher"   	Parameterzerosattention_biasesregister_buffer
LongTensorrp   )r4   rJ   r~   r|   r   rm   points
len_pointsattention_offsetsindicesp1p2offsetr;   s                r'   r/   zLevitAttention.__init__   s   #6 d]
.#2W#<?R#RU\_rUruvUv#v "1G";>Q"Q#1,@X#Y#Y ,..()@,_`aaai'j(9(95;L;LMMNN[[
%'7 	: 	:B : :bebem,,c"Q%"Q%-.@.@A!222034E0F0F%f-089999	: %'! % 2 25;?RTWXiTjTj3k3k l l!5#3G#<#<#A#A*j#Y#Yfk 	 	
 	
 	
 	
 	
r&   Tc                 r    t                                          |           |r| j        ri | _        d S d S d S r=   r.   trainr   r4   moder;   s     r'   r   zLevitAttention.train   M    d 	+D- 	+(*D%%%	+ 	+ 	+ 	+r&   c                     | j         r| j        d d | j        f         S t          |          }|| j        vr| j        d d | j        f         | j        |<   | j        |         S r=   trainingr   ry   strr   r4   device
device_keys      r'   get_attention_biasesz#LevitAttention.get_attention_biases   o    = 	9(D,D)DEEVJ!:::8<8MaaaQUQiNi8j)*5,Z88r&   c                    |j         \  }}}|                     |          }|                    ||| j        d                              | j        | j        | j        | j        z  gd          \  }}}|                    dddd          }|                    dddd          }|                    dddd          }||                    dd          z  | j	        z  | 
                    |j                  z   }	|	                    d          }	|	|z                      dd                              ||| j                  }|                     |                     |                    }|S Nro   r   dimr   rH   r   )rV   r   rp   r|   splitr~   r   permuterY   r}   r   r   softmaxrq   r   r   r   )
r4   rh   rr   
seq_lengthrs   r   querykeyvalue	attentions
             r'   r?   zLevitAttention.forward   s`   $0$6!
J"66|DD/44ZTMegijjpp\4<)=)LMST q 
 
sE aAq))kk!Q1%%aAq))CMM"b111DJ>AZAZ[g[nAoAoo	%%"%--	!E)44Q::BB:z[_[rsst|'D'DEEr&   T
r   r   r    r/   r"   no_gradr   r   r?   rA   rB   s   @r'   rv   rv      s        
 
 
 
 
: U]__+ + + + + _+
9 9 9      r&   rv   c                   `     e Zd Z fdZ ej                    d fd	            Zd Zd Z xZ	S )LevitAttentionSubsamplec	                 .   t                                                       || _        |dz  | _        || _        || _        ||z  |z  ||z  z   | _        ||z  |z  | _        || _        t          || j                  | _
        t          ||          | _        t          |||z            | _        t          j                    | _        t          | j        |          | _        i | _        t'          t)          j        t-          |          t-          |                              }	t'          t)          j        t-          |          t-          |                              }
t/          |	          t/          |
          }}i g }}|
D ]}|	D ]}d}t1          |d         |z  |d         z
  |dz
  dz  z             t1          |d         |z  |d         z
  |dz
  dz  z             f}||vrt/          |          ||<   |                    ||                    t4          j                            t5          j        |t/          |                              | _        |                     dt5          j        |                               ||          d           d S )Nrx   r   r   rH   ry   Frz   )!r.   r/   r|   r}   r~   r   r   r   resolution_outr\   keys_valuesrk   queries_subsamplequeriesr   rL   r   r   r   r   r   r   r   r   r   r   r"   r   r   r   r   r   rp   )r4   rc   rd   r~   r|   r   r8   resolution_inr   r   points_r   len_points_r   r   r   r   sizer   r;   s                      r'   r/   z LevitAttentionSubsample.__init__   s    	#6 d]
.#2W#<?R#RU\_rUr#r "1G";>Q"Q,))T5MNN!/!F!F%i;N1NOO,..()@*MM$&!i'm(<(<eM>R>RSSTTy(~)>)>n@U@UVVWW"%f++s7||K
%'7 	: 	:B : :befnr!u4qA~EFFBqETZN]_`a]bLbfjmnfnrsesLsHtHtu!222034E0F0F%f-089999: !& 2 25;?RTWXiTjTj3k3k l l!5#3G#<#<#A#A+z#Z#Zgl 	 	
 	
 	
 	
 	
r&   Tc                 r    t                                          |           |r| j        ri | _        d S d S d S r=   r   r   s     r'   r   zLevitAttentionSubsample.train  r   r&   c                     | j         r| j        d d | j        f         S t          |          }|| j        vr| j        d d | j        f         | j        |<   | j        |         S r=   r   r   s      r'   r   z,LevitAttentionSubsample.get_attention_biases  r   r&   c                 ^   |j         \  }}}|                     |                              ||| j        d                              | j        | j        | j        z  gd          \  }}|                    dddd          }|                    dddd          }|                     | 	                    |                    }|                    || j
        dz  | j        | j                                      dddd          }||                    dd          z  | j        z  |                     |j                  z   }|                    d          }||z                      dd                              |d| j                  }|                     |                     |                    }|S r   )rV   r   rp   r|   r   r~   r   r   r   r   r   rY   r}   r   r   r   rq   r   r   r   )	r4   rh   rr   r   rs   r   r   r   r   s	            r'   r?   zLevitAttentionSubsample.forward"  s   $0$6!
J\**T*j$*BBGGUDL$"6"EFAUNN 	U
 kk!Q1%%aAq))T33LAABB

:t':A'=t?WY]Yeffnnq!Q
 
 CMM"b111DJ>AZAZ[g[nAoAoo	%%"%--	!E)44Q::BB:rSWSjkkt|'D'DEEr&   r   r   rB   s   @r'   r   r      s        +
 +
 +
 +
 +
Z U]__+ + + + + _+
9 9 9      r&   r   c                   (     e Zd ZdZ fdZd Z xZS )LevitMLPLayerzE
    MLP Layer with `2X` expansion in contrast to ViT with `4X`.
    c                     t                                                       t          ||          | _        t	          j                    | _        t          ||          | _        d S r=   )r.   r/   r\   	linear_upr   rL   r   linear_down)r4   rc   
hidden_dimr;   s      r'   r/   zLevitMLPLayer.__init__=  sO    '	:>>,..)*i@@r&   c                     |                      |          }|                     |          }|                     |          }|S r=   )r   r   r   rg   s     r'   r?   zLevitMLPLayer.forwardC  s=    ~~l33|44''55r&   r@   rB   s   @r'   r   r   8  sV         A A A A A      r&   r   c                   (     e Zd ZdZ fdZd Z xZS )LevitResidualLayerz"
    Residual Block for LeViT
    c                 d    t                                                       || _        || _        d S r=   )r.   r/   module	drop_rate)r4   r   r   r;   s      r'   r/   zLevitResidualLayer.__init__O  s+    "r&   c                    | j         r| j        dk    rt          j        |                    d          dd|j                  }|                    | j                                      d| j        z
                                            }|| 	                    |          |z  z   }|S || 	                    |          z   }|S )Nr   r   )r   )
r   r   r"   randr   r   ge_divdetachr   )r4   rh   rnds      r'   r?   zLevitResidualLayer.forwardT  s    = 	 T^a//*\..q111a@STTTC''$.))--a$..@AAHHJJC'$++l*C*Cc*IIL'$++l*C*CCLr&   r@   rB   s   @r'   r   r   J  sQ         # # # # #
             r&   r   c                   .     e Zd ZdZ fdZd Zd Z xZS )
LevitStagezP
    LeViT Stage consisting of `LevitMLPLayer` and `LevitAttention` layers.
    c                    t                                                       g | _        || _        |
| _        t          |          D ]}| j                            t          t          |||||
          | j        j	                             |dk    rE||z  }| j                            t          t          ||          | j        j	                             |	d         dk    r| j        dz
  |	d         z  dz   | _        | j                            t          | j        j        ||dz            |	d         |	d         |	d         |	d         |
| j        d           | j        | _        |	d         dk    rq| j        j        |dz            |	d         z  }| j                            t          t          | j        j        |dz            |          | j        j	                             t          j        | j                  | _        d S )	Nr   	Subsampler      rH   r   )r~   r|   r   r8   r   r   rG   )r.   r/   layersrT   r   r   r   r   rv   drop_path_rater   r   r   rJ   r   
ModuleList)r4   rT   idxrJ   r~   depthsr|   r   	mlp_ratiodown_opsr   rs   r   r;   s                r'   r/   zLevitStage.__init__d  s    	*v 	 	AK""<:M`mnnK.    1}})I5
""&}\:'N'NPTP[Pjkk   A;+%%#'#5#9hqk"IA"MDK'[-cC!Gm<$QK(0$,QK#A;"/#'#6  
 
 
 "&!4D{Q![5cAg>!L
""&%dk&>sQw&GTTVZVaVp    mDK00r&   c                     | j         S r=   )r   )r4   s    r'   get_resolutionzLevitStage.get_resolution  s    !!r&   c                 0    | j         D ]} ||          }|S r=   )r   )r4   rh   layers      r'   r?   zLevitStage.forward  s*    [ 	/ 	/E 5..LLr&   )r   r   r    r!   r/   r   r?   rA   rB   s   @r'   r   r   _  sa         51 51 51 51 51n" " "      r&   r   c                   *     e Zd ZdZ fdZddZ xZS )LevitEncoderzC
    LeViT Encoder consisting of multiple `LevitStage` stages.
    c                    t                                                       || _        | j        j        | j        j        z  }g | _        | j        j                            dg           t          t          |j
                            D ]}t          |||j        |         |j        |         |j
        |         |j        |         |j        |         |j        |         |j        |         |
  
        }|                                }| j                            |           t%          j        | j                  | _        d S )N )r.   r/   rT   
image_size
patch_sizestagesr   r   r   r   r   r   rJ   r~   r|   r   r   r   r   r   )r4   rT   rm   	stage_idxstager;   s        r'   r/   zLevitEncoder.__init__  s   [+t{/EE
##RD)))s6=1122 	& 	&I#I.y)i(*95&y1 +	* E --//JKu%%%%mDK00r&   FTc                     |rdnd }| j         D ]}|r||fz   } ||          }|r||fz   }|st          d ||fD                       S t          ||          S )Nr%   c              3      K   | ]}||V  	d S r=   r%   ).0vs     r'   	<genexpr>z'LevitEncoder.forward.<locals>.<genexpr>  s"      WWqWWr&   )last_hidden_stater   )r   tupler   )r4   rh   output_hidden_statesreturn_dictall_hidden_statesr   s         r'   r?   zLevitEncoder.forward  s    "6@BBD[ 	/ 	/E# H$5$G! 5..LL 	D 1\O C 	XWW\3D$EWWWWWW-\mnnnnr&   )FTr@   rB   s   @r'   r   r     s^         1 1 1 1 12o o o o o o o or&   r   c                   (     e Zd ZdZ fdZd Z xZS )LevitClassificationLayerz$
    LeViT Classification Layer
    c                     t                                                       t          j        |          | _        t          j        ||          | _        d S r=   )r.   r/   r   rb   r3   r`   ra   )r4   rc   rd   r;   s      r'   r/   z!LevitClassificationLayer.__init__  sA    .33i	:66r&   c                 Z    |                      |          }|                     |          }|S r=   )r3   ra   )r4   rh   r   s      r'   r?   z LevitClassificationLayer.forward  s)    |44\**r&   r@   rB   s   @r'   r   r     sQ         7 7 7 7 7
      r&   r   c                   *    e Zd ZdZeZdZdZdgZd Z	dS )LevitPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    levitrZ   r   c                    t          |t          j        t          j        f          rT|j        j                            d| j        j                   |j	         |j	        j        
                                 dS dS t          |t          j        t          j        f          r?|j	        j        
                                 |j        j                            d           dS dS )zInitialize the weightsg        )meanstdNg      ?)
isinstancer   r`   r0   weightdatanormal_rT   initializer_ranger-   zero_rb   r2   fill_)r4   r   s     r'   _init_weightsz"LevitPreTrainedModel._init_weights  s    fry")455 	* M&&CT[5R&SSS{& &&((((( '& @AA 	*K""$$$M$$S)))))	* 	*r&   N)
r   r   r    r!   r   config_classbase_model_prefixmain_input_name_no_split_modulesr  r%   r&   r'   r   r     sH         
 L$O-.
* 
* 
* 
* 
*r&   r   aG  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`LevitConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
aC  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`LevitImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zNThe bare Levit model outputting raw features without any specific head on top.c                        e Zd Z fdZ ee           eeee	de
          	 	 	 d
dej        dee         dee         deeef         fd	                        Z xZS )
LevitModelc                     t                                          |           || _        t          |          | _        t          |          | _        |                                  d S r=   )r.   r/   rT   rD   patch_embeddingsr   encoder	post_initrS   s     r'   r/   zLevitModel.__init__  sW        4V < <#F++r&   vision)
checkpointoutput_typer  modalityexpected_outputNrZ   r   r   returnc                 J   ||n| j         j        }||n| j         j        }|t          d          |                     |          }|                     |||          }|d         }|                    d          }|s||f|dd          z   S t          |||j                  S )Nz You have to specify pixel_valuesr   r   r   r   r   )r   pooler_outputr   )	rT   r   use_return_dictrW   r  r  r  r   r   )r4   rZ   r   r   r>   encoder_outputsr   pooled_outputs           r'   r?   zLevitModel.forward   s     %9$D  $+Jj 	 &1%<kk$+B]?@@@**<88
,,!5# ' 
 
 ,A. *..1.55 	L%}58KKK7/')7
 
 
 	
r&   NNN)r   r   r    r/   r   LEVIT_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr"   r#   r   boolr   r   r?   rA   rB   s   @r'   r  r    s        
     +*+ABB&<$.   +//3&*	!
 !
'!
 'tn!
 d^	!

 
u>>	?!
 !
 !
  CB!
 !
 !
 !
 !
r&   r  z
    Levit Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                        e Zd Z fdZ ee           eeee	e
          	 	 	 	 d
dej        deej                 dee         dee         deeef         f
d	                        Z xZS )LevitForImageClassificationc                 \   t                                          |           || _        |j        | _        t	          |          | _        |j        dk    r t          |j        d         |j                  nt          j	        
                                | _        |                                  d S Nr   ro   )r.   r/   rT   
num_labelsr  r  r   rJ   r"   r   Identity
classifierr  rS   s     r'   r/   z$LevitForImageClassification.__init__T  s        +''

  1$$ %V%8%<f>OPPP""$$ 	 	r&   r  r  r  r  NrZ   labelsr   r   r  c                    ||n| j         j        }|                     |||          }|d         }|                    d          }|                     |          }d}|Z| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|j        t          j	        k    s|j        t          j
        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }	| j        dk    r1 |	|                                |                                          }n |	||          }n| j         j        dk    rGt                      }	 |	|                    d| j                  |                    d                    }n*| j         j        dk    rt                      }	 |	||          }|s|f|d	d         z   }
||f|
z   n|
S t!          |||j        
          S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   r   
regressionsingle_label_classificationmulti_label_classificationro   rH   )lossr   r   )rT   r   r  r  r/  problem_typer-  dtyper"   longintr
   squeezer	   rp   r   r   r   )r4   rZ   r1  r   r   outputssequence_outputr   r6  loss_fctoutputs              r'   r?   z#LevitForImageClassification.forwardd  s   ( &1%<kk$+B]**\@Tbm*nn!!*)..q1111{'/?a''/;DK,,_q((flej.H.HFL\a\eLeLe/LDK,,/KDK,{'<77"99?a''#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB @ @&++b//RR)-III,..x// 	FY,F)-)9TGf$$vE3!/
 
 
 	
r&   )NNNN)r   r   r    r/   r   r$  r   _IMAGE_CLASS_CHECKPOINTr   r&  _IMAGE_CLASS_EXPECTED_OUTPUTr"   r#   r   r   r(  r   r   r?   rA   rB   s   @r'   r*  r*  L  s              +*+ABB*8$4	   +/-1/3&*3
 3
'3
 )*3
 'tn	3

 d^3
 
u::	;3
 3
 3
  CB3
 3
 3
 3
 3
r&   r*  ap  
    LeViT Model transformer with image classification heads on top (a linear layer on top of the final hidden state and
    a linear layer on top of the final hidden state of the distillation token) e.g. for ImageNet. .. warning::
           This model supports inference-only. Fine-tuning with distillation (i.e. with a teacher) is not yet
           supported.
    c                        e Zd Z fdZ ee           eeee	e
          	 	 	 d	dej        dee         dee         deeef         fd                        Z xZS )
&LevitForImageClassificationWithTeacherc                    t                                          |           || _        |j        | _        t	          |          | _        |j        dk    r t          |j        d         |j                  nt          j	        
                                | _        |j        dk    r t          |j        d         |j                  nt          j	        
                                | _        |                                  d S r,  )r.   r/   rT   r-  r  r  r   rJ   r"   r   r.  r/  classifier_distillr  rS   s     r'   r/   z/LevitForImageClassificationWithTeacher.__init__  s        +''

  1$$ %V%8%<f>OPPP""$$ 	  1$$ %V%8%<f>OPPP""$$ 	 	r&   r0  NrZ   r   r   r  c                 H   ||n| j         j        }|                     |||          }|d         }|                    d          }|                     |          |                     |          }}||z   dz  }|s|||f|dd          z   }	|	S t          ||||j                  S )Nr  r   r   rH   )r   r   r   r   )rT   r   r  r  r/  rE  r   r   )
r4   rZ   r   r   r<  r=  r   distill_logitsr   r?  s
             r'   r?   z.LevitForImageClassificationWithTeacher.forward  s     &1%<kk$+B]**\@Tbm*nn!!*)..q11%)___%E%EtG^G^_nGoGoN
~-2 	j.9GABBKGFM;! .!/	
 
 
 	
r&   r#  )r   r   r    r/   r   r$  r   r@  r   r&  rA  r"   r#   r   r(  r   r   r?   rA   rB   s   @r'   rC  rC    s            * +*+ABB*@$4	   +//3&*	
 
'
 'tn
 d^	

 
uBB	C
 
 
  CB
 
 
 
 
r&   rC  )8r!   r   dataclassesr   typingr   r   r   r"   torch.utils.checkpointr   torch.nnr   r	   r
   modeling_outputsr   r   r   r   modeling_utilsr   utilsr   r   r   r   configuration_levitr   
get_loggerr   loggerr&  r%  r'  r@  rA  r   Moduler)   rD   r\   rk   rv   r   r   r   r   r   r   r   LEVIT_START_DOCSTRINGr$  r  r*  rC  r%   r&   r'   <module>rT     s         ! ! ! ! ! ! ) ) ) ) ) ) ) ) ) )            A A A A A A A A A A            . - - - - - u u u u u u u u u u u u , , , , , , 
	H	%	%   , %  0 1  = = = = =; = = =2    ")   ()5 )5 )5 )5 )529 )5 )5 )5X	 	 	 	 	RY 	 	 	    RY   ; ; ; ; ;RY ; ; ;|P P P P Pbi P P Pf    BI   $               *B B B B B B B BJ+o +o +o +o +o29 +o +o +o\    ry    * * * * *? * * *0	   T 2
 2
 2
 2
 2
% 2
 2
	 2
j   K
 K
 K
 K
 K
"6 K
 K
 K
\   5
 5
 5
 5
 5
-A 5
 5
 5
 5
 5
r&   