
    gn              	       
   d Z ddlZddlZddlmZmZmZmZ ddlZddl	m
c mZ ddlZddlm
Z
 ddlmZmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlmZmZmZm Z  ddl!m"Z"  e j#        e$          Z%dZ&dZ'g dZ(dZ)dZ*d3dej+        de,de-dej+        fdZ. G d de
j/                  Z0 G d de
j/                  Z1 G d de
j/                  Z2 G d d e
j/                  Z3 G d! d"e
j/                  Z4 G d# d$e
j/                  Z5 G d% d&e
j/                  Z6 G d' d(e
j/                  Z7 G d) d*e          Z8d+Z9d,Z: ed-e9           G d. d/e8                      Z; ed0e9           G d1 d2e8                      Z<dS )4zPyTorch PVT model.    N)IterableOptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputImageClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardlogging   )	PvtConfigr   zZetatech/pvt-tiny-224)r   2   i   ztabby, tabby cat        Finput	drop_probtrainingreturnc                     |dk    s|s| S d|z
  }| j         d         fd| j        dz
  z  z   }|t          j        || j        | j                  z   }|                                 |                     |          |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r   r   r   )r   )dtypedevice)shapendimtorchrandr   r    floor_div)r   r   r   	keep_probr!   random_tensoroutputs          `/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/pvt/modeling_pvt.py	drop_pathr+   6   s     CxII[^
Q 77E
5EL Y Y YYMYYy!!M1FM    c                   j     e Zd ZdZd	dee         ddf fdZdej        dej        fdZ	de
fdZ xZS )
PvtDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 V    t                                                       || _        d S N)super__init__r   )selfr   	__class__s     r*   r2   zPvtDropPath.__init__N   s$    "r,   hidden_statesc                 8    t          || j        | j                  S r0   )r+   r   r   r3   r5   s     r*   forwardzPvtDropPath.forwardR   s    FFFr,   c                 6    d                     | j                  S )Nzp={})formatr   )r3   s    r*   
extra_reprzPvtDropPath.extra_reprU   s    }}T^,,,r,   r0   )__name__
__module____qualname____doc__r   floatr2   r#   Tensorr8   strr;   __classcell__r4   s   @r*   r.   r.   K   s        bb# #(5/ #T # # # # # #GU\ Gel G G G G-C - - - - - - - -r,   r.   c                        e Zd ZdZ	 ddedeeee         f         deeee         f         dededed	ef fd
Z	de
j        dedede
j        fdZde
j        dee
j        eef         fdZ xZS )PvtPatchEmbeddingsz
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    Fconfig
image_size
patch_sizestridenum_channelshidden_size	cls_tokenc                    t                                                       || _        t          |t          j        j                  r|n||f}t          |t          j        j                  r|n||f}|d         |d         z  |d         |d         z  z  }|| _        || _        || _	        || _
        t          j        t          j        d|r|dz   n||                    | _        |r(t          j        t          j        dd|                    nd | _        t          j        ||||          | _        t          j        ||j                  | _        t          j        |j                  | _        d S )Nr   r   kernel_sizerJ   eps)p)r1   r2   rG   
isinstancecollectionsabcr   rH   rI   rK   num_patchesr   	Parameterr#   randnposition_embeddingszerosrM   Conv2d
projection	LayerNormlayer_norm_eps
layer_normDropouthidden_dropout_probdropout)
r3   rG   rH   rI   rJ   rK   rL   rM   rW   r4   s
            r*   r2   zPvtPatchEmbeddings.__init__`   s\    	#-j+/:R#S#SqZZZdfpYq
#-j+/:R#S#SqZZZdfpYq
!!}
15*Q-:VW=:XY$$(&#%<KiH;??[+VV$
 $
  JS\ek!Q&D&DEEEX\)L+6Zdeee,{8MNNNzF$>???r,   
embeddingsheightwidthr   c                    ||z  }t           j                                        s$|| j        j        | j        j        z  k    r| j        S |                    d||d                              dddd          }t          j	        |||fd          }|                    dd||z                                ddd          }|S )Nr   r   r      bilinear)sizemode)
r#   jit
is_tracingrG   rH   rZ   reshapepermuteFinterpolate)r3   rd   re   rf   rW   interpolated_embeddingss         r*   interpolate_pos_encodingz+PvtPatchEmbeddings.interpolate_pos_encoding|   s    un y##%% 	,+9ORVR]Rh9h*h*h++''65"==EEaAqQQ
"#-
&%Wa"b"b"b"9"A"A!RRW"X"X"`"`abdegh"i"i&&r,   pixel_valuesc                    |j         \  }}}}|| j        k    rt          d          |                     |          }|j         ^ }}}|                    d                              dd          }|                     |          }| j        | j                            |dd          }	t          j
        |	|fd          }|                     | j        d d dd f         ||          }
t          j
        | j        d d d df         |
fd          }
n|                     | j        ||          }
|                     ||
z             }|||fS )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.ri   r   rh   dim)r!   rK   
ValueErrorr]   flatten	transposer`   rM   expandr#   catrt   rZ   rc   )r3   ru   
batch_sizerK   re   rf   patch_embed_rd   rM   rZ   s              r*   r8   zPvtPatchEmbeddings.forward   so   2>2D/
L&%4,,,w   ool33'-FE!))!,,66q!<<__[11
>%--j"bAAIIz#:BBBJ"&"?"?@XYZYZYZ\]\^\^Y^@_agin"o"o"')T-Eaaa!e-LNa,bhi"j"j"j"&"?"?@XZ`bg"h"h\\*/B"BCC
65((r,   F)r<   r=   r>   r?   r   r   intr   boolr2   r#   rA   rt   r   r8   rC   rD   s   @r*   rF   rF   Y   s(          @ @@ #x},-@ #x},-	@
 @ @ @ @ @ @ @ @ @8	'5< 	' 	'UX 	']b]i 	' 	' 	' 	')EL )U5<c;Q5R ) ) ) ) ) ) ) )r,   rF   c                   L     e Zd Zdedef fdZdej        dej        fdZ xZ	S )PvtSelfOutputrG   rL   c                     t                                                       t          j        ||          | _        t          j        |j                  | _        d S r0   )r1   r2   r   Lineardensera   rb   rc   )r3   rG   rL   r4   s      r*   r2   zPvtSelfOutput.__init__   sD    Y{K88
z&"<==r,   r5   r   c                 Z    |                      |          }|                     |          }|S r0   )r   rc   r7   s     r*   r8   zPvtSelfOutput.forward   s*    

=11]33r,   )
r<   r=   r>   r   r   r2   r#   rA   r8   rC   rD   s   @r*   r   r      sq        >y >s > > > > > >
U\ el        r,   r   c                        e Zd ZdZdedededef fdZdedej	        fd	Z
	 ddej	        dedededeej	                 f
dZ xZS )PvtEfficientSelfAttentionzpEfficient self-attention mechanism with reduction of the sequence [PvT paper](https://arxiv.org/abs/2102.12122).rG   rL   num_attention_headssequences_reduction_ratioc                 
   t                                                       || _        || _        | j        | j        z  dk    r t	          d| j         d| j         d          t          | j        | j        z            | _        | j        | j        z  | _        t          j	        | j        | j        |j
                  | _        t          j	        | j        | j        |j
                  | _        t          j	        | j        | j        |j
                  | _        t          j        |j                  | _        || _        |dk    r?t          j        ||||          | _        t          j        ||j                  | _        d S d S )	Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ())biasr   rO   rQ   )r1   r2   rL   r   ry   r   attention_head_sizeall_head_sizer   r   qkv_biasquerykeyvaluera   attention_probs_dropout_probrc   r   r\   sequence_reductionr^   r_   r`   r3   rG   rL   r   r   r4   s        r*   r2   z"PvtEfficientSelfAttention.__init__   s~    	&#6 d66!;;6D$4 6 626 6 6  
 $'t'7$:R'R#S#S !58PPYt/1C&/ZZZ
9T-t/AXXXYt/1C&/ZZZ
z&"EFF)B&$q((&(i[6OXq' ' 'D# !l;F<QRRRDOOO	 )(r,   r5   r   c                     |                                 d d         | j        | j        fz   }|                    |          }|                    dddd          S )Nrh   r   ri   r   r   )rk   r   r   viewrp   )r3   r5   	new_shapes      r*   transpose_for_scoresz.PvtEfficientSelfAttention.transpose_for_scores   sY    !&&(("-1I4Kc0dd	%**955$$Q1a000r,   Fre   rf   output_attentionsc                 "   |                      |                     |                    }| j        dk    r|j        \  }}}|                    ddd                              ||||          }|                     |          }|                    ||d                              ddd          }|                     |          }|                      |                     |                    }	|                      | 	                    |                    }
t          j        ||	                    dd                    }|t          j        | j                  z  }t           j                            |d          }|                     |          }t          j        ||
          }|                    dddd                                          }|                                d d         | j        fz   }|                    |          }|r||fn|f}|S )Nr   r   ri   rh   rw   r   )r   r   r   r!   rp   ro   r   r`   r   r   r#   matmulr{   mathsqrtr   r   
functionalsoftmaxrc   
contiguousrk   r   r   )r3   r5   re   rf   r   query_layerr~   seq_lenrK   	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                   r*   r8   z!PvtEfficientSelfAttention.forward   s    //

=0I0IJJ)A--0=0C-J)11!Q::BB:|]cejkkM 33MBBM)11*lBOOWWXY[\^_``M OOM::M--dhh}.E.EFF	//

=0I0IJJ !<Y5H5HR5P5PQQ+di8P.Q.QQ -//0@b/II ,,77_kBB%--aAq99DDFF"/"4"4"6"6ss";t?Q>S"S%**+BCC6G]=/22mM]r,   r   )r<   r=   r>   r?   r   r   r@   r2   r#   rA   r   r   r   r8   rC   rD   s   @r*   r   r      s        zzSS.1SHKShmS S S S S S:1# 1%, 1 1 1 1 #(* *|* * 	*
  * 
u|	* * * * * * * *r,   r   c                   v     e Zd Zdedededef fdZd Z	 ddej	        d	ed
ede
deej	                 f
dZ xZS )PvtAttentionrG   rL   r   r   c                     t                                                       t          ||||          | _        t	          ||          | _        t                      | _        d S )N)rL   r   r   )rL   )r1   r2   r   r3   r   r)   setpruned_headsr   s        r*   r2   zPvtAttention.__init__   se     	-# 3&?	
 
 
	 $FDDDEEr,   c                    t          |          dk    rd S t          || j        j        | j        j        | j                  \  }}t          | j        j        |          | j        _        t          | j        j        |          | j        _        t          | j        j	        |          | j        _	        t          | j
        j        |d          | j
        _        | j        j        t          |          z
  | j        _        | j        j        | j        j        z  | j        _        | j                            |          | _        d S )Nr   r   rw   )lenr   r3   r   r   r   r   r   r   r   r)   r   r   union)r3   headsindexs      r*   prune_headszPvtAttention.prune_heads	  s    u::??F7490$)2OQUQb
 
u
 -TY_eDD	*49=%@@	,TY_eDD	.t{/@%QOOO )-	(EE

(R	%"&)"?$)B_"_	 -33E::r,   Fr5   re   rf   r   r   c                     |                      ||||          }|                     |d                   }|f|dd          z   }|S )Nr   r   )r3   r)   )r3   r5   re   rf   r   self_outputsattention_outputr   s           r*   r8   zPvtAttention.forward  sM     yy?PQQ;;|A77#%QRR(88r,   r   )r<   r=   r>   r   r   r@   r2   r   r#   rA   r   r   r8   rC   rD   s   @r*   r   r      s        "".1"HK"hm" " " " " "; ; ;& _d "\36?BW[	u|	       r,   r   c            
       r     e Zd Z	 	 d
dededee         dee         f fdZdej        dej        fd	Z	 xZ
S )PvtFFNNrG   in_featureshidden_featuresout_featuresc                 t   t                                                       ||n|}t          j        ||          | _        t          |j        t                    rt          |j                 | _	        n|j        | _	        t          j        ||          | _
        t          j        |j                  | _        d S r0   )r1   r2   r   r   dense1rT   
hidden_actrB   r   intermediate_act_fndense2ra   rb   rc   )r3   rG   r   r   r   r4   s        r*   r2   zPvtFFN.__init__&  s     	'3'?||[i_==f'-- 	9'-f.?'@D$$'-'8D$i>>z&"<==r,   r5   r   c                     |                      |          }|                     |          }|                     |          }|                     |          }|                     |          }|S r0   )r   r   rc   r   r7   s     r*   r8   zPvtFFN.forward7  s_    M2200??]33M22]33r,   )NN)r<   r=   r>   r   r   r   r2   r#   rA   r8   rC   rD   s   @r*   r   r   %  s        
 *.&*> >> > "#	>
 sm> > > > > >"U\ el        r,   r   c                   \     e Zd Zdedededededef fdZdd	ej        d
edede	fdZ
 xZS )PvtLayerrG   rL   r   r+   r   	mlp_ratioc                    t                                                       t          j        ||j                  | _        t          ||||          | _        |dk    rt          |          nt          j	                    | _
        t          j        ||j                  | _        t          ||z            }t          |||          | _        d S )NrQ   )rG   rL   r   r   r   )rG   r   r   )r1   r2   r   r^   r_   layer_norm_1r   	attentionr.   Identityr+   layer_norm_2r   r   mlp)	r3   rG   rL   r   r+   r   r   mlp_hidden_sizer4   s	           r*   r2   zPvtLayer.__init__A  s     	L&:OPPP%# 3&?	
 
 
 4=s??Y///L&:OPPPkI566[Rabbbr,   Fr5   re   rf   r   c                 F   |                      |                     |          |||          }|d         }|dd          }|                     |          }||z   }|                     |                     |                    }|                     |          }||z   }	|	f|z   }|S )N)r5   re   rf   r   r   r   )r   r   r+   r   r   )
r3   r5   re   rf   r   self_attention_outputsr   r   
mlp_outputlayer_outputs
             r*   r8   zPvtLayer.forwardW  s    !%++M::/	 "0 "
 "
 2!4(,>>*:;;(=8XXd//>>??
^^J//
$z1/G+r,   r   )r<   r=   r>   r   r   r@   r2   r#   rA   r   r8   rC   rD   s   @r*   r   r   @  s        cc c !	c
 c $)c c c c c c c, U\ 3 s _c        r,   r   c                        e Zd Zdef fdZ	 	 	 ddej        dee         dee         dee         d	e	e
ef         f
d
Z xZS )
PvtEncoderrG   c                    t                                                       || _        t          j        d|j        t          |j                                                            }g }t          |j
                  D ]}|                    t          ||dk    r|j        n| j        j        d|dz   z  z  |j        |         |j        |         |dk    r|j        n|j        |dz
           |j        |         ||j
        dz
  k                         t%          j        |          | _        g }d}t          |j
                  D ]}g }|dk    r||j        |dz
           z  }t          |j        |                   D ]_}|                    t+          ||j        |         |j        |         |||z            |j        |         |j        |                              `|                    t%          j        |                     t%          j        |          | _        t%          j        |j        d         |j                  | _        d S )Nr   ri   r   )rG   rH   rI   rJ   rK   rL   rM   )rG   rL   r   r+   r   r   rh   rQ   )r1   r2   rG   r#   linspacedrop_path_ratesumdepthstolistrangenum_encoder_blocksappendrF   rH   patch_sizesstridesrK   hidden_sizesr   
ModuleListpatch_embeddingsr   r   sequence_reduction_ratios
mlp_ratiosblockr^   r_   r`   )
r3   rG   drop_path_decaysrd   iblockscurlayersjr4   s
            r*   r2   zPvtEncoder.__init__o  s[    !>!V-BCDVDVWW^^`` 
v011 	 	A"!45FFv00@V[\abefaf[g@h%1!4!>!,89Q!4!4FDWXY\]X]D^ & 3A 66#<q#@@  
 
 
 
 !#j 9 9 v011 	1 	1AFAvvv}QU++6=+,, 
 
%$*$7$:,2,Fq,I"237";282RST2U"("3A"6  	 	 	 	 MM"-//0000]6**
 ,v':2'>FDYZZZr,   FTru   r   output_hidden_statesreturn_dictr   c                 l   |rdnd }|rdnd }|j         d         }t          | j                  }|}	t          t	          | j        | j                            D ]\  }
\  }} ||	          \  }	}}|D ].} ||	|||          }|d         }	|r||d         fz   }|r||	fz   }/|
|dz
  k    r@|	                    |||d                              dddd                                          }	| 	                    |	          }	|r||	fz   }|st          d |	||fD                       S t          |	||          S )	N r   r   rh   r   ri   c              3      K   | ]}||V  	d S r0   r   ).0vs     r*   	<genexpr>z%PvtEncoder.forward.<locals>.<genexpr>  s(      mmq_`_l_l_l_l_lmmr,   last_hidden_stater5   
attentions)r!   r   r   	enumeratezipr   ro   rp   r   r`   tupler   )r3   ru   r   r   r   all_hidden_statesall_self_attentionsr~   
num_blocksr5   idxembedding_layerblock_layerre   rf   r   layer_outputss                    r*   r8   zPvtEncoder.forward  s    #7@BBD$5?bb4!'*
__
$3<SAVX\Xb=c=c3d3d 	v 	v/C//;+:?=+I+I(M65$ M M %mVUDU V V -a 0$ T*=qAQ@S*S'' M(9]<L(L%j1n$$ - 5 5j&%QS T T \ \]^`acdfg h h s s u u66 	E 1]4D D 	nmm]4EGZ$[mmmmmm++*
 
 
 	
r,   )FFT)r<   r=   r>   r   r2   r#   FloatTensorr   r   r   r   r   r8   rC   rD   s   @r*   r   r   n  s        0[y 0[ 0[ 0[ 0[ 0[ 0[j -2/4&*#
 #
'#
 $D>#
 'tn	#

 d^#
 
uo%	&#
 #
 #
 #
 #
 #
 #
 #
r,   r   c                   b    e Zd ZdZeZdZdZg Zde	e
j        e
j        e
j        f         ddfdZdS )PvtPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    pvtru   moduler   Nc                    t          |t          j                  rit          j                            |j        j        d| j        j                  |j        _        |j	         |j	        j        
                                 dS dS t          |t          j                  r?|j	        j        
                                 |j        j                            d           dS t          |t                    rt          j                            |j        j        d| j        j                  |j        _        |j        Dt          j                            |j        j        d| j        j                  |j        _        dS dS dS )zInitialize the weightsr   )meanstdNg      ?)rT   r   r   inittrunc_normal_weightdatarG   initializer_ranger   zero_r^   fill_rF   rZ   rM   )r3   r	  s     r*   _init_weightsz PvtPreTrainedModel._init_weights  s`   fbi(( 	 "$!6!6v}7IPSY]YdYv!6!w!wFM{& &&((((( '&-- 	K""$$$M$$S))))) 233 	.0g.C.C*/K1 /D / /F&+
 +(*(=(=$)5 )> ) ) %%%	 	 ,+r,   )r<   r=   r>   r?   r   config_classbase_model_prefixmain_input_name_no_split_modulesr   r   r   r\   r^   r  r   r,   r*   r  r    sk         
 L$OE")RY*L$M RV      r,   r  aG  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`~PvtConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a
  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See [`PvtImageProcessor.__call__`]
            for details.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zSThe bare Pvt encoder outputting raw hidden-states without any specific head on top.c                        e Zd Zdef fdZd Z ee                    d                     e	e
eede          	 	 	 ddej        d	ee         d
ee         dee         deeef         f
d                        Z xZS )PvtModelrG   c                     t                                          |           || _        t          |          | _        |                                  d S r0   )r1   r2   rG   r   encoder	post_initr3   rG   r4   s     r*   r2   zPvtModel.__init__  sK        "&)) 	r,   c                     |                                 D ]/\  }}| j        j        |         j                            |           0dS )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  layerr   r   )r3   heads_to_pruner!  r   s       r*   _prune_headszPvtModel._prune_heads  sU    
 +0022 	C 	CLE5Lu%/;;EBBBB	C 	Cr,   %(batch_size, channels, height, width)vision)
checkpointoutput_typer  modalityexpected_outputNru   r   r   r   r   c                     ||n| j         j        }||n| j         j        }||n| j         j        }|                     ||||          }|d         }|s|f|dd          z   S t          ||j        |j                  S )Nru   r   r   r   r   r   r   )rG   r   r   use_return_dictr  r   r5   r   )r3   ru   r   r   r   encoder_outputssequence_outputs          r*   r8   zPvtModel.forward  s     2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B],,%/!5#	 ' 
 
 *!, 	<#%(;;;-)7&1
 
 
 	
r,   )NNN)r<   r=   r>   r   r2   r#  r   PVT_INPUTS_DOCSTRINGr:   r   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr#   r  r   r   r   r   r8   rC   rD   s   @r*   r  r    s       
y      C C C +*+?+F+FGn+o+opp&#$.   -1/3&*
 
'
 $D>
 'tn	

 d^
 
uo%	&
 
 
  qp
 
 
 
 
r,   r  z
    Pvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    c                   $    e Zd Zdeddf fdZ ee                    d                     ee	e
ee          	 	 	 	 ddeej                 deej                 d	ee         d
ee         dee         deee
f         fd                        Z xZS )PvtForImageClassificationrG   r   Nc                 B   t                                          |           |j        | _        t          |          | _        |j        dk    r%t          j        |j        d         |j                  nt          j                    | _	        | 
                                 d S )Nr   rh   )r1   r2   
num_labelsr  r  r   r   r   r   
classifierr  r  s     r*   r2   z"PvtForImageClassification.__init__L  s        +F## FLEVYZEZEZBIf)"-v/@AAA`b`k`m`m 	
 	r,   r$  )r&  r'  r  r)  ru   labelsr   r   r   c                    ||n| j         j        }|                     ||||          }|d         }|                     |dddddf                   }d}	|Z| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|j        t          j        k    s|j        t          j	        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }
| j        dk    r1 |
|                                |                                          }	n |
||          }	n| j         j        dk    rGt                      }
 |
|                    d| j                  |                    d                    }	n*| j         j        dk    rt                      }
 |
||          }	|s|f|dd         z   }|	|	f|z   n|S t          |	||j        |j        	          S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr+  r   r   
regressionsingle_label_classificationmulti_label_classificationrh   )losslogitsr5   r   )rG   r,  r  r7  problem_typer6  r   r#   longr   r
   squeezer	   r   r   r   r5   r   )r3   ru   r8  r   r   r   r   r.  r>  r=  loss_fctr)   s               r*   r8   z!PvtForImageClassification.forwardZ  s   * &1%<kk$+B]((%/!5#	  
 
 "!*Aqqq!9::{'/?a''/;DK,,_q((flej.H.HFL\a\eLeLe/LDK,,/KDK,{'<77"99?a''#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB @ @&++b//RR)-III,..x// 	FY,F)-)9TGf$$vE$!/)	
 
 
 	
r,   )NNNN)r<   r=   r>   r   r2   r   r/  r:   r   _IMAGE_CLASS_CHECKPOINTr   r1  _IMAGE_CLASS_EXPECTED_OUTPUTr   r#   rA   r   r   r   r8   rC   rD   s   @r*   r4  r4  D  s%       y T       +*+?+F+FGn+o+opp*)$4	   *.,0/3&*;
 ;
u|,;
 &;
 $D>	;

 'tn;
 d^;
 
u++	,;
 ;
 ;
  qp;
 ;
 ;
 ;
 ;
r,   r4  )r   F)=r?   rU   r   typingr   r   r   r   r#   torch.nn.functionalr   r   rq   torch.utils.checkpointtorch.nnr   r	   r
   activationsr   modeling_outputsr   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   r   r   configuration_pvtr   
get_loggerr<   loggerr1  r0  r2  rC  rD  rA   r@   r   r+   Moduler.   rF   r   r   r   r   r   r   r  PVT_START_DOCSTRINGr/  r  r4  r   r,   r*   <module>rS     s%  "        3 3 3 3 3 3 3 3 3 3 3 3                     A A A A A A A A A A ! ! ! ! ! ! F F F F F F F F - - - - - - Q Q Q Q Q Q Q Q            ) ( ( ( ( ( 
	H	%	%- % 1 1  U\ e T V[Vb    *- - - - -") - - -A) A) A) A) A) A) A) A)H	 	 	 	 	BI 	 	 	O O O O O	 O O Od' ' ' ' '29 ' ' 'T    RY   6+ + + + +ry + + +\V
 V
 V
 V
 V
 V
 V
 V
r! ! ! ! ! ! ! !H	    Y 7
 7
 7
 7
 7
! 7
 7
	 7
t   Q
 Q
 Q
 Q
 Q
 2 Q
 Q
 Q
 Q
 Q
r,   