
"""PyTorch YOLOS model."""

import collections.abc
import math
from dataclasses import dataclass
from typing import Dict, List, Optional, Set, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn

from ...activations import ACT2FN
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
    ModelOutput,
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
    replace_return_docstrings,
)
from .configuration_yolos import YolosConfig


logger = logging.get_logger(__name__)

# General docstring
_CONFIG_FOR_DOC = "YolosConfig"

# Base docstring
_CHECKPOINT_FOR_DOC = "hustvl/yolos-small"
_EXPECTED_OUTPUT_SHAPE = [1, 3401, 384]


@dataclass
class YolosObjectDetectionOutput(ModelOutput):
    """
    Output type of [`YolosForObjectDetection`].

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` are provided):
            Total loss as a linear combination of a negative log-likelihood (cross-entropy) for class prediction and a
            bounding box loss. The latter is defined as a linear combination of the L1 loss and the generalized
            scale-invariant IoU loss.
        loss_dict (`Dict`, *optional*):
            A dictionary containing the individual losses. Useful for logging.
        logits (`torch.FloatTensor` of shape `(batch_size, num_queries, num_classes + 1)`):
            Classification logits (including no-object) for all queries.
        pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
            Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
            values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
            possible padding). You can use [`~YolosImageProcessor.post_process`] to retrieve the unnormalized bounding
            boxes.
        auxiliary_outputs (`list[Dict]`, *optional*):
            Optional, only returned when auxiliary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
            and labels are provided. It is a list of dictionaries containing the two above keys (`logits` and
            `pred_boxes`) for each decoder layer.
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Sequence of hidden-states at the output of the last layer of the decoder of the model.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of
            the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    Nloss	loss_dictlogits
pred_boxesauxiliary_outputslast_hidden_statehidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   r   r    r   r!   r"   r   r#        d/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/yolos/modeling_yolos.pyr   r   3   s          B )-D(5$
%,,, $Ix~$$$ $FE$$$$(J!(((.2xT
+22259x 129998<M8E%"345<<<59Ju01299999r,   r   c                   P     e Zd ZdZdeddf fdZdej        dej        fdZ xZ	S )YolosEmbeddingszT
    Construct the CLS token, detection tokens, position and patch embeddings.

    """

    def __init__(self, config: YolosConfig) -> None:
        super().__init__()

        self.cls_token = nn.Parameter(torch.zeros(1, 1, config.hidden_size))
        self.detection_tokens = nn.Parameter(torch.zeros(1, config.num_detection_tokens, config.hidden_size))
        self.patch_embeddings = YolosPatchEmbeddings(config)
        num_patches = self.patch_embeddings.num_patches
        self.position_embeddings = nn.Parameter(
            torch.zeros(1, num_patches + config.num_detection_tokens + 1, config.hidden_size)
        )

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.interpolation = InterpolateInitialPositionEmbeddings(config)
        self.config = config

    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
        batch_size, num_channels, height, width = pixel_values.shape
        embeddings = self.patch_embeddings(pixel_values)

        batch_size, seq_len, _ = embeddings.size()

        # add the [CLS] and detection tokens to the embedded patch tokens
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        detection_tokens = self.detection_tokens.expand(batch_size, -1, -1)
        embeddings = torch.cat((cls_tokens, embeddings, detection_tokens), dim=1)

        # add positional encoding to each token
        # this might require interpolation of the existing position embeddings
        position_embeddings = self.interpolation(self.position_embeddings, (height, width))

        embeddings = embeddings + position_embeddings

        embeddings = self.dropout(embeddings)

        return embeddings
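

# Layout sketch (illustrative, not part of the modeling code): with the default
# `num_detection_tokens=100`, the sequence built by `YolosEmbeddings.forward` is ordered as
#
#     [CLS] | patch_1 ... patch_N | det_1 ... det_100
#
# so its length is 1 + num_patches + 100. The position embeddings follow the same layout,
# which is why the interpolation modules below treat the [CLS] slot, the patch grid and the
# detection-token slots separately.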


class InterpolateInitialPositionEmbeddings(nn.Module):
    def __init__(self, config) -> None:
        super().__init__()
        self.config = config

    def forward(self, pos_embed, img_size=(800, 1344)) -> torch.Tensor:
        cls_pos_embed = pos_embed[:, 0, :]
        cls_pos_embed = cls_pos_embed[:, None]
        det_pos_embed = pos_embed[:, -self.config.num_detection_tokens :, :]
        patch_pos_embed = pos_embed[:, 1 : -self.config.num_detection_tokens, :]
        patch_pos_embed = patch_pos_embed.transpose(1, 2)
        batch_size, hidden_size, seq_len = patch_pos_embed.shape

        patch_height, patch_width = (
            self.config.image_size[0] // self.config.patch_size,
            self.config.image_size[1] // self.config.patch_size,
        )
        patch_pos_embed = patch_pos_embed.view(batch_size, hidden_size, patch_height, patch_width)

        height, width = img_size
        new_patch_height, new_patch_width = height // self.config.patch_size, width // self.config.patch_size
        patch_pos_embed = nn.functional.interpolate(
            patch_pos_embed, size=(new_patch_height, new_patch_width), mode="bicubic", align_corners=False
        )
        patch_pos_embed = patch_pos_embed.flatten(2).transpose(1, 2)
        scale_pos_embed = torch.cat((cls_pos_embed, patch_pos_embed, det_pos_embed), dim=1)
        return scale_pos_embed


class InterpolateMidPositionEmbeddings(nn.Module):
    def __init__(self, config) -> None:
        super().__init__()
        self.config = config

    def forward(self, pos_embed, img_size=(800, 1344)) -> torch.Tensor:
        cls_pos_embed = pos_embed[:, :, 0, :]
        cls_pos_embed = cls_pos_embed[:, None]
        det_pos_embed = pos_embed[:, :, -self.config.num_detection_tokens :, :]
        patch_pos_embed = pos_embed[:, :, 1 : -self.config.num_detection_tokens, :]
        patch_pos_embed = patch_pos_embed.transpose(2, 3)
        depth, batch_size, hidden_size, seq_len = patch_pos_embed.shape

        patch_height, patch_width = (
            self.config.image_size[0] // self.config.patch_size,
            self.config.image_size[1] // self.config.patch_size,
        )
        patch_pos_embed = patch_pos_embed.view(depth * batch_size, hidden_size, patch_height, patch_width)

        height, width = img_size
        new_patch_height, new_patch_width = height // self.config.patch_size, width // self.config.patch_size
        patch_pos_embed = nn.functional.interpolate(
            patch_pos_embed, size=(new_patch_height, new_patch_width), mode="bicubic", align_corners=False
        )
        patch_pos_embed = (
            patch_pos_embed.flatten(2)
            .transpose(1, 2)
            .contiguous()
            .view(depth, batch_size, new_patch_height * new_patch_width, hidden_size)
        )
        scale_pos_embed = torch.cat((cls_pos_embed, patch_pos_embed, det_pos_embed), dim=2)
        return scale_pos_embed


class YolosPatchEmbeddings(nn.Module):
    """
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    """

    def __init__(self, config):
        super().__init__()
        image_size, patch_size = config.image_size, config.patch_size
        num_channels, hidden_size = config.num_channels, config.hidden_size

        image_size = image_size if isinstance(image_size, collections.abc.Iterable) else (image_size, image_size)
        patch_size = patch_size if isinstance(patch_size, collections.abc.Iterable) else (patch_size, patch_size)
        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_channels = num_channels
        self.num_patches = num_patches

        self.projection = nn.Conv2d(num_channels, hidden_size, kernel_size=patch_size, stride=patch_size)

    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
        batch_size, num_channels, height, width = pixel_values.shape
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )

        embeddings = self.projection(pixel_values).flatten(2).transpose(1, 2)
        return embeddings
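

# Arithmetic sketch (illustrative): with a 224x224 input and patch_size=16, the Conv2d above
# produces (224 // 16) * (224 // 16) = 196 non-overlapping patch embeddings (kernel size and
# stride both equal the patch size), each projected to `hidden_size` channels.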
            e Zd Zdeddf fdZdej        dej        fdZ	 ddeej                 d	e	de
eej        ej        f         eej                 f         fd
Z xZS )YolosSelfAttentionr0   r1   Nc                    t                                                       |j        |j        z  dk    r1t	          |d          s!t          d|j        f d|j         d          |j        | _        t          |j        |j        z            | _        | j        | j        z  | _        t          j
        |j        | j        |j                  | _        t          j
        |j        | j        |j                  | _        t          j
        |j        | j        |j                  | _        t          j        |j                  | _        d S )Nr   embedding_sizezThe hidden size z4 is not a multiple of the number of attention heads .)bias)r4   r5   r8   num_attention_headshasattrr   intattention_head_sizeall_head_sizer
   Linearqkv_biasquerykeyvaluer@   attention_probs_dropout_probrB   ra   s     r-   r5   zYolosSelfAttention.__init__   s1    ::a??PVXhHiHi?76#5"7 7 737 7 7  
 $*#= #&v'9F<V'V#W#W !58PPYv143EFO\\\
9V/1C&/ZZZYv143EFO\\\
z&"EFFr,   xc                     |                                 d d         | j        | j        fz   }|                    |          }|                    dddd          S )NrI   r   re   r   r   )rM   r   r   rm   permute)rE   r   new_x_shapes      r-   transpose_for_scoresz'YolosSelfAttention.transpose_for_scores   sP    ffhhssmt'?AY&ZZFF;yyAq!$$$r,   F	head_maskoutput_attentionsc                    |                      |          }|                     |                     |                    }|                     |                     |                    }|                     |          }t	          j        ||                    dd                    }|t          j        | j	                  z  }t          j                            |d          }	|                     |	          }	||	|z  }	t	          j        |	|          }
|
                    dddd                                          }
|
                                d d         | j        fz   }|
                    |          }
|r|
|	fn|
f}|S )NrI   rJ   r   re   r   r   )r   r   r   r   r(   matmulrj   mathsqrtr   r
   rn   softmaxrB   r   r   rM   r   rm   )rE   r"   r   r   mixed_query_layer	key_layervalue_layerquery_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                r-   rX   zYolosSelfAttention.forward  sr    !JJ}55--dhh}.E.EFF	//

=0I0IJJ//0ABB !<Y5H5HR5P5PQQ+di8P.Q.QQ -//0@b/II ,,77  -	9O_kBB%--aAq99DDFF"/"4"4"6"6ss";t?Q>S"S%**+BCC6G]=/22mM]r,   NF)r$   r%   r&   r   r5   r(   rZ   r   r   boolr	   r   rX   r[   r\   s   @r-   r   r      s        G{ Gt G G G G G G$%el %u| % % % % bg! !(0(>!Z^!	uU\5</0%2EE	F! ! ! ! ! ! ! !r,   r   c                        e Zd Zdeddf fdZ	 	 d
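

# Equivalence sketch (illustrative, not part of the modeling code): the eager path above and
# the SDPA path below compute the same attention. With q, k, v of shape
# (batch_size, num_heads, seq_len, head_dim):
#
#     probs = nn.functional.softmax(q @ k.transpose(-1, -2) / math.sqrt(q.size(-1)), dim=-1)
#     eager_out = probs @ v
#     sdpa_out = nn.functional.scaled_dot_product_attention(q, k, v)
#     # eager_out and sdpa_out agree up to floating-point error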
dej        deej                 de	de
eej        ej        f         eej                 f         f fd	Z xZS )YolosSdpaSelfAttentionr0   r1   Nc                 b    t                                          |           |j        | _        d S r_   )r4   r5   r   ra   s     r-   r5   zYolosSdpaSelfAttention.__init__)  s,       ,2,O)))r,   Fr"   r   r   c           	         |s|>t                               d           t                                          |||          S |                     |          }|                     |                     |                    }|                     |                     |                    }|                     |          }t          j	        j
                            ||||| j        r| j        nddd           }|                    dddd	                                          }|                                d d
         | j        fz   }	|                    |	          }|d fS )Na  `YolosSdpaAttention` is used but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True` or `head_mask`. Falling back to the manual attention implementation, but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.)r"   r   r           F)	is_causalscaler   re   r   r   r   )loggerwarning_oncer4   rX   r   r   r   r   r(   r
   rn   scaled_dot_product_attentiontrainingr   r   r   rM   r   rm   )rE   r"   r   r   r   r   r   r   r   r   rF   s             r-   rX   zYolosSdpaSelfAttention.forward-  sp     		 5w   77??+#"3 #    !JJ}55--dhh}.E.EFF	//

=0I0IJJ//0ABB+HH15GD--C I 
 
 &--aAq99DDFF"/"4"4"6"6ss";t?Q>S"S%**+BCCd""r,   r   )r$   r%   r&   r   r5   r(   r)   r   rZ   r   r	   r   rX   r[   r\   s   @r-   r   r   (  s        P{ Pt P P P P P P -1"'	'# '#('# EL)'#  	'#
 
uU\5</0%2EE	F'# '# '# '# '# '# '# '# '# '#r,   r   c                   ^     e Zd ZdZdeddf fdZdej        dej        dej        fdZ xZ	S )	YolosSelfOutputz
    The residual connection is defined in YolosLayer instead of here (as is the case with other models), due to the
    layernorm applied before each block.
    """

    def __init__(self, config: YolosConfig) -> None:
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)

        return hidden_states


# Copied from transformers.models.vit.modeling_vit.ViTAttention with ViT->Yolos
class YolosAttention(nn.Module):
    def __init__(self, config: YolosConfig) -> None:
        super().__init__()
        self.attention = YolosSelfAttention(config)
        self.output = YolosSelfOutput(config)
        self.pruned_heads = set()

    def prune_heads(self, heads: Set[int]) -> None:
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads, self.attention.num_attention_heads, self.attention.attention_head_size, self.pruned_heads
        )

        # Prune linear layers
        self.attention.query = prune_linear_layer(self.attention.query, index)
        self.attention.key = prune_linear_layer(self.attention.key, index)
        self.attention.value = prune_linear_layer(self.attention.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.attention.num_attention_heads = self.attention.num_attention_heads - len(heads)
        self.attention.all_head_size = self.attention.attention_head_size * self.attention.num_attention_heads
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states: torch.Tensor,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
    ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor]]:
        self_outputs = self.attention(hidden_states, head_mask, output_attentions)

        attention_output = self.output(self_outputs[0], hidden_states)

        outputs = (attention_output,) + self_outputs[1:]  # add attentions if we output them
        return outputs


# Copied from transformers.models.vit.modeling_vit.ViTSdpaAttention with ViT->Yolos
class YolosSdpaAttention(YolosAttention):
    def __init__(self, config: YolosConfig) -> None:
        super().__init__(config)
        self.attention = YolosSdpaSelfAttention(config)


# Copied from transformers.models.vit.modeling_vit.ViTIntermediate with ViT->Yolos
class YolosIntermediate(nn.Module):
    def __init__(self, config: YolosConfig) -> None:
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)

        return hidden_states


# Copied from transformers.models.vit.modeling_vit.ViTOutput with ViT->Yolos
class YolosOutput(nn.Module):
    def __init__(self, config: YolosConfig) -> None:
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)

        hidden_states = hidden_states + input_tensor

        return hidden_states


YOLOS_ATTENTION_CLASSES = {
    "eager": YolosAttention,
    "sdpa": YolosSdpaAttention,
}


class YolosLayer(nn.Module):
    """This corresponds to the Block class in the timm implementation."""

    def __init__(self, config: YolosConfig) -> None:
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1
        self.attention = YOLOS_ATTENTION_CLASSES[config._attn_implementation](config)
        self.intermediate = YolosIntermediate(config)
        self.output = YolosOutput(config)
        self.layernorm_before = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.layernorm_after = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
    ) -> Union[Tuple[torch.Tensor, torch.Tensor], Tuple[torch.Tensor]]:
        self_attention_outputs = self.attention(
            self.layernorm_before(hidden_states),  # in Yolos, layernorm is applied before self-attention
            head_mask,
            output_attentions=output_attentions,
        )
        attention_output = self_attention_outputs[0]
        outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights

        # first residual connection
        hidden_states = attention_output + hidden_states

        # in Yolos, layernorm is also applied after self-attention
        layer_output = self.layernorm_after(hidden_states)
        layer_output = self.intermediate(layer_output)

        # second residual connection is done here
        layer_output = self.output(layer_output, hidden_states)

        outputs = (layer_output,) + outputs

        return outputs


class YolosEncoder(nn.Module):
    def __init__(self, config: YolosConfig) -> None:
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([YolosLayer(config) for _ in range(config.num_hidden_layers)])
        self.gradient_checkpointing = False

        seq_length = (
            1 + (config.image_size[0] * config.image_size[1] // config.patch_size**2) + config.num_detection_tokens
        )
        self.mid_position_embeddings = (
            nn.Parameter(
                torch.zeros(
                    config.num_hidden_layers - 1,
                    1,
                    seq_length,
                    config.hidden_size,
                )
            )
            if config.use_mid_position_embeddings
            else None
        )

        self.interpolation = InterpolateMidPositionEmbeddings(config) if config.use_mid_position_embeddings else None

    def forward(
        self,
        hidden_states: torch.Tensor,
        height,
        width,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = True,
    ) -> Union[tuple, BaseModelOutput]:
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None

        if self.config.use_mid_position_embeddings:
            interpolated_mid_position_embeddings = self.interpolation(self.mid_position_embeddings, (height, width))

        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(
                    layer_module.__call__,
                    hidden_states,
                    layer_head_mask,
                    output_attentions,
                )
            else:
                layer_outputs = layer_module(hidden_states, layer_head_mask, output_attentions)

            hidden_states = layer_outputs[0]

            if self.config.use_mid_position_embeddings:
                if i < (self.config.num_hidden_layers - 1):
                    hidden_states = hidden_states + interpolated_mid_position_embeddings[i]

            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states, all_self_attentions] if v is not None)
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
        )


class YolosPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = YolosConfig
    base_model_prefix = "vit"
    main_input_name = "pixel_values"
    supports_gradient_checkpointing = True
    _no_split_modules = []
    _supports_sdpa = True

    def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)


YOLOS_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`YolosConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

YOLOS_INPUTS_DOCSTRING = r"""
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`YolosImageProcessor.__call__`] for details.

        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


@add_start_docstrings(
    "The bare YOLOS Model transformer outputting raw hidden-states without any specific head on top.",
    YOLOS_START_DOCSTRING,
)
class YolosModel(YolosPreTrainedModel):
    def __init__(self, config: YolosConfig, add_pooling_layer: bool = True):
        super().__init__(config)
        self.config = config

        self.embeddings = YolosEmbeddings(config)
        self.encoder = YolosEncoder(config)

        self.layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.pooler = YolosPooler(config) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> YolosPatchEmbeddings:
        return self.embeddings.patch_embeddings

    def _prune_heads(self, heads_to_prune: Dict[int, List[int]]) -> None:
        """
        Prunes heads of the model.

        Args:
            heads_to_prune (`dict`):
                See base class `PreTrainedModel`. The input dictionary must have the following format: {layer_num:
                list of heads to prune in this layer}
        """
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    @add_start_docstrings_to_model_forward(YOLOS_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=BaseModelOutputWithPooling,
        config_class=_CONFIG_FOR_DOC,
        modality="vision",
        expected_output=_EXPECTED_OUTPUT_SHAPE,
    )
    def forward(
        self,
        pixel_values: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPooling]:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # attention_probs has shape bsz x n_heads x N x N
        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
        # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

        embedding_output = self.embeddings(pixel_values)

        encoder_outputs = self.encoder(
            embedding_output,
            height=pixel_values.shape[-2],
            width=pixel_values.shape[-1],
            head_mask=head_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]
        sequence_output = self.layernorm(sequence_output)
        pooled_output = self.pooler(sequence_output) if self.pooler is not None else None

        if not return_dict:
            head_outputs = (sequence_output, pooled_output) if pooled_output is not None else (sequence_output,)
            return head_outputs + encoder_outputs[1:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )


# Copied from transformers.models.vit.modeling_vit.ViTPooler with ViT->Yolos
class YolosPooler(nn.Module):
    def __init__(self, config: YolosConfig):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states):
        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token.
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output


class YolosMLPPredictionHead(nn.Module):
    """
    Very simple multi-layer perceptron (MLP, also called FFN), used to predict the normalized center coordinates,
    height and width of a bounding box w.r.t. an image.

    Copied from https://github.com/facebookresearch/detr/blob/master/models/detr.py
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = nn.functional.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
        return x


@add_start_docstrings(
    """
    YOLOS Model (consisting of a ViT encoder) with object detection heads on top, for tasks such as COCO detection.
    """,
    YOLOS_START_DOCSTRING,
)
class YolosForObjectDetection(YolosPreTrainedModel):
    def __init__(self, config: YolosConfig):
        super().__init__(config)

        # YOLOS (ViT) encoder model
        self.vit = YolosModel(config, add_pooling_layer=False)

        # Object detection heads
        # We add one for the "no object" class
        self.class_labels_classifier = YolosMLPPredictionHead(
            input_dim=config.hidden_size, hidden_dim=config.hidden_size, output_dim=config.num_labels + 1, num_layers=3
        )
        self.bbox_predictor = YolosMLPPredictionHead(
            input_dim=config.hidden_size, hidden_dim=config.hidden_size, output_dim=4, num_layers=3
        )

        # Initialize weights and apply final processing
        self.post_init()

    # taken from https://github.com/facebookresearch/detr/blob/master/models/detr.py
    @torch.jit.unused
    def _set_aux_loss(self, outputs_class, outputs_coord):
        # this is a workaround to make torchscript happy, as torchscript
        # doesn't support dictionary with non-homogeneous values, such
        # as a dict having both a Tensor and a list.
        return [{"logits": a, "pred_boxes": b} for a, b in zip(outputs_class[:-1], outputs_coord[:-1])]

    @add_start_docstrings_to_model_forward(YOLOS_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=YolosObjectDetectionOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        pixel_values: torch.FloatTensor,
        labels: Optional[List[Dict]] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, YolosObjectDetectionOutput]:
        r"""
        labels (`List[Dict]` of len `(batch_size,)`, *optional*):
            Labels for computing the bipartite matching loss. List of dicts, each dictionary containing at least the
            following 2 keys: `'class_labels'` and `'boxes'` (the class labels and bounding boxes of an image in the
            batch respectively). The class labels themselves should be a `torch.LongTensor` of len `(number of bounding
            boxes in the image,)` and the boxes a `torch.FloatTensor` of shape `(number of bounding boxes in the image,
            4)`.

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoModelForObjectDetection
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("hustvl/yolos-tiny")
        >>> model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")

        >>> inputs = image_processor(images=image, return_tensors="pt")
        >>> outputs = model(**inputs)

        >>> # convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
        >>> target_sizes = torch.tensor([image.size[::-1]])
        >>> results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[
        ...     0
        ... ]

        >>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        ...     box = [round(i, 2) for i in box.tolist()]
        ...     print(
        ...         f"Detected {model.config.id2label[label.item()]} with confidence "
        ...         f"{round(score.item(), 3)} at location {box}"
        ...     )
        Detected remote with confidence 0.991 at location [46.48, 72.78, 178.98, 119.3]
        Detected remote with confidence 0.908 at location [336.48, 79.27, 368.23, 192.36]
        Detected cat with confidence 0.934 at location [337.18, 18.06, 638.14, 373.09]
        Detected cat with confidence 0.979 at location [10.93, 53.74, 313.41, 470.67]
        Detected remote with confidence 0.974 at location [41.63, 72.23, 178.09, 119.99]
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # First, send images through YOLOS base model to obtain hidden states
        outputs = self.vit(
            pixel_values,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        # Take the final hidden states of the detection tokens
        sequence_output = sequence_output[:, -self.config.num_detection_tokens :, :]

        # Class logits + predicted bounding boxes
        logits = self.class_labels_classifier(sequence_output)
        pred_boxes = self.bbox_predictor(sequence_output).sigmoid()

        loss, loss_dict, auxiliary_outputs = None, None, None
        if labels is not None:
            outputs_class, outputs_coord = None, None
            if self.config.auxiliary_loss:
                intermediate = outputs.intermediate_hidden_states if return_dict else outputs[4]
                outputs_class = self.class_labels_classifier(intermediate)
                outputs_coord = self.bbox_predictor(intermediate).sigmoid()
            loss, loss_dict, auxiliary_outputs = self.loss_function(
                logits, labels, self.device, pred_boxes, self.config, outputs_class, outputs_coord
            )

        if not return_dict:
            if auxiliary_outputs is not None:
                output = (logits, pred_boxes) + auxiliary_outputs + outputs
            else:
                output = (logits, pred_boxes) + outputs
            return ((loss, loss_dict) + output) if loss is not None else output

        return YolosObjectDetectionOutput(
            loss=loss,
            loss_dict=loss_dict,
            logits=logits,
            pred_boxes=pred_boxes,
            auxiliary_outputs=auxiliary_outputs,
            last_hidden_state=outputs.last_hidden_state,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )