
    g?U              	          d Z ddlmZmZmZ ddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZmZmZmZ dd	lmZ dd
lmZmZmZmZmZ ddlmZ ddlmZ  ej        e           Z!dZ"dZ#g dZ$dZ%dZ&d2dej'        de(de)dej'        fdZ* G d dej+                  Z, G d dej+                  Z- G d dej+                  Z. G d d ej+                  Z/ G d! d"ej+                  Z0 G d# d$ej+                  Z1 G d% d&e          Z2d'Z3d(Z4 ed)e3           G d* d+e2                      Z5 ed,e3           G d- d.e2                      Z6 ed/e3           G d0 d1e2e                      Z7dS )3zPyTorch ConvNext model.    )OptionalTupleUnionN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings)BackboneMixin   )ConvNextConfigr   zfacebook/convnext-tiny-224)r   i      r   ztabby, tabby cat        Finput	drop_probtrainingreturnc                     |dk    s|s| S d|z
  }| j         d         fd| j        dz
  z  z   }|t          j        || j        | j                  z   }|                                 |                     |          |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r   r   r   )r   )dtypedevice)shapendimtorchrandr    r!   floor_div)r   r   r   	keep_probr"   random_tensoroutputs          j/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/convnext/modeling_convnext.py	drop_pathr,   :   s     CxII[^
Q 77E
5EL Y Y YYMYYy!!M1FM    c                   j     e Zd ZdZd	dee         ddf fdZdej        dej        fdZ	de
fdZ xZS )
ConvNextDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 V    t                                                       || _        d S N)super__init__r   )selfr   	__class__s     r+   r3   zConvNextDropPath.__init__R   s$    "r-   hidden_statesc                 8    t          || j        | j                  S r1   )r,   r   r   r4   r6   s     r+   forwardzConvNextDropPath.forwardV   s    FFFr-   c                 6    d                     | j                  S )Nzp={})formatr   )r4   s    r+   
extra_reprzConvNextDropPath.extra_reprY   s    }}T^,,,r-   r1   )__name__
__module____qualname____doc__r   floatr3   r$   Tensorr9   strr<   __classcell__r5   s   @r+   r/   r/   O   s        bb# #(5/ #T # # # # # #GU\ Gel G G G G-C - - - - - - - -r-   r/   c                   H     e Zd ZdZd fd	Zdej        dej        fdZ xZS )	ConvNextLayerNormaA  LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    ư>channels_lastc                 `   t                                                       t          j        t	          j        |                    | _        t          j        t	          j        |                    | _        || _	        || _
        | j
        dvrt          d| j
                   |f| _        d S )N)rI   channels_firstzUnsupported data format: )r2   r3   r   	Parameterr$   onesweightzerosbiasepsdata_formatNotImplementedErrornormalized_shape)r4   rT   rQ   rR   r5   s       r+   r3   zConvNextLayerNorm.__init__c   s    l5:.>#?#?@@L-=!>!>??	&#FFF%&T$BR&T&TUUU!1 3r-   xr   c                 *   | j         dk    r=t          j        j                            || j        | j        | j        | j                  }n| j         dk    r|j	        }|
                                }|                    dd          }||z
                      d                              dd          }||z
  t          j        || j        z             z  }|                    |          }| j        d d d d f         |z  | j        d d d d f         z   }|S )NrI   rK   r   T)keepdim   )r    )rR   r$   r   
functional
layer_normrT   rN   rP   rQ   r    rA   meanpowsqrtto)r4   rU   input_dtypeuss        r+   r9   zConvNextLayerNorm.forwardm   s   ..#..q$2GVZV_aeaijjAA!111'K		Aq$''AQA##At#44AQ%*Q\222A;''AAAAtTM*Q.111dD=1IIAr-   )rH   rI   )	r=   r>   r?   r@   r3   r$   rB   r9   rD   rE   s   @r+   rG   rG   ]   sm         
4 4 4 4 4 4 %,        r-   rG   c                   F     e Zd ZdZ fdZdej        dej        fdZ xZ	S )ConvNextEmbeddingszThis class is comparable to (and inspired by) the SwinEmbeddings class
    found in src/transformers/models/swin/modeling_swin.py.
    c                    t                                                       t          j        |j        |j        d         |j        |j                  | _        t          |j        d         dd          | _	        |j        | _        d S )Nr   kernel_sizestriderH   rK   rQ   rR   )
r2   r3   r   Conv2dnum_channelshidden_sizes
patch_sizepatch_embeddingsrG   	layernormr4   configr5   s     r+   r3   zConvNextEmbeddings.__init__   s     "	!4Q!7VEV_e_p!
 !
 !
 +6+>q+AtYijjj"/r-   pixel_valuesr   c                     |j         d         }|| j        k    rt          d          |                     |          }|                     |          }|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r"   rj   
ValueErrorrm   rn   )r4   rq   rj   
embeddingss       r+   r9   zConvNextEmbeddings.forward   s^    #)!,4,,,w   **<88
^^J//
r-   
r=   r>   r?   r@   r3   r$   FloatTensorrB   r9   rD   rE   s   @r+   rc   rc   {   si         0 0 0 0 0E$5 %,        r-   rc   c                   H     e Zd ZdZd fd	Zdej        dej        fdZ xZ	S )ConvNextLayera3  This corresponds to the `Block` class in the original implementation.

    There are two equivalent implementations: [DwConv, LayerNorm (channels_first), Conv, GELU,1x1 Conv]; all in (N, C,
    H, W) (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear]; Permute back

    The authors used (2) as they find it slightly faster in PyTorch.

    Args:
        config ([`ConvNextConfig`]): Model configuration class.
        dim (`int`): Number of input channels.
        drop_path (`float`): Stochastic depth rate. Default: 0.0.
    r   c                 0   t                                                       t          j        ||dd|          | _        t          |d          | _        t          j        |d|z            | _        t          |j
                 | _        t          j        d|z  |          | _        |j        dk    r0t          j        |j        t          j        |          z  d	          nd | _        |d
k    rt%          |          nt          j                    | _        d S )Nr   r
   )rf   paddinggroupsrH   rQ      r   T)requires_gradr   )r2   r3   r   ri   dwconvrG   rn   Linearpwconv1r   
hidden_actactpwconv2layer_scale_init_valuerL   r$   rM   layer_scale_parameterr/   Identityr,   )r4   rp   dimr,   r5   s       r+   r3   zConvNextLayer.__init__   s    iSa3OOO*3D999ya#g..&+,yS#.. ,q00 L6S9J9JJZ^____ 	"
 9BC))444R[]]r-   r6   r   c                    |}|                      |          }|                    dddd          }|                     |          }|                     |          }|                     |          }|                     |          }| j        
| j        |z  }|                    dddd          }||                     |          z   }|S )Nr   rX   r
   r   )r   permutern   r   r   r   r   r,   )r4   r6   r   rU   s       r+   r9   zConvNextLayer.forward   s    KK&&IIaAq!!NN1LLOOHHQKKLLOO%1*Q.AIIaAq!!DNN1%%%r-   )r   ru   rE   s   @r+   rx   rx      st         [ [ [ [ [ [U%6 5<        r-   rx   c                   H     e Zd ZdZd fd	Zdej        dej        fdZ xZ	S )	ConvNextStagea  ConvNeXT stage, consisting of an optional downsampling layer + multiple residual blocks.

    Args:
        config ([`ConvNextConfig`]): Model configuration class.
        in_channels (`int`): Number of input channels.
        out_channels (`int`): Number of output channels.
        depth (`int`): Number of residual blocks.
        drop_path_rates(`List[float]`): Stochastic depth rates for each layer.
    rX   Nc           	         t                                                       |k    s|dk    rAt          j        t	          |dd          t          j        |||                    | _        nt          j                    | _        pdg|z  t          j        fdt          |          D              | _	        d S )Nr   rH   rK   rh   re   r   c                 @    g | ]}t          |                    S ))r   r,   )rx   ).0jrp   drop_path_ratesout_channelss     r+   
<listcomp>z*ConvNextStage.__init__.<locals>.<listcomp>   s/    jjjXYmFPQ@RSSSjjjr-   )
r2   r3   r   
SequentialrG   ri   downsampling_layerr   rangelayers)	r4   rp   in_channelsr   rf   rg   depthr   r5   s	    ` `   `r+   r3   zConvNextStage.__init__   s    ,&&&1**&(m!+4EUVVV	+|U[\\\' 'D##
 ')kmmD#):cUU]mjjjjjj]bch]i]ijjj
r-   r6   r   c                 Z    |                      |          }|                     |          }|S r1   )r   r   r8   s     r+   r9   zConvNextStage.forward   s,    //>>M22r-   )rX   rX   rX   Nru   rE   s   @r+   r   r      sn         
 
 
 
 
 
U%6 5<        r-   r   c                   n     e Zd Z fdZ	 	 d	dej        dee         dee         dee	e
f         fdZ xZS )
ConvNextEncoderc           
         t                                                       t          j                    | _        d t          j        d|j        t          |j	                            
                    |j	                  D             }|j        d         }t          |j                  D ]Y}|j        |         }t          ||||dk    rdnd|j	        |         ||                   }| j                            |           |}Zd S )Nc                 6    g | ]}|                                 S  )tolist)r   rU   s     r+   r   z,ConvNextEncoder.__init__.<locals>.<listcomp>   s-     
 
 
AHHJJ
 
 
r-   r   rX   r   )r   r   rg   r   r   )r2   r3   r   
ModuleListstagesr$   linspacedrop_path_ratesumdepthssplitrk   r   
num_stagesr   append)r4   rp   r   prev_chsiout_chsstager5   s          r+   r3   zConvNextEncoder.__init__   s   moo
 
 %q&2GV]I[I[ \ \ b bcicp q q
 
 
 &q)v()) 	 	A)!,G!$$EEqqqmA& / 2  E Ku%%%HH	 	r-   FTr6   output_hidden_statesreturn_dictr   c                     |rdnd }t          | j                  D ]\  }}|r||fz   } ||          }|r||fz   }|st          d ||fD                       S t          ||          S )Nr   c              3      K   | ]}||V  	d S r1   r   )r   vs     r+   	<genexpr>z*ConvNextEncoder.forward.<locals>.<genexpr>  s"      XXq!-----XXr-   )last_hidden_stater6   )	enumerater   tupler   )r4   r6   r   r   all_hidden_statesr   layer_modules          r+   r9   zConvNextEncoder.forward   s     #7@BBD(55 	8 	8OA|# I$58H$H!(L77MM 	E 1]4D D 	YXX]4E$FXXXXXX-++
 
 
 	
r-   )FT)r=   r>   r?   r3   r$   rv   r   boolr   r   r   r9   rD   rE   s   @r+   r   r      s            . 05&*	
 
(
 'tn
 d^	

 
u44	5
 
 
 
 
 
 
 
r-   r   c                   *    e Zd ZdZeZdZdZdgZd Z	dS )ConvNextPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    convnextrq   rx   c                    t          |t          j        t          j        f          rT|j        j                            d| j        j                   |j	         |j	        j        
                                 dS dS t          |t          j                  r?|j	        j        
                                 |j        j                            d           dS dS )zInitialize the weightsr   )r[   stdNg      ?)
isinstancer   r   ri   rN   datanormal_rp   initializer_rangerP   zero_	LayerNormfill_)r4   modules     r+   _init_weightsz%ConvNextPreTrainedModel._init_weights  s    fry")455 	* M&&CT[5R&SSS{& &&((((( '&-- 	*K""$$$M$$S)))))	* 	*r-   N)
r=   r>   r?   r@   r   config_classbase_model_prefixmain_input_name_no_split_modulesr   r   r-   r+   r   r     sH         
 "L"$O()
* 
* 
* 
* 
*r-   r   aJ  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`ConvNextConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
aF  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`ConvNextImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zQThe bare ConvNext model outputting raw features without any specific head on top.c                        e Zd Z fdZ ee           eeee	de
          	 	 	 d
dej        dee         dee         deeef         fd	                        Z xZS )ConvNextModelc                 &   t                                          |           || _        t          |          | _        t          |          | _        t          j        |j	        d         |j
                  | _        |                                  d S )Nr|   )r2   r3   rp   rc   rt   r   encoderr   r   rk   layer_norm_epsrn   	post_initro   s     r+   r3   zConvNextModel.__init__D  s{       ,V44&v.. f&9"&=6CXYYY 	r-   vision)
checkpointoutput_typer   modalityexpected_outputNrq   r   r   r   c                 r   ||n| j         j        }||n| j         j        }|t          d          |                     |          }|                     |||          }|d         }|                     |                    ddg                    }|s||f|dd          z   S t          |||j	                  S )Nz You have to specify pixel_valuesr   r   r   r   r   )r   pooler_outputr6   )
rp   r   use_return_dictrs   rt   r   rn   r[   r   r6   )r4   rq   r   r   embedding_outputencoder_outputsr   pooled_outputs           r+   r9   zConvNextModel.forwardQ  s     %9$D  $+Jj 	 &1%<kk$+B]?@@@??<88,,!5# ' 
 
 ,A. '8'='=r2h'G'GHH 	L%}58KKK7/')7
 
 
 	
r-   )NNN)r=   r>   r?   r3   r   CONVNEXT_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr$   rv   r   r   r   r   r9   rD   rE   s   @r+   r   r   ?  s        
     +*+DEE&<$.   +//3&*	"
 "
'"
 'tn"
 d^	"

 
u>>	?"
 "
 "
  FE"
 "
 "
 "
 "
r-   r   z
    ConvNext Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                        e Zd Z fdZ ee           eeee	e
          	 	 	 	 d
dej        deej                 dee         dee         deeef         f
d	                        Z xZS )ConvNextForImageClassificationc                 B   t                                          |           |j        | _        t          |          | _        |j        dk    r%t          j        |j        d         |j                  nt          j                    | _	        | 
                                 d S )Nr   r   )r2   r3   
num_labelsr   r   r   r   rk   r   
classifierr   ro   s     r+   r3   z'ConvNextForImageClassification.__init__  s        +%f-- FLEVYZEZEZBIf)"-v/@AAA`b`k`m`m 	
 	r-   )r   r   r   r   Nrq   labelsr   r   r   c                    ||n| j         j        }|                     |||          }|r|j        n|d         }|                     |          }d}|Z| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|j        t          j	        k    s|j        t          j
        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }	| j        dk    r1 |	|                                |                                          }n |	||          }n| j         j        dk    rGt                      }	 |	|                    d| j                  |                    d                    }n*| j         j        dk    rt                      }	 |	||          }|s|f|dd         z   }
||f|
z   n|
S t!          |||j        	          S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   
regressionsingle_label_classificationmulti_label_classificationr   rX   )losslogitsr6   )rp   r   r   r   r   problem_typer   r    r$   longintr	   squeezer   viewr   r   r6   )r4   rq   r   r   r   outputsr   r   r   loss_fctr*   s              r+   r9   z&ConvNextForImageClassification.forward  s   ( &1%<kk$+B]--CWep-qq1<L--'!*//{'/?a''/;DK,,_q((flej.H.HFL\a\eLeLe/LDK,,/KDK,{'<77"99?a''#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB @ @&++b//RR)-III,..x// 	FY,F)-)9TGf$$vE3!/
 
 
 	
r-   )NNNN)r=   r>   r?   r3   r   r   r   _IMAGE_CLASS_CHECKPOINTr   r   _IMAGE_CLASS_EXPECTED_OUTPUTr$   rv   r   
LongTensorr   r   r   r9   rD   rE   s   @r+   r   r   ~  s             +*+DEE*8$4	   +/-1/3&*3
 3
'3
 )*3
 'tn	3

 d^3
 
u::	;3
 3
 3
  FE3
 3
 3
 3
 3
r-   r   zQ
    ConvNeXt backbone, to be used with frameworks like DETR and MaskFormer.
    c                        e Zd Z fdZ ee           eee          	 	 d	de	j
        dee         dee         defd                        Z xZS )
ConvNextBackbonec                    t                                          |           t                                          |           t          |          | _        t          |          | _        |j        d         g|j        z   | _        i }t          | j
        | j                  D ]\  }}t          |d          ||<   t          j        |          | _        |                                  d S )Nr   rK   )rR   )r2   r3   _init_backbonerc   rt   r   r   rk   num_featureszip_out_featureschannelsrG   r   
ModuleDicthidden_states_normsr   )r4   rp   r  r   rj   r5   s        r+   r3   zConvNextBackbone.__init__  s       v&&&,V44&v..#034v7JJ !#&t'94=#I#I 	g 	gE<):<Ue)f)f)f&&#%=1D#E#E  	r-   )r   r   Nrq   r   r   r   c                    ||n| j         j        }||n| j         j        }|                     |          }|                     |d|          }|r|j        n|d         }d}t          | j        |          D ]*\  }}	|| j        v r | j	        |         |	          }	||	fz  }+|s|f}
|r|
|fz  }
|
S t          ||r|ndd          S )az  
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224")
        >>> model = AutoBackbone.from_pretrained("facebook/convnext-tiny-224")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```NTr   r   r   )feature_mapsr6   
attentions)rp   r   r   rt   r   r6   r   stage_namesout_featuresr  r   )r4   rq   r   r   r   r   r6   r  r   hidden_stater*   s              r+   r9   zConvNextBackbone.forward  s&   8 &1%<kk$+B]$8$D  $+Jj 	  ??<88,,!%#  
 
 2=L--'!*#&t'7#G#G 	0 	0E<)))>t7>|LL/ 	"_F# +=**M%+?I--T
 
 
 	
r-   )NN)r=   r>   r?   r3   r   r   r   r   r   r$   rB   r   r   r9   rD   rE   s   @r+   r   r     s            " +*+DEE>XXX 04&*	9
 9
l9
 'tn9
 d^	9

 
9
 9
 9
 YX FE9
 9
 9
 9
 9
r-   r   )r   F)8r@   typingr   r   r   r$   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_outputsr   r   r   r   modeling_utilsr   utilsr   r   r   r   r   utils.backbone_utilsr   configuration_convnextr   
get_loggerr=   loggerr   r   r   r   r   rB   rA   r   r,   Moduler/   rG   rc   rx   r   r   r   CONVNEXT_START_DOCSTRINGr   r   r   r   r   r-   r+   <module>r     s     ) ) ) ) ) ) ) ) ) )            A A A A A A A A A A ! ! ! ! ! !            . - - - - -              2 1 1 1 1 1 2 2 2 2 2 2 
	H	%	% # 3 '  7 1  U\ e T V[Vb    *- - - - -ry - - -    	   <       0) ) ) ) )BI ) ) )X    BI   @,
 ,
 ,
 ,
 ,
bi ,
 ,
 ,
^* * * * *o * * *0	   W 8
 8
 8
 8
 8
+ 8
 8
	 8
v   I
 I
 I
 I
 I
%< I
 I
 I
X  	 M
 M
 M
 M
 M
. M
 M
 M
 M
 M
r-   