
"""PyTorch ConvNextV2 model."""

from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_outputs import (
    BackboneOutput,
    BaseModelOutputWithNoAttention,
    BaseModelOutputWithPoolingAndNoAttention,
    ImageClassifierOutputWithNoAttention,
)
from ...modeling_utils import PreTrainedModel
from ...utils import (
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
    replace_return_docstrings,
)
from ...utils.backbone_utils import BackboneMixin
from .configuration_convnextv2 import ConvNextV2Config


logger = logging.get_logger(__name__)

_CONFIG_FOR_DOC = "ConvNextV2Config"

_CHECKPOINT_FOR_DOC = "facebook/convnextv2-tiny-1k-224"
_EXPECTED_OUTPUT_SHAPE = [1, 768, 7, 7]

_IMAGE_CLASS_CHECKPOINT = "facebook/convnextv2-tiny-1k-224"
_IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat"


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
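
    Example (an illustrative sketch, not from the original codebase; at inference time, i.e. `training=False`, the
    function is the identity):

    ```python
    >>> import torch

    >>> x = torch.ones(4, 3, 2, 2)  # (batch_size, channels, height, width)
    >>> drop_path(x, drop_prob=0.5, training=False) is x  # eval mode: input passes through unchanged
    True
    >>> drop_path(x, drop_prob=0.5, training=True).shape  # train mode: per-sample drop, shape preserved
    torch.Size([4, 3, 2, 2])
    ```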
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)  # broadcastable over any tensor rank, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output


class ConvNextV2DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)


class ConvNextV2GRN(nn.Module):
    """GRN (Global Response Normalization) layer"""

    def __init__(self, dim: int):
        super().__init__()
        self.weight = nn.Parameter(torch.zeros(1, 1, 1, dim))
        self.bias = nn.Parameter(torch.zeros(1, 1, 1, dim))

    def forward(self, hidden_states: torch.FloatTensor) -> torch.FloatTensor:
        # Compute and normalize global spatial feature maps
        global_features = torch.norm(hidden_states, p=2, dim=(1, 2), keepdim=True)
        norm_features = global_features / (global_features.mean(dim=-1, keepdim=True) + 1e-6)
        hidden_states = self.weight * (hidden_states * norm_features) + self.bias + hidden_states

        return hidden_states


class ConvNextV2LayerNorm(nn.Module):
    r"""LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError(f"Unsupported data format: {self.data_format}")
        self.normalized_shape = (normalized_shape,)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.data_format == "channels_last":
            x = torch.nn.functional.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            input_dtype = x.dtype
            x = x.float()
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = x.to(dtype=input_dtype)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
        return x


class ConvNextV2Embeddings(nn.Module):
    """This class is comparable to (and inspired by) the SwinEmbeddings class
    found in src/transformers/models/swin/modeling_swin.py.
    """

    def __init__(self, config):
        super().__init__()
        self.patch_embeddings = nn.Conv2d(
            config.num_channels, config.hidden_sizes[0], kernel_size=config.patch_size, stride=config.patch_size
        )
        self.layernorm = ConvNextV2LayerNorm(config.hidden_sizes[0], eps=1e-6, data_format="channels_first")
        self.num_channels = config.num_channels

    def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
        num_channels = pixel_values.shape[1]
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        embeddings = self.patch_embeddings(pixel_values)
        embeddings = self.layernorm(embeddings)
        return embeddings


class ConvNextV2Layer(nn.Module):
    """This corresponds to the `Block` class in the original implementation.

    There are two equivalent implementations: (1) [DwConv, LayerNorm (channels_first), 1x1 Conv, GELU, 1x1 Conv],
    all in (N, C, H, W); (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear],
    then permute back to (N, C, H, W).

    The authors used (2) as they find it slightly faster in PyTorch.
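
    Example (an illustrative shape check, not taken from the original codebase; assumes a default
    `ConvNextV2Config` and an arbitrary channel count of 96):

    ```python
    >>> import torch
    >>> from transformers import ConvNextV2Config
    >>> from transformers.models.convnextv2.modeling_convnextv2 import ConvNextV2Layer

    >>> config = ConvNextV2Config()
    >>> block = ConvNextV2Layer(config, dim=96)
    >>> hidden_states = torch.randn(1, 96, 56, 56)  # (batch_size, num_channels, height, width)
    >>> block(hidden_states).shape  # residual block: output shape matches input shape
    torch.Size([1, 96, 56, 56])
    ```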

    Args:
        config ([`ConvNextV2Config`]): Model configuration class.
        dim (`int`): Number of input channels.
        drop_path (`float`): Stochastic depth rate. Default: 0.0.
    """

    def __init__(self, config, dim, drop_path=0):
        super().__init__()
        # depthwise conv
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)
        self.layernorm = ConvNextV2LayerNorm(dim, eps=1e-6)
        # pointwise/1x1 convs, implemented with linear layers
        self.pwconv1 = nn.Linear(dim, 4 * dim)
        self.act = ACT2FN[config.hidden_act]
        self.grn = ConvNextV2GRN(4 * dim)
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.drop_path = ConvNextV2DropPath(drop_path) if drop_path > 0.0 else nn.Identity()

    def forward(self, hidden_states: torch.FloatTensor) -> torch.Tensor:
        input = hidden_states
        x = self.dwconv(hidden_states)
        # (batch_size, num_channels, height, width) -> (batch_size, height, width, num_channels)
        x = x.permute(0, 2, 3, 1)
        x = self.layernorm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.grn(x)
        x = self.pwconv2(x)
        # (batch_size, height, width, num_channels) -> (batch_size, num_channels, height, width)
        x = x.permute(0, 3, 1, 2)

        x = input + self.drop_path(x)
        return x


class ConvNextV2Stage(nn.Module):
    """ConvNeXTV2 stage, consisting of an optional downsampling layer + multiple residual blocks.

    Args:
        config ([`ConvNextV2Config`]): Model configuration class.
        in_channels (`int`): Number of input channels.
        out_channels (`int`): Number of output channels.
        depth (`int`): Number of residual blocks.
        drop_path_rates (`List[float]`): Stochastic depth rates for each layer.
    """

    def __init__(self, config, in_channels, out_channels, kernel_size=2, stride=2, depth=2, drop_path_rates=None):
        super().__init__()

        if in_channels != out_channels or stride > 1:
            self.downsampling_layer = nn.Sequential(
                ConvNextV2LayerNorm(in_channels, eps=1e-6, data_format="channels_first"),
                nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride),
            )
        else:
            self.downsampling_layer = nn.Identity()
        drop_path_rates = drop_path_rates or [0.0] * depth
        self.layers = nn.Sequential(
            *[ConvNextV2Layer(config, dim=out_channels, drop_path=drop_path_rates[j]) for j in range(depth)]
        )

    def forward(self, hidden_states: torch.FloatTensor) -> torch.Tensor:
        hidden_states = self.downsampling_layer(hidden_states)
        hidden_states = self.layers(hidden_states)
        return hidden_states


class ConvNextV2Encoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.stages = nn.ModuleList()
        # Linearly increasing stochastic depth rate, split into one list of rates per stage
        drop_path_rates = [
            x.tolist() for x in torch.linspace(0, config.drop_path_rate, sum(config.depths)).split(config.depths)
        ]
        prev_chs = config.hidden_sizes[0]
        for i in range(config.num_stages):
            out_chs = config.hidden_sizes[i]
            stage = ConvNextV2Stage(
                config,
                in_channels=prev_chs,
                out_channels=out_chs,
                stride=2 if i > 0 else 1,
                depth=config.depths[i],
                drop_path_rates=drop_path_rates[i],
            )
            self.stages.append(stage)
            prev_chs = out_chs

    def forward(
        self,
        hidden_states: torch.FloatTensor,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[Tuple, BaseModelOutputWithNoAttention]:
        all_hidden_states = () if output_hidden_states else None

        for i, layer_module in enumerate(self.stages):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            hidden_states = layer_module(hidden_states)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states] if v is not None)

        return BaseModelOutputWithNoAttention(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
        )


class ConvNextV2PreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = ConvNextV2Config
    base_model_prefix = "convnextv2"
    main_input_name = "pixel_values"
    _no_split_modules = ["ConvNextV2Layer"]

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)


CONVNEXTV2_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`ConvNextV2Config`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

CONVNEXTV2_INPUTS_DOCSTRING = r"""
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`ConvNextImageProcessor`]. See
            [`ConvNextImageProcessor.__call__`] for details.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


@add_start_docstrings(
    "The bare ConvNextV2 model outputting raw features without any specific head on top.",
    CONVNEXTV2_START_DOCSTRING,
)
class ConvNextV2Model(ConvNextV2PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = ConvNextV2Embeddings(config)
        self.encoder = ConvNextV2Encoder(config)

        # final layernorm layer
        self.layernorm = nn.LayerNorm(config.hidden_sizes[-1], eps=config.layer_norm_eps)

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(CONVNEXTV2_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=BaseModelOutputWithPoolingAndNoAttention,
        config_class=_CONFIG_FOR_DOC,
        modality="vision",
        expected_output=_EXPECTED_OUTPUT_SHAPE,
    )
    def forward(
        self,
        pixel_values: torch.FloatTensor = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPoolingAndNoAttention]:
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        embedding_output = self.embeddings(pixel_values)

        encoder_outputs = self.encoder(
            embedding_output,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        last_hidden_state = encoder_outputs[0]

        # global average pooling, (N, C, H, W) -> (N, C)
        pooled_output = self.layernorm(last_hidden_state.mean([-2, -1]))

        if not return_dict:
            return (last_hidden_state, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPoolingAndNoAttention(
            last_hidden_state=last_hidden_state,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
        )


@add_start_docstrings(
    """
    ImageNet.
    c                        e Zd Z fdZ ee           eeee	e
          	 	 	 	 d
dej        deej                 dee         dee         deeef         f
d	                        Z xZS ) ConvNextV2ForImageClassificationc                 B   t                                          |           |j        | _        t          |          | _        |j        dk    r%t          j        |j        d         |j                  nt          j                    | _	        | 
                                 d S )Nr   rR   )r2   r3   
num_labelsr   r   r   r   rw   r   
classifierr   r{   s     r+   r3   z)ConvNextV2ForImageClassification.__init__  s        +)&11 FLEVYZEZEZBIf)"-v/@AAA`b`k`m`m 	
 	r-   )r   r   r   r   Nr}   labelsr   r   r   c                    ||n| j         j        }|                     |||          }|r|j        n|d         }|                     |          }d}|Z| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|j        t          j	        k    s|j        t          j
        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }	| j        dk    r1 |	|                                |                                          }n |	||          }n| j         j        dk    rGt                      }	 |	|                    d| j                  |                    d                    }n*| j         j        dk    rt                      }	 |	||          }|s|f|dd         z   }
||f|
z   n|
S t!          |||j        	          S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.convnextv2(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)

        pooled_output = outputs.pooler_output if return_dict else outputs[1]

        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)
        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return ImageClassifierOutputWithNoAttention(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
        )


@add_start_docstrings(
    """
    c                        e Zd Z fdZ ee           eee          	 	 d	de	j
        dee         dee         defd                        Z xZS )
ConvNextV2Backbonec                    t                                          |           t                                          |           t          |          | _        t          |          | _        |j        d         g|j        z   | _        i }t          | j
        | j                  D ]\  }}t          |d          ||<   t          j        |          | _        |                                  d S )Nr   r^   )ra   )r2   r3   _init_backbonero   r   r   r   rw   num_featureszip_out_featureschannelsr[   r   
ModuleDicthidden_states_normsr   )r4   r|   r	  r   rv   r5   s        r+   r3   zConvNextV2Backbone.__init__  s       v&&&.v66(00#034v7JJ !#&t'94=#I#I 	i 	iE<)<\Wg)h)h)h&&#%=1D#E#E  	r-   )r   r   Nr}   r   r   r   c                    ||n| j         j        }||n| j         j        }|                     |          }|                     |d|          }|r|j        n|d         }d}t          | j        |          D ]*\  }}	|| j        v r | j	        |         |	          }	||	fz  }+|s|f}
|r|
|fz  }
|
S t          ||r|ndd          S )a  
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("facebook/convnextv2-tiny-1k-224")
        >>> model = AutoBackbone.from_pretrained("facebook/convnextv2-tiny-1k-224")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
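
        >>> # Each returned feature map is (batch_size, num_channels, height, width); which stages are
        >>> # returned depends on the checkpoint's `out_features` configuration.
        >>> feature_maps = outputs.feature_maps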
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        embedding_output = self.embeddings(pixel_values)

        outputs = self.encoder(
            embedding_output,
            output_hidden_states=True,
            return_dict=return_dict,
        )

        hidden_states = outputs.hidden_states if return_dict else outputs[1]

        feature_maps = ()
        for stage, hidden_state in zip(self.stage_names, hidden_states):
            if stage in self.out_features:
                hidden_state = self.hidden_states_norms[stage](hidden_state)
                feature_maps += (hidden_state,)

        if not return_dict:
            output = (feature_maps,)
            if output_hidden_states:
                output += (hidden_states,)
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=hidden_states if output_hidden_states else None,
            attentions=None,
        )
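

# Usage sketch (comments only, so importing this module stays free of side effects).
# A minimal end-to-end classification pass; the checkpoint name mirrors the
# `_IMAGE_CLASS_CHECKPOINT` constant above and is assumed to be reachable, and
# "cat.png" is a hypothetical local image:
#
#     from PIL import Image
#     from transformers import AutoImageProcessor, ConvNextV2ForImageClassification
#
#     processor = AutoImageProcessor.from_pretrained("facebook/convnextv2-tiny-1k-224")
#     model = ConvNextV2ForImageClassification.from_pretrained("facebook/convnextv2-tiny-1k-224")
#
#     image = Image.open("cat.png")
#     inputs = processor(image, return_tensors="pt")
#     logits = model(**inputs).logits  # (batch_size, num_labels)
#     print(model.config.id2label[logits.argmax(-1).item()])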