
    gB                        d Z ddlmZmZmZmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ dZ G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d dej                  Z G d de          ZdZdZ ede           G d de                      Z dS )zrPyTorch UperNet model. Based on OpenMMLab's implementation, found in https://github.com/open-mmlab/mmsegmentation.    )ListOptionalTupleUnionN)nn)CrossEntropyLoss   )SemanticSegmenterOutput)PreTrainedModel)add_start_docstrings%add_start_docstrings_to_model_forwardreplace_return_docstrings)load_backbone   )UperNetConfigr   c                        e Zd ZdZ	 	 	 ddededeeeeef         f         deeeeef         ef         d	ed
eeeeef         f         ddf fdZ	de
j        de
j        fdZ xZS )UperNetConvModulez
    A convolutional block that bundles conv/norm/activation layers. This block simplifies the usage of convolution
    layers, which are commonly used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
    r   Fr   in_channelsout_channelskernel_sizepaddingbiasdilationreturnNc                     t                                                       t          j        ||||||          | _        t          j        |          | _        t          j                    | _        d S )N)r   r   r   r   r   r   )	super__init__r   Conv2dconvBatchNorm2d
batch_normReLU
activation)selfr   r   r   r   r   r   	__class__s          h/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/upernet/modeling_upernet.pyr   zUperNetConvModule.__init__(   si     	I#%#
 
 
	 .66'))    inputc                     |                      |          }|                     |          }|                     |          }|S N)r   r!   r#   )r$   r(   outputs      r&   forwardzUperNetConvModule.forward=   s:    5!!((((r'   )r   Fr   )__name__
__module____qualname____doc__intr   r   strboolr   torchTensorr,   __classcell__r%   s   @r&   r   r   "   s          5601$ $$ $ 3c3h/0	$
 sE#s(OS01$ $ U38_,-$ 
$ $ $ $ $ $*U\ el        r'   r   c                   T     e Zd Zdedededdf fdZdej        dej        fdZ xZS )	UperNetPyramidPoolingBlock
pool_scaler   channelsr   Nc                    t                                                       t          j        |          t	          ||d          g| _        t          | j                  D ](\  }}|                     t          |          |           )d S )Nr   r   )	r   r   r   AdaptiveAvgPool2dr   layers	enumerate
add_moduler2   )r$   r:   r   r;   ilayerr%   s         r&   r   z#UperNetPyramidPoolingBlock.__init__F   s     ,,k8CCC
 "$+.. 	+ 	+HAuOOCFFE****	+ 	+r'   r(   c                 4    |}| j         D ]} ||          }|S r*   )r?   )r$   r(   hidden_staterC   s       r&   r,   z"UperNetPyramidPoolingBlock.forwardO   s/    [ 	/ 	/E 5..LLr'   )	r-   r.   r/   r1   r   r4   r5   r,   r6   r7   s   @r&   r9   r9   E   s        +3 +S +C +D + + + + + +U\ el        r'   r9   c            
       x     e Zd ZdZdeedf         dedededdf
 fd	Zd
ej	        de
ej	                 fdZ xZS )UperNetPyramidPoolingModulea}  
    Pyramid Pooling Module (PPM) used in PSPNet.

    Args:
        pool_scales (`Tuple[int]`):
            Pooling scales used in Pooling Pyramid Module.
        in_channels (`int`):
            Input channels.
        channels (`int`):
            Channels after modules, before conv_seg.
        align_corners (`bool`):
            align_corners argument of F.interpolate.
    pool_scales.r   r;   align_cornersr   Nc                 V   t                                                       || _        || _        || _        || _        g | _        t          |          D ]T\  }}t          |||          }| j        	                    |           | 
                    t          |          |           Ud S )N)r:   r   r;   )r   r   rH   rI   r   r;   blocksr@   r9   appendrA   r2   )	r$   rH   r   r;   rI   rB   r:   blockr%   s	           r&   r   z$UperNetPyramidPoolingModule.__init__e   s    &*& &{33 	+ 	+MAz.*R]hpqqqEKu%%%OOCFFE****	+ 	+r'   xc                     g }| j         D ]d} ||          }t          j                            ||                                dd          d| j                  }|                    |           e|S )N   bilinearsizemoderI   )rK   r   
functionalinterpolaterS   rI   rL   )r$   rN   ppm_outsppmppm_outupsampled_ppm_outs         r&   r,   z#UperNetPyramidPoolingModule.forwardq   s{    ; 	/ 	/Cc!ffG " 9 9affhhqrrl4K] !: ! ! OO-....r'   )r-   r.   r/   r0   r   r1   r3   r   r4   r5   r   r,   r6   r7   s   @r&   rG   rG   V   s         
+E#s(O 
+# 
+QT 
+ei 
+nr 
+ 
+ 
+ 
+ 
+ 
+ $u|*<        r'   rG   c                   X     e Zd ZdZ fdZd Zd Zd Zdej	        dej	        fdZ
 xZS )	UperNetHeadz
    Unified Perceptual Parsing for Scene Understanding. This head is the implementation of
    [UPerNet](https://arxiv.org/abs/1807.10221).
    c                    t                                                       || _        |j        | _        || _        |j        | _        d| _        t          j	        | j        |j
        d          | _        t          | j        | j        d         | j        | j                  | _        t          | j        d         t          | j                  | j        z  z   | j        dd          | _        t          j                    | _        t          j                    | _        | j        d d         D ]j}t          || j        d          }t          | j        | j        dd          }| j                            |           | j                            |           kt          t          | j                  | j        z  | j        dd          | _        d S )NFr   r=   )rI   r	   r   r   )r   r   configrH   r   hidden_sizer;   rI   r   r   
num_labels
classifierrG   psp_modulesr   len
bottleneck
ModuleListlateral_convs	fpn_convsrL   fpn_bottleneck)r$   r`   r   l_convfpn_convr%   s        r&   r   zUperNetHead.__init__   s   !-&*")DM63DRSTTT 7R M,	
 
 
 ,R 3t'7#8#84=#HHM	
 
 
  ]__+CRC0 	, 	,K&{DMqQQQF(ST^_```H%%f---N!!(++++/ !!DM1M	
 
 
r'   c                 :    |                      | j                   d S r*   apply_init_weightsr$   s    r&   init_weightszUperNetHead.init_weights       

4%&&&&&r'   c                     t          |t          j                  rR|j        j                            d| j        j                   |j        "|j        j        	                                 d S d S d S Ng        )meanstd

isinstancer   r   weightdatanormal_r`   initializer_ranger   zero_r$   modules     r&   rp   zUperNetHead._init_weights   n    fbi(( 	)M&&CT[5R&SSS{& &&(((((	) 	)&&r'   c                     |d         }|g}|                     |                     |                     t          j        |d          }|                     |          }|S )Nr^   r   dim)extendrd   r4   catrf   )r$   inputsrN   psp_outsr+   s        r&   psp_forwardzUperNetHead.psp_forward   s\    2J3((++,,,9X1---**r'   encoder_hidden_statesr   c                 B    fdt           j                  D                                                                           t	                    }t          |dz
  dd          D ]Z}|dz
           j        dd          }|dz
           t          j        	                    |         |d j
                  z   |dz
  <   [ fdt          |dz
            D             }|                    d                    t          |dz
  dd          D ]F}t          j        	                    ||         |d         j        dd          d j
                  ||<   Gt          j        |d	          }                     |          }                     |          }|S )
Nc                 8    g | ]\  }} ||                   S  r   ).0rB   lateral_convr   s      r&   
<listcomp>z'UperNetHead.forward.<locals>.<listcomp>   s-    pppq,LL!6q!9::pppr'   r   r   r^   rP   rQ   rR   c                 H    g | ]} j         |         |                   S r   )ri   )r   rB   lateralsr$   s     r&   r   z'UperNetHead.forward.<locals>.<listcomp>   s/    \\\q%DN1%hqk22\\\r'   r   )r@   rh   rL   r   re   rangeshaper   rU   rV   rI   r4   r   rj   rc   )r$   r   used_backbone_levelsrB   
prev_shapefpn_outsr+   r   s   ``     @r&   r,   zUperNetHead.forward   s   ppppR[\`\nRoRoppp(()>??@@@  #8}}+a/B77 	 	A!!a%.qrr2J&q1uo0I0I*:TM_ 1J 1 1 HQUOO
 ]\\\\EBVYZBZ<[<[\\\%%%+a/B77 	 	A-33(1+"3ABB"7jX\Xj 4  HQKK 9X1---$$X..((r'   )r-   r.   r/   r0   r   rr   rp   r   r4   r5   r,   r6   r7   s   @r&   r\   r\   |   s         
%
 %
 %
 %
 %
N' ' ') ) )  U\ el        r'   r\   c                        e Zd ZdZ	 ddededeeeeef         f         dd	f fd
Zd Zd Z	de
j        de
j        fdZ xZS )UperNetFCNHeada  
    Fully Convolution Networks for Semantic Segmentation. This head is the implementation of
    [FCNNet](https://arxiv.org/abs/1411.4038>).

    Args:
        config:
            Configuration.
        in_channels (int):
            Number of input channels.
        kernel_size (int):
            The kernel size for convs in the head. Default: 3.
        dilation (int):
            The dilation rate for convs in the head. Default: 1.
    rP   r	   r   in_indexr   r   r   Nc           
         t                                                       || _        |j        | _        |j        | _        |j        | _        |j	        | _
        || _        |dz  |z  }g }|                    t          | j        | j        |||                     t          | j        dz
            D ]3}|                    t          | j        | j        |||                     4| j        dk    rt          j                    | _        nt          j        | | _        | j
        r-t          | j        | j        z   | j        ||dz            | _        t          j        | j        |j        d          | _        d S )NrP   )r   r   r   r   r   r_   r=   )r   r   r`   auxiliary_in_channelsr   auxiliary_channelsr;   auxiliary_num_convs	num_convsauxiliary_concat_inputconcat_inputr   rL   r   r   r   Identityconvs
Sequentialconv_catr   rb   rc   )	r$   r`   r   r   r   conv_paddingr   rB   r%   s	           r&   r   zUperNetFCNHead.__init__   s    	!713"9 #q(H4 $-[R^iq  	
 	
 	

 t~)** 	 	ALL!M4=kS_jr     
 >QDJJ.DJ 	- 4=0$-[bmqrbr  DM )DM63DRSTTTr'   c                 :    |                      | j                   d S r*   rn   rq   s    r&   rr   zUperNetFCNHead.init_weights  rs   r'   c                     t          |t          j                  rR|j        j                            d| j        j                   |j        "|j        j        	                                 d S d S d S ru   rx   r   s     r&   rp   zUperNetFCNHead._init_weights  r   r'   r   c                     || j                  }|                     |          }| j        r+|                     t	          j        ||gd                    }|                     |          }|S )Nr   r   )r   r   r   r   r4   r   rc   )r$   r   hidden_statesr+   s       r&   r,   zUperNetFCNHead.forward  sf    -dm<M** 	N]]59mV-D!#L#L#LMMF((r'   )rP   r	   r   )r-   r.   r/   r0   r1   r   r   r   rr   rp   r4   r5   r,   r6   r7   s   @r&   r   r      s           hi"U "U #"U69"UINsTYZ]_bZbTcOcId"U	"U "U "U "U "U "UH' ' ') ) )U\ el        r'   r   c                   *    e Zd ZdZeZdZg Zd Zd Z	dS )UperNetPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    pixel_valuesc                     t          |t                    rT|j                                         |j                                         |j        |j                                         d S d S d S r*   )ry   r   backbonerr   decode_headauxiliary_headr   s     r&   rp   z$UperNetPreTrainedModel._init_weights*  st    f455 	5O((***++---$0%2244444		5 	5 10r'   c                     | j                                          | j                                         | j        | j                                         dS dS )zInitialize the weightsN)r   rr   r   r   rq   s    r&   rr   z#UperNetPreTrainedModel.init_weights1  sW    ""$$$%%'''*,,..... +*r'   N)
r-   r.   r/   r0   r   config_classmain_input_name_no_split_modulesrp   rr   r   r'   r&   r   r      sN         
 !L$O5 5 5/ / / / /r'   r   aI  
    Parameters:
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.
        config ([`UperNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
ax  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
            [`AutoImageProcessor`]. See [`SegformerImageProcessor.__call__`] for details.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers in case the backbone has them. See
            `attentions` under returned tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers of the backbone. See `hidden_states` under
            returned tensors for more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zMUperNet framework leveraging any vision backbone e.g. for ADE20k, CityScapes.c                       e Zd Z fdZ ee                    d                     eee	          	 	 	 	 	 dde
ej                 de
e         de
e         de
ej                 d	e
e         d
eeef         fd                        Z xZS )UperNetForSemanticSegmentationc                    t                                          |           t          |          | _        t	          || j        j                  | _        |j        rt          |          nd | _	        | 
                                 d S )N)r   )r   r   r   r   r\   r;   r   use_auxiliary_headr   r   	post_init)r$   r`   r%   s     r&   r   z'UperNetForSemanticSegmentation.__init__X  s|       %f-- 'v4=;QRRR8>8Q[nV444W[ 	r'   zbatch_size, sequence_length)output_typer   Nr   output_attentionsoutput_hidden_stateslabelsreturn_dictr   c                 6   || j         j        dk    rt          d          ||n| j         j        }||n| j         j        }||n| j         j        }| j                            |||          }|j        }| 	                    |          }t          j                            ||j        dd         dd          }d}	| j        E|                     |          }	t          j                            |	|j        dd         dd          }	d}
|Ft          | j         j        	          } |||          }
|	 ||	|          }|
| j         j        |z  z  }
|s)|r|f|dd         z   }n|f|dd         z   }|
|
f|z   n|S t%          |
||j        |j        
          S )a  
        labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

        Returns:

        Examples:
        ```python
        >>> from transformers import AutoImageProcessor, UperNetForSemanticSegmentation
        >>> from PIL import Image
        >>> from huggingface_hub import hf_hub_download

        >>> image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-tiny")
        >>> model = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-tiny")

        >>> filepath = hf_hub_download(
        ...     repo_id="hf-internal-testing/fixtures_ade20k", filename="ADE_val_00000001.jpg", repo_type="dataset"
        ... )
        >>> image = Image.open(filepath).convert("RGB")

        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> outputs = model(**inputs)

        >>> logits = outputs.logits  # shape (batch_size, num_labels, height, width)
        >>> list(logits.shape)
        [1, 150, 512, 512]
        ```Nr   z/The number of labels should be greater than one)r   r   rP   rQ   FrR   )ignore_index)losslogitsr   
attentions)r`   rb   
ValueErroruse_return_dictr   r   r   forward_with_filtered_kwargsfeature_mapsr   r   rU   rV   r   r   r   loss_ignore_indexauxiliary_loss_weightr
   r   r   )r$   r   r   r   r   r   outputsfeaturesr   auxiliary_logitsr   loss_fctauxiliary_lossr+   s                 r&   r,   z&UperNetForSemanticSegmentation.forwardd  s   N $+"8A"="=NOOO%0%<kk$+B]$8$D  $+Jj 	 2C1N--TXT_Tq-<</CWh = 
 
 '!!(++**68J1228NU_ot*uu*#228<<!}88 |'9!""'=J^c  9     'T[5RSSSH8FF++D+!)*:F!C!C9NJJ 	F# 1 WQRR[0 WQRR[0)-)9TGf$$vE&!/)	
 
 
 	
r'   )NNNNN)r-   r.   r/   r   r   UPERNET_INPUTS_DOCSTRINGformatr   r
   _CONFIG_FOR_DOCr   r4   r5   r3   r   tupler,   r6   r7   s   @r&   r   r   S  s       

 
 
 
 
 +*+C+J+JKh+i+ijj+BQ`aaa 04,0/3)-&*R
 R
u|,R
 $D>R
 'tn	R

 &R
 d^R
 
u--	.R
 R
 R
 ba kjR
 R
 R
 R
 R
r'   r   )!r0   typingr   r   r   r   r4   r   torch.nnr   modeling_outputsr
   modeling_utilsr   utilsr   r   r   utils.backbone_utilsr   configuration_upernetr   r   Moduler   r9   rG   r\   r   r   UPERNET_START_DOCSTRINGr   r   r   r'   r&   <module>r      sW   y x / / / / / / / / / / / /        % % % % % % 7 7 7 7 7 7 - - - - - - k k k k k k k k k k 1 1 1 1 1 1 0 0 0 0 0 0 "         	      F       "# # # # #") # # #LZ Z Z Z Z") Z Z ZzD D D D DRY D D DN/ / / / /_ / / /2    W a
 a
 a
 a
 a
%; a
 a
	 a
 a
 a
r'   