
"""PyTorch PatchTSMixer model."""

import math
from dataclasses import dataclass
from typing import Optional, Tuple, Union

import torch
import torch.nn as nn

from transformers.modeling_utils import PreTrainedModel
from transformers.utils import ModelOutput

from ...time_series_utils import NegativeBinomialOutput, NormalOutput, StudentTOutput
from ...utils import (
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
    replace_return_docstrings,
)
from .configuration_patchtsmixer import PatchTSMixerConfig


logger = logging.get_logger(__name__)

_CONFIG_FOR_DOC = "PatchTSMixerConfig"


PATCHTSMIXER_START_DOCSTRING = r"""

    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
    and behavior.

    Parameters:
        config ([`PatchTSMixerConfig`]):
            Model configuration class with all the parameters of the model. Initializing with a config file does not
            load the weights associated with the model, only the configuration. Check out the
            [`~PreTrainedModel.from_pretrained`] method to load the model weights.
        mask_input (`bool`, *optional*, defaults to `False`):
            If True, Masking will be enabled. False otherwise.
"""

PATCHTSMIXER_INPUTS_DOCSTRING = r"""
    Args:
        past_values (`torch.FloatTensor` of shape `(batch_size, seq_length, num_input_channels)`):
            Context values of the time series. For a pretraining task, this denotes the input time series to predict
            the masked portion. For a forecasting task, this denotes the history/past time series values. Similarly,
            for classification or regression tasks, it denotes the appropriate context values of the time series.

            For univariate time series, `num_input_channels` dimension should be 1. For multivariate time series, it is
            greater than 1.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers.

        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


class PatchTSMixerGatedAttention(nn.Module):
    """
    Module that applies gated attention to input data.

    Args:
        in_size (`int`): The input size.
        out_size (`int`): The output size.
    """

    def __init__(self, in_size: int, out_size: int):
        super().__init__()
        self.attn_layer = nn.Linear(in_size, out_size)
        self.attn_softmax = nn.Softmax(dim=-1)

    def forward(self, inputs):
        attn_weight = self.attn_softmax(self.attn_layer(inputs))
        inputs = inputs * attn_weight
        return inputs


class PatchTSMixerBatchNorm(nn.Module):
    """
    Compute batch normalization over the sequence length (time) dimension.
    """

    def __init__(self, config: PatchTSMixerConfig):
        super().__init__()
        self.batchnorm = nn.BatchNorm1d(config.d_model, eps=config.norm_eps)

    def forward(self, inputs: torch.Tensor):
        """
        Parameters:
            inputs (`torch.Tensor` of shape `(batch_size, sequence_length, d_model)`):
                input for Batch norm calculation
        Returns:
            `torch.Tensor` of shape `(batch_size, sequence_length, d_model)`
        """
        # transpose to (batch_size, d_model, sequence_length) so BatchNorm1d normalizes over time
        output = inputs.transpose(1, 2)
        output = self.batchnorm(output)
        return output.transpose(1, 2)
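# Illustrative sketch (not part of the original module): `PatchTSMixerBatchNorm` expects input of
# shape `(batch_size, sequence_length, d_model)` and transposes to channel-first so `nn.BatchNorm1d`
# normalizes each of the `d_model` features over the time dimension. Assuming a configured
# `PatchTSMixerConfig` named `config`:
#
#     norm = PatchTSMixerBatchNorm(config)
#     out = norm(torch.randn(8, config.num_patches, config.d_model))  # same shape as the input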
class PatchTSMixerPositionalEncoding(nn.Module):
    """
    Class for positional encoding
    """

    def __init__(self, config: PatchTSMixerConfig):
        super().__init__()
        # positional encoding: [num_patches x d_model]
        if config.use_positional_encoding:
            self.position_enc = self._init_pe(config)
        else:
            self.position_enc = nn.Parameter(torch.zeros(config.num_patches, config.d_model))

    @staticmethod
    def _init_pe(config: PatchTSMixerConfig) -> nn.Parameter:
        # Positional encoding
        if config.positional_encoding_type == "random":
            position_enc = nn.Parameter(torch.randn(config.num_patches, config.d_model), requires_grad=True)
        elif config.positional_encoding_type == "sincos":
            position_enc = torch.zeros(config.num_patches, config.d_model)
            position = torch.arange(0, config.num_patches).unsqueeze(1)
            div_term = torch.exp(torch.arange(0, config.d_model, 2) * -(math.log(10000.0) / config.d_model))
            position_enc[:, 0::2] = torch.sin(position * div_term)
            position_enc[:, 1::2] = torch.cos(position * div_term)
            position_enc = position_enc - position_enc.mean()
            position_enc = position_enc / (position_enc.std() * 10)
            position_enc = nn.Parameter(position_enc, requires_grad=False)
        else:
            raise ValueError(
                f"{config.positional_encoding_type} is not a valid positional encoder. Available types are 'random' and 'sincos'."
            )
        return position_enc

    def forward(self, patch_input: torch.Tensor):
        # hidden_state: [bs x num_channels x num_patches x d_model]
        hidden_state = patch_input + self.position_enc
        return hidden_state
class PatchTSMixerNormLayer(nn.Module):
    """Normalization block

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    """

    def __init__(self, config: PatchTSMixerConfig):
        super().__init__()
        self.norm_mlp = config.norm_mlp
        if "batch" in config.norm_mlp.lower():
            self.norm = PatchTSMixerBatchNorm(config)
        else:
            self.norm = nn.LayerNorm(config.d_model, eps=config.norm_eps)

    def forward(self, inputs: torch.Tensor):
        """
        Args:
            inputs (`torch.Tensor` of shape `((batch_size, num_channels, num_patches, d_model))`):
                Input to the normalization layer.
        Returns:
            `torch.Tensor` of shape `((batch_size, num_channels, num_patches, d_model))`
        """
        if "batch" in self.norm_mlp.lower():
            # fold channels into the batch dimension so batch norm sees (batch*channels, num_patches, d_model)
            inputs_reshaped = torch.reshape(
                inputs,
                (inputs.shape[0] * inputs.shape[1], inputs.shape[2], inputs.shape[3]),
            )
            inputs_reshaped = self.norm(inputs_reshaped)
            # put the data back into its original shape
            inputs = torch.reshape(inputs_reshaped, inputs.shape)
        else:
            inputs = self.norm(inputs)

        return inputs
class PatchTSMixerMLP(nn.Module):
    def __init__(self, in_features, out_features, config):
        super().__init__()
        num_hidden = in_features * config.expansion_factor
        self.fc1 = nn.Linear(in_features, num_hidden)
        self.dropout1 = nn.Dropout(config.dropout)
        self.fc2 = nn.Linear(num_hidden, out_features)
        self.dropout2 = nn.Dropout(config.dropout)

    def forward(self, inputs: torch.Tensor):
        """
        Args:
            inputs (`torch.Tensor` of shape `((batch_size, num_channels, num_patches, d_model))`):
                Input to the MLP layer.
        Returns:
            `torch.Tensor` of the same shape as `inputs`
        """
        inputs = self.dropout1(nn.functional.gelu(self.fc1(inputs)))
        inputs = self.fc2(inputs)
        inputs = self.dropout2(inputs)
        return inputs
class PatchTSMixerChannelFeatureMixerBlock(nn.Module):
    """This module mixes the features in the channel dimension.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    """

    def __init__(self, config: PatchTSMixerConfig):
        super().__init__()
        self.norm = PatchTSMixerNormLayer(config)
        self.gated_attn = config.gated_attn
        self.mlp = PatchTSMixerMLP(
            in_features=config.num_input_channels,
            out_features=config.num_input_channels,
            config=config,
        )
        if config.gated_attn:
            self.gating_block = PatchTSMixerGatedAttention(
                in_size=config.num_input_channels, out_size=config.num_input_channels
            )

    def forward(self, inputs: torch.Tensor):
        """
        Args:
            inputs (`torch.Tensor` of shape `((batch_size, num_channels, num_patches, d_model))`):
                input to the MLP layer
        Returns:
            `torch.Tensor` of the same shape as `inputs`
        """
        residual = inputs
        inputs = self.norm(inputs)

        # move the channel dimension to the last axis before mixing it
        inputs = inputs.permute(0, 3, 2, 1)

        if self.gated_attn:
            inputs = self.gating_block(inputs)

        inputs = self.mlp(inputs)
        inputs = inputs.permute(0, 3, 2, 1)

        out = inputs + residual
        return out
ededee         f fdZ	de
j        dedefdZ	 	 	 	 	 dde
j        dee
j                 deee
j                          dee
j                 dee
j                 dedee
j        ee
j                 eee
j                          f         fdZ xZS )PatchTSMixerAttentionz=Multi-headed attention from 'Attention Is All You Need' paper        FTN	embed_dim	num_headsrx   
is_decoderbias	is_causalr4   c                 
   t                                                       || _        || _        || _        ||z  | _        || _        | j        |z  | j        k    rt          d| j         d| d          | j        dz  | _        || _	        || _
        t          j        |||          | _        t          j        |||          | _        t          j        |||          | _        t          j        |||          | _        d S )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: ).g      )r   )r   r   r   r   rx   head_dimr4   r_   scalingr   r   r   r   k_projv_projq_projout_proj)	r"   r   r   rx   r   r   r   r4   r#   s	           r$   r   zPatchTSMixerAttention.__init__  s    	""!Y.MI%$.883dn 3 3%.3 3 3   }d*$"i	94@@@i	94@@@i	94@@@	)YTBBBr%   tensorseq_lenbszc                     |                     ||| j        | j                                      dd                                          S )Nr   r>   )viewr   r   r?   
contiguous)r"   r   r   r   s       r$   _shapezPatchTSMixerAttention._shape<  s<    {{3GGQQRSUVWWbbdddr%   hidden_stateskey_value_statespast_key_valueattention_masklayer_head_maskoutput_attentionsrM   c                 Z
   |du}|                                 \  }}	}
|                     |          | j        z  }|r6|4|d         j        d         |j        d         k    r|d         }|d         }n>|rU|                     |                     |          d|          }|                     |                     |          d|          }n||                     |                     |          d|          }|                     |                     |          d|          }t          j        |d         |gd          }t          j        |d         |gd          }nT|                     |                     |          d|          }|                     |                     |          d|          }| j	        r||f}|| j
        z  d| j        f} |                     ||	|          j        | } |j        | } |j        | }|                     d          }t          j        ||                    dd                    }|                                 || j
        z  |	|fk    r2t!          d|| j
        z  |	|f d|                                            ||                                 |d|	|fk    r+t!          d	|d|	|f d|                                            |                    || j
        |	|          |z   }|                    || j
        z  |	|          }t"          j                            |d          }||                                 | j
        fk    r-t!          d
| j
        f d|                                            |                    dddd          |                    || j
        |	|          z  }|                    || j
        z  |	|          }|r=|                    || j
        |	|          }|                    || j
        z  |	|          }nd}t"          j                            || j        | j                  }t          j        ||          }|                                 || j
        z  |	| j        fk    r7t!          d|| j
        z  |	| j        f d|                                            |                    || j
        |	| j                  }|                    dd          }|                    ||	| j                  }|                     |          }|||fS )z#Input shape: Batch x Time x ChannelNr   r>   r   r   r   z$Attention weights should be of size z	, but is z!Attention mask should be of size z/Head mask for a single layer should be of size )ptrainingz `attn_output` should be of size )sizer   r   rp   r   r   r   rB   catr   r   r   r   ro   bmmr?   r_   r   r   softmaxrx   r   r   r   )r"   r   r   r   r   r   r   is_cross_attentionr   tgt_len_query_states
key_statesvalue_states
proj_shapesrc_lenattn_weightsattn_weights_reshaped
attn_probsattn_outputs                       r$   r*   zPatchTSMixerAttention.forward?  s    .T9',,..Wa {{=11DL@ 	L*q!'*.>.DQ.GGG (*J)!,LL 	LT[[1A%B%BBLLJ;;t{{3C'D'Db#NNLL'T[[%?%?SIIJ;;t{{='A'A2sKKLN1$5z#BJJJJ 9nQ&7%FANNNLL T[[%?%?SIIJ;;t{{='A'A2sKKL? 	8 ),7NDN*B>
Ct{{<#>>CZP'Z'4
+|+Z8//!$$yz/C/CAq/I/IJJ3#7'"JJJ*dn8LgW^7_ * * %%''* *  
 %""$$a'(BBB ta'8Rtt]k]p]p]r]rtt   (,,S$.'7SSVddL',,S4>-A7GTTL},,\r,BB&##%%$.)::: 1t~FW 1 1',,..1 1   +//2q!<<|?P?PQTVZVdfmov?w?wwL',,S4>-A7GTTL 	)
 %1$5$5c4>7T[$\$\!055cDN6JGU\]]LL$(!]**<4<RVR_*``
i
L99#"6!OOO)C$.4H'SWS`3a ) )$$&&) )  
 "&&sDNGT]SS!++Aq11 "))#wGGmmK001>AAr%   )r   FTFN)NNNNF)r+   r,   r-   r.   r/   floatboolr   r   r   rB   rC   r   r   r*   r0   r1   s   @r$   r   r     s       GG  /3C CC C 	C
 C C C +,C C C C C C>eU\ eC ec e e e e 488<1526"'vB vB|vB #5<0vB !u|!45	vB
 !.vB "%,/vB  vB 
u|Xel3XeEL>Q5RR	SvB vB vB vB vB vB vB vBr%   r   c                   .     e Zd ZdZdef fdZd Z xZS )PatchMixerBlockzxThis module mixes the patch dimension.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r4   c                    t                                                       t          |          | _        |j        | _        |j        | _        t          |j        |j        |          | _        |j        r t          |j        |j                  | _
        |j        r<t          |j        |j        |j                  | _        t          |          | _        d S d S )Nr   r   )r   r   rx   )r   r   rg   rl   	self_attnr   rs   rL   r   r   r   r   r9   self_attn_headsrx   self_attn_layer	norm_attnr<   s     r$   r   zPatchMixerBlock.__init__  s    )&11	) +"*+
 
 
  	t :6CU`f`r s s sD 	;#8 . 0$ $ $D 
 36::DNNN	; 	;r%   c                    |}|                      |          }| j        rY|j        \  }}}}|                    ||z  ||          }|                     |d          \  }}	}	|                    ||||          }|                    dd          }|                     |          }| j        r|                     |          }|                    dd          }| j        r| 	                    ||z             }||z   }
|
S )z
        Args:
class PatchMixerBlock(nn.Module):
    """This module mixes the patch dimension.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    """

    def __init__(self, config: PatchTSMixerConfig):
        super().__init__()
        self.norm = PatchTSMixerNormLayer(config)
        self.self_attn = config.self_attn
        self.gated_attn = config.gated_attn
        self.mlp = PatchTSMixerMLP(
            in_features=config.num_patches,
            out_features=config.num_patches,
            config=config,
        )
        if config.gated_attn:
            self.gating_block = PatchTSMixerGatedAttention(in_size=config.num_patches, out_size=config.num_patches)
        if config.self_attn:
            self.self_attn_layer = PatchTSMixerAttention(
                embed_dim=config.d_model,
                num_heads=config.self_attn_heads,
                dropout=config.dropout,
            )
            self.norm_attn = PatchTSMixerNormLayer(config)

    def forward(self, hidden_state):
        """
        Args:
            hidden_state (`torch.Tensor`): Input tensor.

        Returns:
            `torch.Tensor`: Transformed tensor.
        """
        residual = hidden_state
        hidden_state = self.norm(hidden_state)

        if self.self_attn:
            batch_size, n_vars, num_patches, d_model = hidden_state.shape
            hidden_state_reshaped = hidden_state.reshape(batch_size * n_vars, num_patches, d_model)
            x_attn, _, _ = self.self_attn_layer(hidden_state_reshaped, output_attentions=False)
            x_attn = x_attn.reshape(batch_size, n_vars, num_patches, d_model)

        # transpose so that num_patches is the last dimension
        hidden_state = hidden_state.transpose(2, 3)
        hidden_state = self.mlp(hidden_state)

        if self.gated_attn:
            hidden_state = self.gating_block(hidden_state)

        # transpose back
        hidden_state = hidden_state.transpose(2, 3)

        if self.self_attn:
            hidden_state = self.norm_attn(hidden_state + x_attn)

        out = hidden_state + residual
        return out


class FeatureMixerBlock(nn.Module):
    """This module mixes the hidden feature dimension.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.

    """

    def __init__(self, config: PatchTSMixerConfig):
        super().__init__()
        self.norm = PatchTSMixerNormLayer(config)
        self.gated_attn = config.gated_attn
        self.mlp = PatchTSMixerMLP(
            in_features=config.d_model,
            out_features=config.d_model,
            config=config,
        )
        if config.gated_attn:
            self.gating_block = PatchTSMixerGatedAttention(in_size=config.d_model, out_size=config.d_model)

    def forward(self, hidden: torch.Tensor):
        """
        Args:
            hidden (`torch.Tensor` of shape `(batch_size, num_patches, d_model)`):
                Input tensor to the layer.

        Returns:
            `torch.Tensor`: Transformed tensor.
        """
        residual = hidden
        hidden = self.norm(hidden)
        hidden = self.mlp(hidden)

        if self.gated_attn:
            hidden = self.gating_block(hidden)

        out = hidden + residual
        return out


class PatchTSMixerLayer(nn.Module):
    """
    The `PatchTSMixer` layer that does all three kinds of mixing.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.

    """

    def __init__(self, config: PatchTSMixerConfig):
        super().__init__()
        self.patch_mixer = PatchMixerBlock(config=config)
        self.feature_mixer = FeatureMixerBlock(config=config)
        self.mode = config.mode
        if config.mode == "mix_channel":
            self.channel_feature_mixer = PatchTSMixerChannelFeatureMixerBlock(config=config)

    def forward(self, hidden: torch.Tensor):
        """
        Args:
            hidden (`torch.Tensor` of shape `(batch_size, num_patches, d_model)`):
                Input tensor to the layer.

        Returns:
            `torch.Tensor`: Transformed tensor.
        """
        if self.mode == "mix_channel":
            hidden = self.channel_feature_mixer(hidden)
        hidden = self.patch_mixer(hidden)
        hidden = self.feature_mixer(hidden)
        return hidden
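# Illustrative sketch (not part of the original module): each mixer block moves the dimension it
# mixes into the last axis, applies `PatchTSMixerMLP` (plus optional gating), then moves it back.
# Assuming a hidden tensor of shape `(batch_size, num_channels, num_patches, d_model)`:
#
#     hidden = PatchTSMixerChannelFeatureMixerBlock(config)(hidden)  # mixes across num_channels
#     hidden = PatchMixerBlock(config)(hidden)                       # mixes across num_patches
#     hidden = FeatureMixerBlock(config)(hidden)                     # mixes across d_model
#
# which is the sequence `PatchTSMixerLayer.forward` applies when `config.mode == "mix_channel"`.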
class PatchTSMixerBlock(nn.Module):
    """The main computing framework of the `PatchTSMixer` model.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    """

    def __init__(self, config: PatchTSMixerConfig):
        super().__init__()
        num_layers = config.num_layers
        self.mixers = nn.ModuleList([PatchTSMixerLayer(config=config) for _ in range(num_layers)])

    def forward(self, hidden_state, output_hidden_states: bool = False):
        """
        Args:
            hidden_state (`torch.Tensor`): The input tensor.
            output_hidden_states (`bool`, *optional*, defaults to False.):
                Whether to output the hidden states as well.

        Returns:
            `torch.Tensor`: The embedding. `list`: List of all hidden states if `output_hidden_states` is set to
            `True`.
        """
        all_hidden_states = []
        embedding = hidden_state

        for mod in self.mixers:
            embedding = mod(embedding)
            if output_hidden_states:
                all_hidden_states.append(embedding)

        if output_hidden_states:
            return embedding, all_hidden_states
        else:
            return embedding, None

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    Nr4   c                    t                                                       |j        | _        | j        | j                                         t	          j        |j                  | _        |-t	          j        |j	        |j
        z  |j                  | _        n'|                    |j	        |j
        z            | _        t	          j        d          | _        d S )N	start_dim)r   r   prediction_channel_indicessortr   rw   head_dropoutdropout_layerr   rL   r9   prediction_lengthbase_forecast_blockget_parameter_projectionFlattenflatten)r"   r4   distribution_outputr#   s      r$   r   z&PatchTSMixerForPredictionHead.__init__  s    *0*K'*6+00222Z(;<<&')y&2Dv~2UX^Xp'q'qD$$':'S'S"V^3( (D$ zB///r%   c                                           |          }                     |          }                     |          }t          |t                    rt	          d |D                       }n|                    dd          } j        @t          |t                    rt	           fd|D                       }n|d j        f         }|S )ar  

        Args:
            hidden_features (`torch.Tensor` of shape `(batch_size, num_patch, d_model)` in `flatten` mode
                or `(batch_size, n_vars, num_patch, d_model)` in `common_channel`/`mix_channel` mode.): Input hidden
                features.

        Returns:
            `torch.Tensor` of shape `(batch_size, prediction_length, nvars)`.

        c              3   B   K   | ]}|                     d d          V  dS )r   r   N)r?   )r   zs     r$   	<genexpr>z8PatchTSMixerForPredictionHead.forward.<locals>.<genexpr>  s0      CCQQ[[R00CCCCCCr%   r   r   Nc              3   6   K   | ]}|d j         f         V  dS ).N)r   )r   r  r"   s     r$   r  z8PatchTSMixerForPredictionHead.forward.<locals>.<genexpr>  s0       [ [Q3(G#G!H [ [ [ [ [ [r%   .)r   r   r   
isinstancetupler?   r   r"   hidden_featuresforecasts   `  r$   r*   z%PatchTSMixerForPredictionHead.forward  s     ,,77,,_==++O<<h&& 	2CC(CCCCCHH))"b11H*6(E** J  [ [ [ [RZ [ [ [[[#C)H$HIr%   r'   r   r1   s   @r$   r   r   y  sc         0 01 0 0 0 0 0 0$      r%   r   c                   0     e Zd ZdZddef fdZd Z xZS )PatchTSMixerLinearHeadzLinear head for Classification and Regression.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    Nr4   c                    t                                                       |j        | _        |j        | _        |j        |j        }nd}|| _        |0t          j        |j        |j	        z  |z  |j
                  | _        n*|                    |j        |j	        z  |z            | _        |j        t          j        d          | _        nt          j        d          | _        t          j        |j                  | _        d S )Nr   r   r   )r   r   head_aggregationoutput_rangerL   r   r   r   r9   r   num_targets
projectionr   r   r   rw   r   rx   )r"   r4   r   
mul_factorr#   s       r$   r   zPatchTSMixerLinearHead.__init__  s     & 7"/"*+JJJ#6 & i!::ZG" DOO
 2JJ!::ZG DO "*:333DLL:333DLz&"566r%   c                    |                     dd          }| j        dk    r	|d         }nH| j        dk    r|                    d          j        }n!| j        dk    r|                    d          }| j        r|                     |          }|                     |          }|                     |          }| j        E| j	        >t          j        |          | j	        d	         | j	        d
         z
  z  | j	        d
         z   }|S )ai  
        Args:
            hidden_features (`torch.Tensor` of shape `(batch_size x num_patch x d_model)` in `flatten` mode
                or `(batch_size x n_vars x num_patch x d_model)` in `common_channel`/`mix_channel` mode.): Input hidden
                features.

        Returns:
            `torch.Tensor` of shape `(batch_size x num_targets)`.
        r   r   use_last).r   max_poolr   avg_poolNr   r   )r?   r  maxvaluesr]   r   rx   r  r   r  rB   sigmoid)r"   r  s     r$   r*   zPatchTSMixerLinearHead.forward  s    *33B;; J..-g6OO"j00-11b199@OO"j00-22r2::O< 	<"ll?;;O,,77///::$,43D3Po..$2CA2FIZ[\I]2]^aearstauu  r%   r'   r   r1   s   @r$   r  r    sc         7 71 7 7 7 7 7 78             r%   r  c                   $    e Zd ZeZdZdZdZd ZdS )PatchTSMixerPreTrainedModelmodelpast_valuesFc                    t          |t                    r;| j        j        dk    r)t          j                            |j        dd           dS dS t          |t          j        t          j	        f          r?|j
        j                                         |j        j                            d           dS t          |t                    rI|j        j
        j                                         |j        j        j                            d           dS t          |t          j                  rR|j        j                            d| j        j                   |j
        "|j
        j                                         dS dS dS )zInitialize weightsrO   r   g?)r]   r^         ?N)r  rE   r4   rT   r   initnormal_rI   rm   r8   r   datazero_weightfill_r3   r;   r   init_std)r"   modules     r$   _init_weightsz)PatchTSMixerPreTrainedModel._init_weights  s_   f<== 	){3x?? 3#3GGGGG @?r~ >?? 		)K""$$$M$$S))))) 566 	)!&,,...#(..s33333	** 	)M&&CT[5I&JJJ{& &&(((((	) 	)&&r%   N)	r+   r,   r-   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointingr(   r%   r$   r  r    s9        %L#O&+#) ) ) ) )r%   r  c                   .     e Zd ZdZdef fdZd Z xZS )PatchTSMixerPretrainHeadzcPretraining head.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r4   c                     t                                                       t          j        |j                  | _        t          j        |j        |j                  | _	        d S r'   )
r   r   r   rw   r   r   r   r9   patch_lengthbase_pt_blockr<   s     r$   r   z!PatchTSMixerPretrainHead.__init__  sM    Z(;<<Yv~v7JKKr%   c                 Z    |                      |          }|                     |          }|S )a  
        Args:
            hidden_features (`torch.Tensor` of shape `(batch_size x num_patch x d_model)` in `flatten` mode
                or `(batch_size x n_vars x num_patch x d_model)` in `common_channel`/`mix_channel` mode.): Input hidden
                features.

        Returns:
            `torch.Tensor` of shape `(batch_size x n_vars x num_patch x patch_length)`.
        )r   r2  r  s      r$   r*   z PatchTSMixerPretrainHead.forward  s/     ,,_==%%o66r%   r   r1   s   @r$   r/  r/    se         L1 L L L L L L      r%   r/  Fr(   
mask_ratiounmasked_channel_indiceschannel_consistent_masking
mask_valuec                    |dk     s|dk    rt          d| d          | j        \  }}}}| j        }	t          |d|z
  z            }
|r0t	          j        |d||	          }|                    d|d          }nt	          j        ||||	          }t	          j        ||||	          }d|ddddd|
f<   t	          j        |d          }t	          j        |d          }t	          j	        |d|	          }|
                    d                              ddd|          }|d|dd|ddddf<   |                     |                                |          }||d
         fS )a  random_masking: Mask the input considering the control variables.

    Args:
        inputs (`torch.Tensor` of shape `(batch_size, num_channels, sequence_length, num_features)`):
            The input tensor to mask.
        mask_ratio (`float`):
            Masking ratio applied to mask the input data during random pretraining. It is the number between 0 and 1.
        unmasked_channel_indices (list, *optional*):
            Indices of channels that will not be masked.
        channel_consistent_masking (bool, *optional*, defaults to `False`):
            When true, masking will be same across all channels of a timeseries. Otherwise, masking positions will vary
            across channels.
        mask_value (int, *optional*, defaults to 0):
            Define the value of masked patches for pretraining.

    Returns:
        `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as input Tensor and mask tensor of shape [bs x c x
        n]
    r   r   zMask ratio z has to be between 0 and 1.deviceNr   r   )r   index.r   )r_   rp   r:  r/   rB   randrepeatonesargsortgatherrW   masked_fillr   )r(   r4  r5  r6  r7  r   num_channelssequence_lengthnum_featuresr:  len_keepnoisemaskids_shuffleids_restoreinputs_masks                   r$   random_maskingrL  0  s   4 A~~qNzNNNOOO>Dl;Jo|]F?a*n566H! U
:q/&IIIQa00 
:|_VTTT :j,OOODDAAAyy -2...K-444K<"K888D>>"$$Q1l;;D+23QQQ(!!!QQQ./$$TYY[[*==KV$$r%   num_forecast_mask_patchesc                    t          |t                    r|g}d |D             }| j        \  }}}}t          j        |||| j                  }	g }
d}t          |          }t          ||          D ]V\  }}|dk    s||k    rt          d| d          t          ||z  |z            }|
	                    |||g           ||z  }Wt          |
d           }
||k     r|
d         d         ||z
  z   |
d         d<   n#||k    r|
d	         d         ||z
  z   |
d	         d<   d}|
D ]\  }}}||z   }d
|	||dd| df<   |}t          j        |	j        d                   }|	|         }	|	                    d	                              d
d
d
|          }	|d|	dd|ddddf<   |                     |	                                |          }||	d         fS )a  Forecast masking that masks the last K patches where K is from the num_forecast_mask_patches.
    If num_forecast_mask_patches is a list, samples in the batch will be randomly masked by numbers defined in the list.

    Parameters:
        inputs (`torch.Tensor`):
            Input of shape `(bs, num_channels, num_patch, patch_length)`
        num_forecast_mask_patches (`list`):
            Number of patches to be masked at the end of each batch sample. e.g. 4 or [3, 5].
        unmasked_channel_indices (`list`, *optional*):
            Indices of channels that are not masked.
        mask_value (`int`, *optional*, defaults to 0):
            Values in the masked patches will be filled by `mask_value`.

    Returns:
        `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as inputs Tensor and Mask tensor of shape `(bs,
        num_channels , num_patch)` or `(bs, tsg1, tsg2, num_channels, num_patch)`
    c                     g | ]}d S )r   r-  )r   r   s     r$   r   z$forecast_masking.<locals>.<listcomp>  s    AAA!AAAAr%   r9  r   znum_forecast_mask_patches z6 should be greater than 0 and less than total patches.c                     | d         S Nr>   r-  )xs    r$   <lambda>z"forecast_masking.<locals>.<lambda>  s
    !A$ r%   )keyr>   r   r   Nr<  )r  r/   rp   rB   rK   r:  sumzipr_   r   sortedrandpermrW   r>  rB  r   )r(   rM  r5  r7  forecast_mask_ratiosr   rC  rD  rE  rH  t_listtotal_lengthtotal_ratior1  ratiotemp_lenbatch1	patch_lenr   batch2permrK  s                         r$   forecast_maskingrc  k  sU   0 +S11 @%>$?!AA'@AAA>Dl;Jo|;z<WWWDFL*++K"#<>RSS ! !e1 ? ?q\qqq   zE)K788|UH5666 F///Fj  ay|zL'@Aq	!	
	"	"r
1
)BCr
1F"(  	1h("./VF]AAA	z{{*+>$*Q-((D:D>>"$$Q1l;;D+23QQQ(!!!QQQ./$$TYY[[*==KV$$r%   c                   >     e Zd ZdZdef fdZdej        fdZ xZ	S )PatchTSMixerPatchifyz
    A class to patchify the time series sequence into different patches

    Returns:
        `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
    r4   c                    t                                                       |j        | _        |j        | _        |j        | _        | j        | j        k    r t          d| j         d| j         d          t          | j        | j                  | j        z
  | j        z  dz   | _        | j        | j        | j        dz
  z  z   }| j        |z
  | _	        d S )NzSequence length (z+) has to be greater than the patch length ()r   )
r   r   context_lengthrD  r1  patch_strider_   r  rL   sequence_start)r"   r4   new_sequence_lengthr#   s      r$   r   zPatchTSMixerPatchify.__init__  s    %4"/"/4#444yD$8yyeievyyy  
   4d6GHH4K\\aearruvv"/$2CtGWZ[G[2\\"25HHr%   r  c                 ,   |j         d         }|| j        k    rt          d| d| j         d          |dd| j        dddf         }|                    d| j        | j                  }|                    dd                                          }|S )a!  
        Parameters:
            past_values (`torch.Tensor` of shape `(batch_size, sequence_length, num_channels)`, *required*):
                Input for patchification

        Returns:
            `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
        r   zInput sequence length (z%) doesn't match model configuration (r   N)	dimensionr   stepr  )	rp   rD  r_   rj  unfoldr1  ri  r?   r   )r"   r  rD  r@   s       r$   r*   zPatchTSMixerPatchify.forward  s     &+B/d222x/xx`d`txxx   QQQ 3 5 5qqq89$2C$J[\\!!"b))4466r%   rA   r1   s   @r$   re  re    ss         I1 I I I I I I"5<        r%   re  c                   >     e Zd ZdZdef fdZdej        fdZ xZ	S )PatchTSMixerMaskinga  
    Class to perform random or forecast masking.

    Parameters:
        config (`PatchTSMixerConfig`): model config
    Returns:
        x_mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
            Masked patched input
        mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
            Bool tensor indicating True on masked points
    r4   c                    t                                                       |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        | j        t          | j                  | _        d S d S r'   )	r   r   random_mask_ratior6  	mask_typerM  r5  r7  rW  r<   s     r$   r   zPatchTSMixerMasking.__init__  s    !'!9*0*K'))/)I&(.(G% +(4,243P,Q,QD))) 54r%   rb   c                 2   | j         dk    r,t          || j        | j        | j        | j                  \  }}nI| j         dk    r&t          || j        | j        | j                  \  }}nt          d| j          d          |	                                }||fS )a  
        Parameters:
            patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
                Patch input

        Return:
            masked_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
                Masked patched input
            mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
                Bool tensor indicating True on masked points

        rO   )r(   r4  r5  r6  r7  r	  )r(   rM  r5  r7  zInvalid mask type .)
rt  rL  rs  r5  r6  r7  rc  rM  r_   r   )r"   rb   masked_inputrH  s       r$   r*   zPatchTSMixerMasking.forward  s     >X%%!/"1)-)F+/+J?" " "L$$ ^z))!1"*.*H)-)F?	" " "L$$ C$.CCCDDD yy{{T!!r%   rA   r1   s   @r$   rq  rq    ss        
 
	R1 	R 	R 	R 	R 	R 	R!"5< !" !" !" !" !" !" !" !"r%   rq  c            	            e Zd ZdZdef fdZdej        dej        deej        ej        ej        f         fdZ	 xZ
S )PatchTSMixerStdScalerz
    Standardize features by calculating the mean and scaling along the first dimension, and then normalizes it by
    subtracting from the mean and dividing by the standard deviation.
    r4   c                     t                                                       t          |d          r|j        nd| _        t          |d          r|j        nd| _        t          |d          r|j        nd| _        d S )Nscaling_dimr   keepdimTminimum_scalegh㈵>)r   r   hasattrr{  r   r|  r}  r<   s     r$   r   zPatchTSMixerStdScaler.__init__$  sy    )0)G)GN6%%Q)0)C)CMv~~5<V_5U5U_V11[_r%   r"  observed_indicatorrM   c                 d   |                     | j        | j                  }|                    d          }||z                       | j        | j                  |z  }||z
  |z  dz                       | j        | j                  |z  }t	          j        || j        z             }||z
  |z  ||fS )C  
        Parameters:
            data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                input for Batch norm calculation
            observed_indicator (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Calculating the scale on the observed indicator.
        Returns:
            tuple of `torch.Tensor` of shapes
                (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
                `(batch_size, 1, num_input_channels)`)
        r|  r  r>   )rU  r   r|  	clamp_minrB   sqrtr}  )r"   r"  r  denominatorlocvariancescales          r$   r*   zPatchTSMixerStdScaler.forward*  s     ),,TXt|,LL!++C00((--dh-MMP[[Sj$661<AA$(TXT`Aaadoo
8d&8899s
e#S%//r%   r+   r,   r-   r.   r   r   rB   rC   r   r*   r0   r1   s   @r$   ry  ry    s         
`1 ` ` ` ` ` `0L06;l0	u|U\5<7	80 0 0 0 0 0 0 0r%   ry  c            	            e Zd ZdZdef fdZdej        dej        deej        ej        ej        f         fdZ	 xZ
S )PatchTSMixerMeanScalerz
    Computes a scaling factor as the weighted average absolute value along the first dimension, and scales the data
    accordingly.
    r4   c                 8   t                                                       t          |d          r|j        nd| _        t          |d          r|j        nd| _        t          |d          r|j        nd| _        t          |d          r|j        nd | _        d S )Nr{  r   r|  Tr}  绽|=default_scale)r   r   r~  r{  r   r|  r}  r  r<   s     r$   r   zPatchTSMixerMeanScaler.__init__H  s    )0)G)GN6%%Q)0)C)CMv~~5<V_5U5U`V11[`5<V_5U5U_V11[_r%   r"  r  rM   c                    ||z                                                       | j        d          }|                    | j        d          }|t          j        |d          z  }| j        W|                    d          }t          j        |                    d          d          }t          j        ||z            }n| j        t          j        |          z  }t          j        |dk    ||          }t          j        || j	                  }||z  }	| j
        s|                    | j                  }|	t          j        |          |fS )r  Tr  r   minNr   r   )absrU  r   rB   clampr  squeeze	ones_likewherer}  r|  
zeros_like)
r"   r"  r  ts_sumnum_observedr  	batch_sumbatch_observationsr  scaled_datas
             r$   r*   zPatchTSMixerMeanScaler.forwardO  sE    ++002266tx6NN)--dh-EE\q9999 %

q
))I!&\-=-=a-@-@a!H!H!H!M)6H*HIIMM .1G1GGM L1,e]CC Et'9:::Ul| 	0MMdhM//EE,U33U::r%   r  r1   s   @r$   r  r  B  s         
`1 ` ` ` ` ` `&;L&;6;l&;	u|U\5<7	8&; &; &; &; &; &; &; &;r%   r  c            
            e Zd ZdZdef fdZ	 d	dej        dej        deej        ej        ej        f         fdZ	 xZ
S )
PatchTSMixerNOPScalerz|
    Assigns a scaling factor equal to 1 along the first dimension, and therefore applies no scaling to the input data.
    r4   c                     t                                                       t          |d          r|j        nd| _        t          |d          r|j        nd| _        d S )Nr{  r   r|  T)r   r   r~  r{  r   r|  r<   s     r$   r   zPatchTSMixerNOPScaler.__init__~  sW    )0)G)GN6%%Q)0)C)CMv~~r%   Nr"  r  rM   c                     t          j        |d                              | j        | j                  }t          j        |d                              | j        | j                  }|||fS )a  
        Parameters:
            data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                input for Batch norm calculation
        Returns:
            tuple of `torch.Tensor` of shapes
                (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
                `(batch_size, 1, num_input_channels)`)
        FrP   )r   r|  )rB   r  r]   r   r|  r  )r"   r"  r  r  r  s        r$   r*   zPatchTSMixerNOPScaler.forward  sl     E:::??DHVZVb?cct5999>>48UYUa>bbS%r%   r'   r  r1   s   @r$   r  r  y  s         N1 N N N N N N FJ   L 6;l 	u|U\5<7	8               r%   r  c                   \    e Zd ZU dZdZej        ed<   dZe	e
ej                          ed<   dS )PatchTSMixerEncoderOutputa  
@dataclass
class PatchTSMixerEncoderOutput(ModelOutput):
    """
    Base class for `PatchTSMixerEncoderOutput`, with potential hidden states.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, d_model)`):
            Hidden-state at the output of the last layer of the model.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*):
            Hidden-states of the model at the output of each layer.
    """

    last_hidden_state: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
eeef         fd            Z xZS )PatchTSMixerEncoderz
    Encoder for PatchTSMixer which inputs patched time-series and outputs patched embeddings.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r4   c                 V   t                                          |           |j        | _        t          j        |j        |j                  | _        |j        rt          |          | _
        nd | _
        t          |          | _        |j        r|                                  d S d S )Nr   )r   r   use_return_dictr   r   r1  r9   patcherrG   rE   positional_encoderr   mlp_mixer_encoder	post_initr<   s     r$   r   zPatchTSMixerEncoder.__init__  s       %5y!4fnEE) 	+&DF&S&S&SD##&*D#!2&!A!A!A  	NN	 	r%   output_typer)  FNr  r   return_dictrM   c                 
   ||n| j         }|                     |          }| j        |                     |          }|                     ||          \  }}|st	          d ||fD                       S t          ||          S )a  
        Args:
            past_values (`torch.FloatTensor` of shape `(batch_size, seq_length, num_input_channels)`):
                Context values of the time series. For a pretraining task, this denotes the input time series to
                predict the masked portion. For a forecasting task, this denotes the history/past time series values.
                Similarly, for classification or regression tasks, it denotes the appropriate context values of the
                time series.

                For univariate time series, `num_input_channels` dimension should be 1. For multivariate time series,
                it is greater than 1.

            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers.

            return_dict (`bool`, *optional*):
                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.

        Returns:
            `torch.FloatTensor` of shape `(batch_size, n_vars, num_patches, d_model)`
        N)r   c              3      K   | ]}|V  d S r'   r-  r   vs     r$   r  z.PatchTSMixerEncoder.forward.<locals>.<genexpr>  s6              r%   )r  r   )r  r  r  r  r  r  )r"   r  r   r  patchesr  r   s          r$   r*   zPatchTSMixerEncoder.forward  s    8 &1%<kk$BV ,,{++ ".--g66G+/+A+A'`t+A+u+u(= 	   &!      );L\ijjjjr%   )FN)r+   r,   r-   r.   r   r   r   r  _CONFIG_FOR_DOCrB   rC   r   r   r   r   r*   r0   r1   s   @r$   r  r    s         1        +DSbccc 05&*	/k /k\/k 'tn/k d^	/k
 
u//	0/k /k /k dc/k /k /k /k /kr%   r  c                       e Zd ZU dZdZej        ed<   dZe	e
ej                          ed<   dZej        ed<   dZe	ej                 ed<   dZe	ej                 ed<   dZe	ej                 ed<   dS )	PatchTSMixerModelOutputa  
@dataclass
class PatchTSMixerModelOutput(ModelOutput):
    """
    Base class for model's outputs, with potential hidden states.

    Args:
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, d_model)`):
            Hidden-state at the output of the last layer of the model.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*):
            Hidden-states of the model at the output of each layer.
        patch_input (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, patch_length)`):
            Patched input data to the model.
        mask: (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches)`, *optional*):
            Bool Tensor indicating True in masked patches and False otherwise.
        loc: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*):
            Gives the mean of the context window per channel. Used for revin denorm outside the model, if revin
            enabled.
        scale: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*):
            Gives the std dev of the context window per channel. Used for revin denorm outside the model, if revin
            enabled.
    """

    last_hidden_state: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    patch_input: torch.FloatTensor = None
    mask: Optional[torch.FloatTensor] = None
    loc: Optional[torch.FloatTensor] = None
    scale: Optional[torch.FloatTensor] = None
          	 	 	 ddej        deej                 d	ee         d
ee         de	f
d                        Z xZS )PatchTSMixerModelFr4   
mask_inputc                    t                                          |           |j        | _        t          |          | _        t          |          | _        |du rt          |          | _        nd | _        |j	        dk    rt          |          | _        n=|j	        dk    s	|j	        du rt          |          | _        nt          |          | _        |j        r|                                  d S d S )NTr]   r^   )r   r   r  r  encoderre  patchingrq  maskingr   r  scalerry  r  r  )r"   r4   r  r#   s      r$   r   zPatchTSMixerModel.__init__  s       %5*622,V44.v66DLLDL>V##088DKK^u$$$(>(>/77DKK/77DK  	NN	 	r%   r  Nr  observed_maskr   r  rM   c           	         ||n| j         }d}|t          j        |          }|                     ||          \  }}}|                     |          }	|	}
| j        |                     |	          \  }
}|                     |
||          }t          |t                    r	t          | }|s)t          d |j
        |j        |	|||fD                       S t          |j
        |j        |	|||          S )a  
        observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
            in `[0, 1]`:
                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

        Returns:
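
        Examples (illustrative sketch using a small, randomly initialized configuration; the
        `PatchTSMixerConfig` keyword names below are assumed from the configuration class):

        ```python
        >>> import torch
        >>> from transformers import PatchTSMixerConfig, PatchTSMixerModel

        >>> config = PatchTSMixerConfig(
        ...     context_length=32, patch_length=8, patch_stride=8, num_input_channels=3, d_model=16
        ... )
        >>> model = PatchTSMixerModel(config)

        >>> # (batch_size, context_length, num_input_channels)
        >>> past_values = torch.randn(2, 32, 3)
        >>> outputs = model(past_values=past_values)
        >>> # (batch_size, num_input_channels, num_patches, d_model)
        >>> outputs.last_hidden_state.shape
        torch.Size([2, 3, 4, 16])
        ```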

        Nr   r  c              3      K   | ]}|V  d S r'   r-  r  s     r$   r  z,PatchTSMixerModel.forward.<locals>.<genexpr>V  6       
 
 
 
 
 
 
 
r%   )r  r   rb   rH  r  r  )r  rB   r  r  r  r  r  r  r  r  r  r   r  )r"   r  r  r   r  rH  scaled_past_valuesr  r  	patched_x	enc_inputencoder_outputs               r$   r*   zPatchTSMixerModel.forward*  sC   & &1%<kk$BV !OK88M)-[-)P)P&CMM"455		<#"ll955OIt !5# & 
 
 ne,, 	H6GN 	 
 
 #4"0
 
 
 
 
 
 ',>(6!
 
 
 	
r%   r   )NFN)r+   r,   r-   r   r   r   r   PATCHTSMIXER_INPUTS_DOCSTRINGr   r  r  rB   rC   r   r*   r0   r1   s   @r$   r  r    s        
 1 t      . +*+HII+BQ`aaa 15/4&*=
 =
\=
  -=
 'tn	=

 d^=
 
!=
 =
 =
 ba JI=
 =
 =
 =
 =
r%   r  c                       e Zd ZU dZdZeej                 ed<   dZ	ej        ed<   dZ
ej        ed<   dZeeej                          ed<   dS ) PatchTSMixerForPreTrainingOutputa  
    Output type of [`PatchTSMixerForPretraining`].

    Args:
        prediction_outputs (`torch.FloatTensor` of shape `(batch_size, num_input_channels, num_patches, patch_length)`):
            Prediction output from the pretrain head.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*):
            Hidden-states of the model at the output of each layer.
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_input_channels, num_patches, d_model)`):
            Backbone embeddings before passing through the head.
        loss (*optional*, returned when `return_loss` is `True`, `torch.FloatTensor` of shape `()`):
            Total loss.
    Nlossprediction_outputsr  r   r+   r,   r-   r.   r  r   rB   r  r  r  r  r   r   r-  r%   r$   r  r  l  {           )-D(5$
%,,,,0)000+/u(///8<M8E%"345<<<<<r%   r  c                        e Zd ZdZdef fdZ ee           ee	e
          	 	 	 	 ddej        d	eej                 d
ee         dedee         de	fd                        Z xZS )PatchTSMixerForPretrainingz
    `PatchTSMixer` for masked pretraining.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.

    Returns:
        `None`.
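
    Example (illustrative sketch; a randomly initialized model reconstructing masked patches of dummy
    data, with the `PatchTSMixerConfig` keyword names assumed from the configuration class):

    ```python
    >>> import torch
    >>> from transformers import PatchTSMixerConfig, PatchTSMixerForPretraining

    >>> config = PatchTSMixerConfig(context_length=32, patch_length=8, num_input_channels=3)
    >>> model = PatchTSMixerForPretraining(config)

    >>> past_values = torch.randn(2, 32, 3)
    >>> outputs = model(past_values=past_values)  # the loss is computed on the masked patches by default
    >>> # (batch_size, num_input_channels, num_patches, patch_length)
    >>> outputs.prediction_outputs.shape
    torch.Size([2, 3, 4, 8])
    ```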
    r4   c                 
   t                                          |           t          |d          | _        t	          |          | _        |j        | _        |j        | _        |j        r|                                  d S d S )NT)r  r   )	r   r   r  r  r/  headmasked_lossr  r  r<   s     r$   r   z#PatchTSMixerForPretraining.__init__  s       &v$???
,F;;;	!-%5  	NN	 	r%   r  NFTr  r  r   return_lossr  rM   c                    ||n| j         }| j        du r!t          j                            d          }n t          j                            d          }|                     ||||          }t          |t                    r	t          | }| 	                    |j
                  }|du r |||j                  }	nd}	| j        du rO|	M|	                    d          |j        z                                  |j                                        d	z   z  }	|s't          d
 |	||j
        |j        fD                       S t!          |	||j
        |j                  S )a  
        observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
            in `[0, 1]`:
                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
        return_loss (`bool`, *optional*):
            Whether to return the loss in the `forward` call.

        Returns:

        NTnone	reductionr]   r  r   r  r   r   r  c              3      K   | ]}|V  d S r'   r-  r  s     r$   r  z5PatchTSMixerForPretraining.forward.<locals>.<genexpr>  6              r%   r  r  r  r   )r  r  rB   r   MSELossr  r  r  r  r  r  rb   r]   rH  rU  r   r  )
r"   r  r  r   r  r  r  model_outputx_hatloss_vals
             r$   r*   z"PatchTSMixerForPretraining.forward  s   , &1%<kk$BVt##8##f#55DD8##f#55D zz'!5#	 " 
 
 lE** 	B2LAL		,899$tE<#;<<HHH t##(< "--0AAFFHHLL]LaLaLcLcfkLklH 		    2 .	      0$*<&4	
 
 
 	
r%   NFTN)r+   r,   r-   r.   r   r   r   r  r   r  r  rB   rC   r   r   r*   r0   r1   s   @r$   r  r    s       	 		1 	 	 	 	 	 	 +*+HII+KZijjj 15/4 &*@
 @
\@
  -@
 'tn	@

 @
 d^@
 
*@
 @
 @
 kj JI@
 @
 @
 @
 @
r%   r  c                       e Zd ZU dZdZeej                 ed<   dZ	ej        ed<   dZ
ej        ed<   dZeeej                          ed<   dZej        ed<   dZej        ed<   dS )	PatchTSMixerForPredictionOutputa  
    Output type of [`PatchTSMixerForPrediction`].

    Args:
        prediction_outputs (`torch.FloatTensor` of shape `(batch_size, prediction_length, num_input_channels)`):
            Prediction output from the forecast head.
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_input_channels, num_patches, d_model)`):
            Backbone embeddings before passing through the head.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*):
            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        loss (*optional*, returned when `future_values` is provided, `torch.FloatTensor` of shape `()`):
            Total loss.
        loc (`torch.FloatTensor` of shape `(batch_size, 1, num_input_channels)`, *optional*):
            Mean of the input context window, per channel.
        scale (`torch.FloatTensor` of shape `(batch_size, 1, num_input_channels)`, *optional*):
            Standard deviation of the input context window, per channel.

    Nr  r  r  r   r  r  )r+   r,   r-   r.   r  r   rB   r  r  r  r  r   r   r  r  r-  r%   r$   r  r    s          & )-D(5$
%,,,,0)000+/u(///8<M8E%"345<<<!C	!!!#E5#####r%   r  c                   ,    e Zd ZU dZdZej        ed<   dS )"SamplePatchTSMixerPredictionOutputa9  
    Base class for a time series model's prediction outputs that contains the sampled values from the chosen
    distribution.

    Args:
        sequences (`torch.FloatTensor` of shape `(batch_size, num_samples, prediction_length, num_input_channels)`):
            Sampled values from the chosen distribution.
    N	sequencesr+   r,   r-   r.   r  rB   r  r  r-  r%   r$   r  r    1           $(Iu '''''r%   r  c                   ,    e Zd ZU dZdZej        ed<   dS )"SamplePatchTSMixerRegressionOutputa$  
    Base class for a time series model's prediction outputs that contains the sampled values from the chosen
    distribution.

    Args:
        sequences (`torch.FloatTensor` of shape `(batch_size, num_samples, num_targets)`):
            Sampled values from the chosen distribution.
    Nr  r  r-  r%   r$   r  r  	  r  r%   r  inputtargetrM   c                 .    |                      |           S )zc
    Computes the negative log likelihood loss from input distribution with respect to target.
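
    Example (illustrative; works with any `torch.distributions.Distribution` instance, here a standard
    Normal, and assumes this helper is exposed as `nll` as in this module):

    ```python
    >>> import torch
    >>> from torch.distributions import Normal

    >>> dist = Normal(loc=torch.zeros(3), scale=torch.ones(3))
    >>> nll(dist, torch.tensor([0.0, 1.0, 2.0]))  # element-wise -log p(target), approx. [0.92, 1.42, 2.92]
    ```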
    )log_prob)r  r  s     r$   nllr    s     NN6""""r%   input_tensorweightsc                 n   |t          j        |dk    | |z  t          j        |                     }t          j        |r|                    |          n|                                d          }|r|                    |          n|                                |z  S |                     |          S )aj  
    Computes the weighted average of a given tensor across a given `dim`, masking values associated with weight zero,
    meaning instead of `nan * 0 = nan` you will get `0 * 0 = 0`.

    Args:
        input_tensor (`torch.FloatTensor`):
            Input tensor, of which the average must be computed.
        weights (`torch.FloatTensor`, *optional*):
            Weights tensor, of the same shape as `input_tensor`.
        dim (`int`, *optional*):
            The dim along which to average `input_tensor`.

    Returns:
        `torch.FloatTensor`: The tensor with values averaged along the specified `dim`.
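
    Example (illustrative; entries with zero weight are ignored and the divisor is the sum of the weights,
    clamped to at least 1):

    ```python
    >>> import torch

    >>> x = torch.tensor([[1.0, 2.0, 3.0]])
    >>> w = torch.tensor([[1.0, 0.0, 1.0]])
    >>> weighted_average(x, weights=w, dim=-1)  # (1.0 + 3.0) / 2
    tensor([2.])
    ```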
    Nr   r   r  r  )rB   r  r  r  rU  r]   )r  r  r   weighted_tensorsum_weightss        r$   weighted_averager     s      +glL74JEL\]iLjLjkkk#"P'++#+"6"6"67;;==VYZZZ03N###,,,9L9L9N9NR]]]  S )))r%   c                   &    e Zd ZdZdef fdZ ee           ee	e
          	 	 	 	 	 ddej        d	eej                 d
eej                 dee         dedee         de	fd                        Z	 ddej        d	eej                 defdZ xZS )PatchTSMixerForPredictionz
    `PatchTSMixer` for forecasting applications.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.

    Returns:
        `None`.
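
    Example (illustrative sketch with a small, randomly initialized model; the `PatchTSMixerConfig`
    keyword names are assumed from the configuration class):

    ```python
    >>> import torch
    >>> from transformers import PatchTSMixerConfig, PatchTSMixerForPrediction

    >>> config = PatchTSMixerConfig(
    ...     context_length=32, prediction_length=8, patch_length=8, num_input_channels=3
    ... )
    >>> model = PatchTSMixerForPrediction(config)

    >>> past_values = torch.randn(2, 32, 3)
    >>> future_values = torch.randn(2, 8, 3)
    >>> outputs = model(past_values=past_values, future_values=future_values)
    >>> loss = outputs.loss  # MSE against `future_values` with the default settings
    >>> # (batch_size, prediction_length, num_input_channels)
    >>> outputs.prediction_outputs.shape
    torch.Size([2, 8, 3])
    ```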
    r4   c                 ,   t                                          |           |j        | _        |j        | _        |j        | _        |j        | _        |j        dk    rd | _        nb|j        }t          t          t          d}|                    |j        d           }| ||          | _        nt          d|j                   t          |          | _        t          || j                  | _        |j        r|                                  d S d S )Nmse	student_tnormalnegative_binomialr   Unknown distribution output r4   r   )r   r   r  r  r   num_parallel_samplesr   r   r   r   r
   getr_   r  r  r   r  r  )r"   r4   r   distribution_output_mapoutput_classr#   s        r$   r   z"PatchTSMixerForPrediction.__init__D  s)      K	%5*0*K'$*$?!;%'+D$$*C+&%;' '#
 366v7QSWXXL'+7<C+@+@+@(( !\@Z!\!\]]]&v..
1 $ 8
 
 
	  	NN	 	r%   r  NFTr  r  future_valuesr   r  r  rM   c           	         | j         dk    rt          j        d          }n"| j         dk    rt          }nt	          d          ||n| j        }|                     ||||          }t          |t                    r	t          | }| 
                    |j                  }	d}
| j        | j        rp| j                            |	|j        d| j        f         |j        d| j        f         	          }|,|d
u r( |||d| j        f                   }
t#          |
          }
n|	|j        d| j        f         z  |j        d| j        f         z   }	||d
u r ||	|d| j        f                   }
nt| j        rI| j                            |	|j        |j        	          }||d
u r |||          }
t#          |
          }
n$|	|j        z  |j        z   }	||d
u r ||	|          }
| j        )|j        d| j        f         }|j        d| j        f         }n|j        }|j        }|s)t          d |
|	|j        |j        ||fD                       S t'          |
|	|j        |j        ||          S )a  
        observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
            in `[0, 1]`:
                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
        future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
            `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
            values of the time series that serve as labels for the model. The `future_values` is what the
            Transformer needs during training to learn to output, given the `past_values`. Note that this is NOT
            required for a pretraining task.

            For a forecasting task, the shape is `(batch_size, target_len, num_input_channels)`. Even if we want
            to forecast only specific channels by setting the indices in the `prediction_channel_indices` parameter,
            pass the target data with all channels, as channel filtering for both prediction and target will be
            manually applied before the loss computation.
        return_loss (`bool`, *optional*):
            Whether to return the loss in the `forward` call.

        Returns:
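
        Example (illustrative sketch of forecasting only a subset of channels; `prediction_channel_indices`
        and the other `PatchTSMixerConfig` keyword names are assumed from the configuration class):

        ```python
        >>> import torch
        >>> from transformers import PatchTSMixerConfig, PatchTSMixerForPrediction

        >>> config = PatchTSMixerConfig(
        ...     context_length=32,
        ...     prediction_length=8,
        ...     patch_length=8,
        ...     num_input_channels=3,
        ...     prediction_channel_indices=[0, 2],
        ... )
        >>> model = PatchTSMixerForPrediction(config)

        >>> # targets are still passed with all channels; filtering happens inside the model
        >>> outputs = model(past_values=torch.randn(2, 32, 3), future_values=torch.randn(2, 8, 3))
        >>> outputs.prediction_outputs.shape  # only channels 0 and 2 are forecast
        torch.Size([2, 8, 2])
        ```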

        r  r]   r  r  2Invalid loss function: Allowed values: mse and nllNr  .r  r  Tc              3      K   | ]}|V  d S r'   r-  r  s     r$   r  z4PatchTSMixerForPrediction.forward.<locals>.<genexpr>  r  r%   )r  r  r  r   r  r  )r  r   r  r  r_   r  r  r  r  r  r  r  r   r   distributionr  r  r  r   r  )r"   r  r  r  r   r  r  r  r  y_hatr  r  r  r  s                 r$   r*   z!PatchTSMixerForPrediction.forwardd  s*   B 9:///DDY%DDQRRR%0%<kk$BV zz'!5#	 " 
 
 lE** 	B2LAL 		,899*6' `#7DD$(d.M)MN&,S$2Q-QR  E    
 !,1D1D#t$%c4+J&JK   H
  099H L.sD4S/STT"&sD,K'KLM  !,1D1D#tE=d>]9]+^__H' 
:#7DD|/|7I  E     !,1D1D#tL-@@H/99H 22\5EE ,1D1D#tE=99H*6"3(G#GHC &sD,K'KLEE"C &E 	 
 
  2 .
 
 
 
 
 
 /$*<&4
 
 
 	
r%   c                 
   | j         } | |d|d          }| j                            |j        |j        |j                  fdt          |          D             }t          j        |d          }t          |          S )	a  
        Generate sequences of sample predictions from a model with a probability distribution head.

        Args:
            past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Past values of the time series that serve as context in order to predict the future.

            observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

        Return:
            [`SamplePatchTSMixerPredictionOutput`] where the output `sequences` tensor will have shape `(batch_size,
            num_samples, prediction_length, num_input_channels)`.
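
        Example (illustrative sketch; sampling requires a distribution head, so `loss="nll"` is set here,
        with the `PatchTSMixerConfig` keyword names assumed from the configuration class):

        ```python
        >>> import torch
        >>> from transformers import PatchTSMixerConfig, PatchTSMixerForPrediction

        >>> config = PatchTSMixerConfig(
        ...     context_length=32,
        ...     prediction_length=8,
        ...     patch_length=8,
        ...     num_input_channels=3,
        ...     loss="nll",
        ...     distribution_output="student_t",
        ...     num_parallel_samples=20,
        ... )
        >>> model = PatchTSMixerForPrediction(config)

        >>> samples = model.generate(past_values=torch.randn(2, 32, 3))
        >>> samples.sequences.shape  # (batch_size, num_parallel_samples, prediction_length, num_input_channels)
        torch.Size([2, 20, 8, 3])
        ```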
        NF)r  r  r  r   r	  c                 8    g | ]}                                 S r-  sampler   r   r  s     r$   r   z6PatchTSMixerForPrediction.generate.<locals>.<listcomp>  s%    NNNQ<&&((NNNr%   r   r   r  )
r  r   r  r  r  r  r   rB   stackr  )r"   r  r  r  outputssamplesr  s         @r$   generatez"PatchTSMixerForPrediction.generate  s    0  $8 $#'!&	
 
 
 /<<&GKw} = 
 

 ONNN%8L2M2MNNN +g1---1GDDDDr%   )NNFTNr'   )r+   r,   r-   r.   r   r   r   r  r   r  r  rB   rC   r   r   r*   r  r  r0   r1   s   @r$   r  r  8  sj       	 	1      @ +*+HII+JYhiii 1504/4 &*s
 s
\s
  -s
  -	s

 'tns
 s
 d^s
 
)s
 s
 s
 ji JIs
p 15-E -E\-E  --E 
,	-E -E -E -E -E -E -E -Er%   r  c                       e Zd ZU dZdZeej                 ed<   dZ	ej        ed<   dZ
ej        ed<   dZeeej                          ed<   dS )-PatchTSMixerForTimeSeriesClassificationOutputa  
    Output type of [`PatchTSMixerForTimeSeriesClassification`].

    Args:
        prediction_outputs (`torch.FloatTensor` of shape `(batch_size, num_labels)`):
            Prediction output from the classification head.
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_input_channels, num_patches, d_model)`):
            Backbone embeddings before passing through the head.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*):
            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        loss (*optional*, returned when `target_values` is provided, `torch.FloatTensor` of shape `()`):
            Total loss.
    Nr  r  r  r   r  r-  r%   r$   r  r    r  r%   r  c                        e Zd ZdZdef fdZ ee           ee	e
          	 	 	 	 ddej        d	ej        d
ee         dedee         de	fd                        Z xZS )'PatchTSMixerForTimeSeriesClassificationz
    `PatchTSMixer` for classification applications.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.

    Returns:
        `None`.
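
    Example (illustrative sketch; `num_targets` is assumed to set the number of classes, and the other
    `PatchTSMixerConfig` keyword names are assumed from the configuration class):

    ```python
    >>> import torch
    >>> from transformers import PatchTSMixerConfig, PatchTSMixerForTimeSeriesClassification

    >>> config = PatchTSMixerConfig(context_length=32, patch_length=8, num_input_channels=3, num_targets=4)
    >>> model = PatchTSMixerForTimeSeriesClassification(config)

    >>> past_values = torch.randn(2, 32, 3)
    >>> labels = torch.tensor([0, 3])  # one class index per series in the batch
    >>> outputs = model(past_values=past_values, target_values=labels)
    >>> outputs.prediction_outputs.shape  # class logits, (batch_size, num_targets)
    torch.Size([2, 4])
    ```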
    r4   c                 P   t                                          |           t          |          | _        t	          |          | _        |j        | _        |j        dv r!t          |j	        |j
                  | _        nd | _        |j        r|                                  d S d S )Nr   r^   r]   Tr9   rL   )r   r   r  r  r  r  r  r   InjectScalerStatistics4Dr9   rL   inject_scaler  r<   s     r$   r   z0PatchTSMixerForTimeSeriesClassification.__init__-  s       &v..
*
 
 
	  &5>222 8]c]o p p pD $D  	NN	 	r%   r  NFTr  target_valuesr   r  r  rM   c                    t           j                                        }||n| j        }|                     |||          }t          |t                    r	t          | }| j        ,|                     |j	        |j
        |j                  |_	        |                     |j	                  }||du r |||          }	nd}	|s't          d |	||j	        |j        fD                       S t          |	||j	        |j                  S )  
        target_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
            `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*): Target
            values of the time series that serve as labels for the model. The `target_values` is what the
            Transformer needs during training to learn to output, given the `past_values`. Note that this is NOT
            required for a pretraining task.

            For a forecasting task, the shape is `(batch_size, target_len, num_input_channels)`. Even if we want
            to forecast only specific channels by setting the indices in the `prediction_channel_indices` parameter,
            pass the target data with all channels, as channel filtering for both prediction and target will be
            manually applied before the loss computation.

            For a classification task, it has a shape of `(batch_size,)`.

            For a regression task, it has a shape of `(batch_size, num_targets)`.
        return_loss (`bool`, *optional*):
            Whether to return the loss in the `forward` call.

        Returns:

        Nr  r	  Tc              3      K   | ]}|V  d S r'   r-  r  s     r$   r  zBPatchTSMixerForTimeSeriesClassification.forward.<locals>.<genexpr>|  r  r%   r  )rB   r   CrossEntropyLossr  r  r  r  r  r  r  r  r  r  r   r  )
r"   r  r   r   r  r  r  r  r  r  s
             r$   r*   z/PatchTSMixerForTimeSeriesClassification.forward>  sV   F x((**%0%<kk$BVzz!5# " 
 

 lE** 	B2LAL(-1->->. $"( .? . .L* 		,899$)<)<tE=11HHH 		    2 .	      =$*<&4	
 
 
 	
r%   r  )r+   r,   r-   r.   r   r   r   r  r   r  r  rB   rC   r   r   r*   r0   r1   s   @r$   r  r  !  s
       	 	1      " +*+HIIA$   '+/4 &*H
 H
\H
 |H
 'tn	H

 H
 d^H
 
7H
 H
 H
	  JI
H
 H
 H
 H
 H
r%   r  c                       e Zd ZU dZdZeej                 ed<   dZ	ej        ed<   dZ
ej        ed<   dZeeej                          ed<   dS )PatchTSMixerForRegressionOutputa  
    Output type of [`PatchTSMixerForRegression`].

    Args:
        regression_outputs (`torch.FloatTensor` of shape `(batch_size, num_targets)`):
            Prediction output from the regression head.
        last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_input_channels, num_patches, d_model)`):
            Backbone embeddings before passing through the head.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*):
            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        loss (*optional*, returned when `target_values` is provided, `torch.FloatTensor` of shape `()`):
            Total loss.
    Nr  regression_outputsr  r   )r+   r,   r-   r.   r  r   rB   r  r  r'  r  r   r   r-  r%   r$   r&  r&    r  r%   r&  c                   `     e Zd Zd
dededef fdZdej        dej        dej        fd	Z xZS )r  r>   r9   rL   	expansionc                 D   t                                                       t          j        |dz   ||z            | _        t          j        ||z  |          | _        t          j        dd|z            | _        t          j        d|z  d          | _        || _        d S rQ  )	r   r   r   r   inverse_trans_expansioninverse_trans_compressionmap_scale_expansionmap_scale_compressionrL   )r"   r9   rL   r)  r#   s       r$   r   z!InjectScalerStatistics4D.__init__  s    ')y1i'>Q'R'R$)+9w3F)P)P&#%9QI#>#> %'Yq9}a%@%@"&r%   r(   r  r  c                 .   |                     dd          }|                    d          }|                    dd| j        d          }|                     dd          }|                    d          }|                    dd| j        d          }t	          j        ||gd          }|                     |          }|                     |          }t	          j        ||gd          }|                     |          }| 	                    |          }|S )a  
        Args:
            inputs (`torch.Tensor` of shape `(batch_size, num_input_channels, num_patch, d_model)`)
            loc (`torch.Tensor` of shape `(batch_size, 1, num_input_channels)`)
            scale (`torch.Tensor` of shape `(batch_size, 1, num_input_channels)`)
        Returns:
            `torch.Tensor` of shape `(batch_size, num_input_channels, num_patch, d_model)`
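
        Example (illustrative sketch; assumes the constructor signature `InjectScalerStatistics4D(d_model,
        num_patches)` used elsewhere in this module):

        ```python
        >>> import torch

        >>> block = InjectScalerStatistics4D(d_model=16, num_patches=4)
        >>> hidden = torch.randn(2, 3, 4, 16)  # (batch_size, num_input_channels, num_patch, d_model)
        >>> loc = torch.randn(2, 1, 3)
        >>> scale = torch.rand(2, 1, 3)
        >>> block(hidden, loc=loc, scale=scale).shape  # statistics are mixed in, shape is preserved
        torch.Size([2, 3, 4, 16])
        ```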
        r   r   r   r   )
r?   rW   r>  rL   rB   r   r-  r.  r+  r,  )r"   r(   r  r  r]   stdevconcat_statss          r$   r*   z InjectScalerStatistics4D.forward  s    }}R$$~~b!!{{1a!1155B''##Q4#3Q77y$B777//==11,??FL1r:::--f55//77r%   )r>   )	r+   r,   r-   r/   r   rB   rC   r*   r0   r1   s   @r$   r  r    s        ' ' '# '# ' ' ' ' ' 'el  el        r%   r  c                        e Zd ZdZdef fdZ ee           ee	e
          	 	 	 	 ddej        d	ej        d
ee         dedee         de	fd                        Zdej        defdZ xZS )PatchTSMixerForRegressionz
    `PatchTSMixer` for regression applications.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.

    Returns:
        `None`.
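
    Example (illustrative sketch; `num_targets` sets the dimensionality of the regression output, with the
    `PatchTSMixerConfig` keyword names assumed from the configuration class):

    ```python
    >>> import torch
    >>> from transformers import PatchTSMixerConfig, PatchTSMixerForRegression

    >>> config = PatchTSMixerConfig(context_length=32, patch_length=8, num_input_channels=3, num_targets=2)
    >>> model = PatchTSMixerForRegression(config)

    >>> past_values = torch.randn(2, 32, 3)
    >>> target_values = torch.randn(2, 2)
    >>> outputs = model(past_values=past_values, target_values=target_values)
    >>> outputs.regression_outputs.shape  # (batch_size, num_targets)
    torch.Size([2, 2])
    ```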
    r4   c                    t                                          |           t          |          | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        dk    rd | _        n_t          t          t          d}|                    |j                  }| ||j                  | _        nt          d|j                   |j        dv r!t          |j        |j                  | _        nd | _        t'          || j                  | _        |j        r|                                  d S d S )Nr  r  r   r   r  r  r  )r   r   r  r  r  r   r  r  r   r   r
   r  r  r_   r   r  r9   rL   r  r  r  r  )r"   r4   r  r  r#   s       r$   r   z"PatchTSMixerForRegression.__init__  sW      &v..
K	#)#= %5$*$?!;%'+D$$ ,&%;' '#
 366v7QRRL'+7<F<N+O+O+O(( !\@Z!\!\]]]>222 8]c]o p p pD $D* $ 8
 
 
	  	NN	 	r%   r  NFTr  r   r   r  r  rM   c                 p     j         dk    rt          j        d          }n" j         dk    rt          }nt	          d          ||n j        }                     |||          }t          |t                    r	t          | } j
        , 
                    |j        |j        |j                  |_                             |j                  }||d	u r j        r j        d
k    r't!          j        |dk               rt%          d           j                            |          }	t           fd|D                       } ||	|          }
t)          |
          }
n |||          }
nd}
|s't          d |
||j        |j        fD                       S t-          |
||j        |j                  S )r"  r  r]   r  r  r  Nr  r	  Tr  r   zDtarget_values cannot be negative for negative_binomial distribution.c                 P    g | ]"}|                     d j        j                  #S )r   )r   r4   r  )r   itemr"   s     r$   r   z5PatchTSMixerForRegression.forward.<locals>.<listcomp>>  s,    XXX$tyyT[-DEEXXXr%   c              3      K   | ]}|V  d S r'   r-  r  s     r$   r  z4PatchTSMixerForRegression.forward.<locals>.<genexpr>H  r  r%   )r  r'  r  r   )r  r   r  r  r_   r  r  r  r  r  r  r  r  r  r  r   rB   any	Exceptionr  r  r   r&  )r"   r  r   r   r  r  r  r  r  r  r  s   `          r$   r*   z!PatchTSMixerForRegression.forward  s    @ 9:///DDY%DDQRRR%0%<kk$BVzz!5# " 
 

 lE** 	B2LAL(-1->->. $"( .? . .L* 		,899$)<)<' 
6+/BBBuyQ^abQbGcGcB#$jkkk#7DDUKKXXXXRWXXXYY4m<<+H554}55H 		    2 .	      /$*<&4	
 
 
 	
r%   c                 ,   | j         } | |dd          }| j                            |j                  fdt	          |          D             }t          j        |d                              d|| j        j	                  }t          |          S )	a
  
        Generate sequences of sample predictions from a model with a probability distribution head.

        Args:
            past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Past values of the time series that serves as context in order to predict the target values.

        Return:
            [`SamplePatchTSMixerRegressionOutput`] where the outputs `sequences` tensor will have shape `(batch_size,
            number of samples, num_targets)`.
        NF)r  r   r   c                 8    g | ]}                                 S r-  r  r  s     r$   r   z6PatchTSMixerForRegression.generate.<locals>.<listcomp>v  s2     
 
 
&'L!!
 
 
r%   r   r   r   r  )r  r   r  r'  r   rB   r  r   r4   r  r  )r"   r  r  r  r  r  s        @r$   r  z"PatchTSMixerForRegression.generateY  s       $8 $#!&
 
 
 /<<W=WXX
 
 
 
+01E+F+F
 
 

 +g1---2227KT[Mdee1GDDDDr%   r  )r+   r,   r-   r.   r   r   r   r  r   r&  r  rB   rC   r   r   r*   r  r  r0   r1   s   @r$   r3  r3    s3       	 	%1 % % % % % %N +*+HII+JYhiii '+/4 &*V
 V
\V
 |V
 'tn	V

 V
 d^V
 
)V
 V
 V
 ji JIV
p#E\#E 
,#E #E #E #E #E #E #E #Er%   r3  )NFr   )Nr   )NN)Or.   rY   dataclassesr   typingr   r   r   rB   torch.nnr   transformers.modeling_utilsr   transformers.utilsr   time_series_utilsr
   r   r   utilsr   r   r   r   configuration_patchtsmixerr   
get_loggerr+   loggerr  PATCHTSMIXER_START_DOCSTRINGr  Moduler   r3   rE   rg   rs   r   r   r   r   r   r   r   r  r  r/  rC   r   listr   r/   rL  rc  re  rq  ry  r  r  r  r  r  r  r  r  r  r  r  distributionsDistributionr  r  r  r  r  r&  r  r3  r-  r%   r$   <module>rL     s2	   " !  ! ! ! ! ! ! ) ) ) ) ) ) ) ) ) )        7 7 7 7 7 7 * * * * * * U U U U U U U U U U            ; : : : : : 
	H	%	%&  &! $       *& & & & &BI & & &,$ $ $ $ $RY $ $ $N. . . . .BI . . .b    bi   .- - - - -29 - - -b[B [B [B [B [BBI [B [B [B|B B B B Bbi B B BJ* * * * *	 * * *Z# # # # #	 # # #L&# &# &# &# &#	 &# &# &#R5 5 5 5 5BI 5 5 5pD D D D DRY D D DN) ) ) ) )/ ) ) )2    ry   D &*',7% 7%L7%7% #7% !%	7%
 7% 7% 7% 7%| &*	A% A%LA%$T3Y/A% #A% 	A% A% A% A%J- - - - -29 - - -b9" 9" 9" 9" 9"") 9" 9" 9"z 0  0  0  0  0BI  0  0  0H3; 3; 3; 3; 3;RY 3; 3; 3;n         BI      6 = = = = = = = =Ik Ik Ik Ik Ik5 Ik Ik IkX . . . . .k . . .: 9  W
 W
 W
 W
 W
3 W
 W
	 W
t = = = = ={ = = =*Y
 Y
 Y
 Y
 Y
!< Y
 Y
 Y
x $ $ $ $ $k $ $ $8 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
(#u"/ # #%, # # # #* *5< *(5<:P *fkfr * * * *0PE PE PE PE PE ; PE PE PEf = = = = =K = = =*j
 j
 j
 j
 j
.I j
 j
 j
Z = = = = =k = = =*% % % % %ry % % %PpE pE pE pE pE ; pE pE pE pE pEr%   