
    go                        d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	 ddl
Z
ddlZ
ddl
mZ ddlmZmZmZ dd	lmZ  e            rdd
lmZ ddlmZ ddlmZmZmZmZmZmZmZmZm Z  ddl!m"Z" ddl#m$Z$ ddlm%Z%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+  e(j,        e-          Z.dZ/dZ0d Z1d Z2d Z3 G d dej4                  Z5 G d dej4                  Z6 G d dej4                  Z7 G d dej4                  Z8 G d d ej4                  Z9 G d! d"ej4                  Z: G d# d$ej4                  Z; G d% d&ej4                  Z< G d' d(ej4                  Z= G d) d*ej4                  Z> G d+ d,ej4                  Z? G d- d.ej4                  Z@ G d/ d0ej4                  ZA G d1 d2ej4                  ZB G d3 d4e"          ZCe G d5 d6e                      ZDd7ZEd8ZF e&d9eE           G d: d;eC                      ZG e&d<eE           G d= d>eC                      ZH e&d?eE           G d@ dAeC                      ZI e&dBeE           G dC dDeC                      ZJ e&dEeE           G dF dGeC                      ZK e&dHeE           G dI dJeC                      ZL e&dKeE           G dL dMeC                      ZM e&dNeE           G dO dPeC                      ZNdS )QzPyTorch FNet model.    N)	dataclass)partial)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )is_scipy_available)linalg)ACT2FN)	BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputModelOutputMultipleChoiceModelOutputNextSentencePredictorOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )
FNetConfigzgoogle/fnet-baser!   c                     | j         d         }|d|d|f         }|                     t          j                  } t          j        d| ||          S )z4Applies 2D matrix multiplication to 3D input arrays.r    Nzbij,jk,ni->bnk)shapetypetorch	complex64einsum)xmatrix_dim_onematrix_dim_two
seq_lengths       b/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/fnet/modeling_fnet.py_two_dim_matmulr-   @   sN    J#KZK*$<=N	uA<(!^^LLL    c                 $    t          | ||          S N)r-   )r(   r)   r*   s      r,   two_dim_matmulr1   I   s    1nn===r.   c                     | }t          t          | j                  dd                   D ]#}t          j                            ||          }$|S )z
    Applies n-dimensional Fast Fourier Transform (FFT) to input array.

    Args:
        x: Input n-dimensional array.

    Returns:
        n-dimensional Fourier transform of input n-dimensional array.
    r    N)axis)reversedrangendimr%   fft)r(   outr3   s      r,   fftnr9   N   sO     Cqvqrr*++ , ,immCdm++Jr.   c                   *     e Zd ZdZ fdZddZ xZS )FNetEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    t                                                       t          j        |j        |j        |j                  | _        t          j        |j        |j                  | _	        t          j        |j
        |j                  | _        t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j        |j                  | _        |                     dt)          j        |j                                      d          d           |                     dt)          j        | j                                        t(          j                  d           d S )	N)padding_idxepsposition_ids)r    F)
persistenttoken_type_idsdtype)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsLinear
projectionDropouthidden_dropout_probdropoutregister_bufferr%   arangeexpandzerosr@   sizelongselfconfig	__class__s     r,   rG   zFNetEmbeddings.__init__a   sP   !|F,=v?Q_e_rsss#%<0NPVPb#c#c %'\&2H&J\%]%]" f&8f>STTT)F$68JKKz&"<== 	EL)GHHOOPWXXej 	 	
 	
 	
 	ek$*;*@*@*B*B%*UUUbg 	 	
 	
 	
 	
 	
r.   Nc                    ||                                 }n|                                 d d         }|d         }|| j        d d d |f         }|mt          | d          r2| j        d d d |f         }|                    |d         |          }|}n+t          j        |t
          j        | j        j                  }|| 	                    |          }| 
                    |          }	||	z   }
|                     |          }|
|z  }
|                     |
          }
|                     |
          }
|                     |
          }
|
S )NrA   r    rC   r   rE   device)r\   r@   hasattrrC   rZ   r%   r[   r]   rd   rL   rP   rN   rQ   rT   rW   )r_   	input_idsrC   r@   inputs_embedsinput_shaper+   buffered_token_type_ids buffered_token_type_ids_expandedrP   
embeddingsrN   s               r,   forwardzFNetEmbeddings.forwardw   se    #..**KK',,..ss3K ^
,QQQ^<L
 !t-.. m*.*=aaa*n*M'3J3Q3QR]^_R`bl3m3m0!A!&[
SWSdSk!l!l!l  00;;M $ : :> J J"%::
"66|DD))
^^J//
__Z00
\\*--
r.   )NNNN)__name__
__module____qualname____doc__rG   rl   __classcell__ra   s   @r,   r;   r;   ^   sR        QQ
 
 
 
 
,! ! ! ! ! ! ! !r.   r;   c                   *     e Zd Z fdZd Zd Z xZS )FNetBasicFourierTransformc                 r    t                                                       |                     |           d S r0   )rF   rG   _init_fourier_transformr^   s     r,   rG   z"FNetBasicFourierTransform.__init__   s3    $$V,,,,,r.   c                 l   |j         s't          t          j        j        d          | _        d S |j        dk    rt                      r|                     dt          j	        t          j        |j                  t          j                             |                     dt          j	        t          j        |j                  t          j                             t          t          | j        | j                  | _        d S t%          j        d           t          | _        d S t          | _        d S )	N)r       dim   dft_mat_hiddenrD   dft_mat_seq)r)   r*   zpSciPy is needed for DFT matrix calculation and is not found. Using TPU optimized fast fourier transform instead.)use_tpu_fourier_optimizationsr   r%   r7   r9   fourier_transformrM   r   rX   tensorr   dftrJ   r&   tpu_short_seq_lengthr1   r}   r|   r   warning)r_   r`   s     r,   rv   z1FNetBasicFourierTransform._init_fourier_transform   s*   3 	*%,UY^%H%H%HD"""+t33!## .$$$el6:f>P3Q3QY^Yh&i&i&i   $$!5<
6;V0W0W_d_n#o#o#o   *1"43CTXTg* * *&&& *   *.&&&%)D"""r.   c                 <    |                      |          j        }|fS r0   )r   real)r_   hidden_statesoutputss      r,   rl   z!FNetBasicFourierTransform.forward   s"     ((77<zr.   )rm   rn   ro   rG   rv   rl   rq   rr   s   @r,   rt   rt      sV        - - - - -* * *.      r.   rt   c                   $     e Zd Z fdZd Z xZS )FNetBasicOutputc                     t                                                       t          j        |j        |j                  | _        d S Nr>   )rF   rG   r   rQ   rJ   rR   r^   s     r,   rG   zFNetBasicOutput.__init__   s9    f&8f>STTTr.   c                 6    |                      ||z             }|S r0   )rQ   r_   r   input_tensors      r,   rl   zFNetBasicOutput.forward   s    |m'CDDr.   rm   rn   ro   rG   rl   rq   rr   s   @r,   r   r      sL        U U U U U      r.   r   c                   $     e Zd Z fdZd Z xZS )FNetFourierTransformc                     t                                                       t          |          | _        t	          |          | _        d S r0   )rF   rG   rt   r_   r   outputr^   s     r,   rG   zFNetFourierTransform.__init__   s;    -f55	%f--r.   c                 n    |                      |          }|                     |d         |          }|f}|S Nr   )r_   r   )r_   r   self_outputsfourier_outputr   s        r,   rl   zFNetFourierTransform.forward   s7    yy//\!_mDD!#r.   r   rr   s   @r,   r   r      sG        . . . . .
      r.   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )FNetIntermediatec                    t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _        d S |j        | _        d S r0   )rF   rG   r   rS   rJ   intermediate_sizedense
isinstance
hidden_actstrr   intermediate_act_fnr^   s     r,   rG   zFNetIntermediate.__init__   sn    Yv163KLL
f'-- 	9'-f.?'@D$$$'-'8D$$$r.   r   returnc                 Z    |                      |          }|                     |          }|S r0   )r   r   r_   r   s     r,   rl   zFNetIntermediate.forward   s,    

=1100??r.   rm   rn   ro   rG   r%   Tensorrl   rq   rr   s   @r,   r   r      s^        9 9 9 9 9U\ el        r.   r   c                   P     e Zd Z fdZdej        dej        dej        fdZ xZS )
FNetOutputc                    t                                                       t          j        |j        |j                  | _        t          j        |j        |j                  | _        t          j	        |j
                  | _        d S r   )rF   rG   r   rS   r   rJ   r   rQ   rR   rU   rV   rW   r^   s     r,   rG   zFNetOutput.__init__   sf    Yv79KLL
f&8f>STTTz&"<==r.   r   r   r   c                     |                      |          }|                     |          }|                     ||z             }|S r0   )r   rW   rQ   r   s      r,   rl   zFNetOutput.forward   s@    

=11]33}|'CDDr.   r   rr   s   @r,   r   r      si        > > > > >U\  RWR^        r.   r   c                   *     e Zd Z fdZd Zd Z xZS )	FNetLayerc                     t                                                       |j        | _        d| _        t	          |          | _        t          |          | _        t          |          | _	        d S Nr    )
rF   rG   chunk_size_feed_forwardseq_len_dimr   fourierr   intermediater   r   r^   s     r,   rG   zFNetLayer.__init__   s^    '-'E$+F33,V44 ((r.   c                     |                      |          }|d         }t          | j        | j        | j        |          }|f}|S r   )r   r   feed_forward_chunkr   r   )r_   r   self_fourier_outputsr   layer_outputr   s         r,   rl   zFNetLayer.forward   sN    #||M::-a00#T%A4CSUc
 
  /r.   c                 \    |                      |          }|                     ||          }|S r0   )r   r   )r_   r   intermediate_outputr   s       r,   r   zFNetLayer.feed_forward_chunk  s0    "//??{{#6GGr.   )rm   rn   ro   rG   rl   r   rq   rr   s   @r,   r   r      sV        ) ) ) ) )
 
 
      r.   r   c                   &     e Zd Z fdZddZ xZS )FNetEncoderc                     t                                                       | _        t          j        fdt          j                  D                       | _        d| _        d S )Nc                 .    g | ]}t                    S  )r   ).0_r`   s     r,   
<listcomp>z(FNetEncoder.__init__.<locals>.<listcomp>  s!    #_#_#_!If$5$5#_#_#_r.   F)	rF   rG   r`   r   
ModuleListr5   num_hidden_layerslayergradient_checkpointingr^   s    `r,   rG   zFNetEncoder.__init__  s`    ]#_#_#_#_uVE]?^?^#_#_#_``
&+###r.   FTc                 8   |rdnd }t          | j                  D ]J\  }}|r||fz   }| j        r#| j        r|                     |j        |          }n ||          }|d         }K|r||fz   }|st          d ||fD                       S t          ||          S )Nr   r   c              3      K   | ]}||V  	d S r0   r   )r   vs     r,   	<genexpr>z&FNetEncoder.forward.<locals>.<genexpr>+  s"      XXq!-----XXr.   )last_hidden_stater   )	enumerater   r   training_gradient_checkpointing_func__call__tupler   )r_   r   output_hidden_statesreturn_dictall_hidden_statesilayer_modulelayer_outputss           r,   rl   zFNetEncoder.forward  s    "6@BBD(44 		- 		-OA|# I$58H$H!* <t} < $ A A,BWYf g g ,] ; ;)!,MM 	E 1]4D D 	YXX]4E$FXXXXXXN_````r.   )FTr   rr   s   @r,   r   r     sT        , , , , ,a a a a a a a ar.   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )
FNetPoolerc                     t                                                       t          j        |j        |j                  | _        t          j                    | _        d S r0   )rF   rG   r   rS   rJ   r   Tanh
activationr^   s     r,   rG   zFNetPooler.__init__2  sC    Yv163EFF
'))r.   r   r   c                 r    |d d df         }|                      |          }|                     |          }|S r   )r   r   )r_   r   first_token_tensorpooled_outputs       r,   rl   zFNetPooler.forward7  s@     +111a40

#56666r.   r   rr   s   @r,   r   r   1  s^        $ $ $ $ $
U\ el        r.   r   c                   B     e Zd Z fdZdej        dej        fdZ xZS )FNetPredictionHeadTransformc                 V   t                                                       t          j        |j        |j                  | _        t          |j        t                    rt          |j                 | _
        n|j        | _
        t          j        |j        |j                  | _        d S r   )rF   rG   r   rS   rJ   r   r   r   r   r   transform_act_fnrQ   rR   r^   s     r,   rG   z$FNetPredictionHeadTransform.__init__B  s    Yv163EFF
f'-- 	6$*6+<$=D!!$*$5D!f&8f>STTTr.   r   r   c                     |                      |          }|                     |          }|                     |          }|S r0   )r   r   rQ   r   s     r,   rl   z#FNetPredictionHeadTransform.forwardK  s=    

=11--m<<}55r.   r   rr   s   @r,   r   r   A  sc        U U U U UU\ el        r.   r   c                   ,     e Zd Z fdZd ZddZ xZS )FNetLMPredictionHeadc                 :   t                                                       t          |          | _        t	          j        |j        |j                  | _        t	          j	        t          j        |j                            | _        | j        | j        _        d S r0   )rF   rG   r   	transformr   rS   rJ   rI   decoder	Parameterr%   r[   biasr^   s     r,   rG   zFNetLMPredictionHead.__init__S  ss    4V<< y!3V5FGGLV->!?!?@@	 Ir.   c                 Z    |                      |          }|                     |          }|S r0   )r   r   r   s     r,   rl   zFNetLMPredictionHead.forward^  s*    }55]33r.   r   Nc                     | j         j        j        j        dk    r| j        | j         _        d S | j         j        | _        d S )Nmeta)r   r   rd   r$   r_   s    r,   _tie_weightsz!FNetLMPredictionHead._tie_weightsc  s<    <#(F22 $	DL )DIIIr.   )r   N)rm   rn   ro   rG   rl   r   rq   rr   s   @r,   r   r   R  s[        	& 	& 	& 	& 	&  
* * * * * * * *r.   r   c                   $     e Zd Z fdZd Z xZS )FNetOnlyMLMHeadc                 p    t                                                       t          |          | _        d S r0   )rF   rG   r   predictionsr^   s     r,   rG   zFNetOnlyMLMHead.__init__m  s/    /77r.   c                 0    |                      |          }|S r0   )r   )r_   sequence_outputprediction_scoress      r,   rl   zFNetOnlyMLMHead.forwardq  s     ,,_==  r.   r   rr   s   @r,   r   r   l  sG        8 8 8 8 8! ! ! ! ! ! !r.   r   c                   $     e Zd Z fdZd Z xZS )FNetOnlyNSPHeadc                     t                                                       t          j        |j        d          | _        d S Nrx   )rF   rG   r   rS   rJ   seq_relationshipr^   s     r,   rG   zFNetOnlyNSPHead.__init__x  s6     "	&*<a @ @r.   c                 0    |                      |          }|S r0   )r   )r_   r   seq_relationship_scores      r,   rl   zFNetOnlyNSPHead.forward|  s    !%!6!6}!E!E%%r.   r   rr   s   @r,   r   r   w  sL        A A A A A& & & & & & &r.   r   c                   $     e Zd Z fdZd Z xZS )FNetPreTrainingHeadsc                     t                                                       t          |          | _        t	          j        |j        d          | _        d S r   )rF   rG   r   r   r   rS   rJ   r   r^   s     r,   rG   zFNetPreTrainingHeads.__init__  sF    /77 "	&*<a @ @r.   c                 ^    |                      |          }|                     |          }||fS r0   )r   r   )r_   r   r   r   r   s        r,   rl   zFNetPreTrainingHeads.forward  s6     ,,_==!%!6!6}!E!E "888r.   r   rr   s   @r,   r   r     sL        A A A A A
9 9 9 9 9 9 9r.   r   c                   $    e Zd ZdZeZdZdZd ZdS )FNetPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    fnetTc                    t          |t          j                  rT|j        j                            d| j        j                   |j         |j        j        	                                 dS dS t          |t          j
                  r_|j        j                            d| j        j                   |j        +|j        j        |j                 	                                 dS dS t          |t          j                  r?|j        j        	                                 |j        j                            d           dS dS )zInitialize the weightsg        )meanstdNg      ?)r   r   rS   weightdatanormal_r`   initializer_ranger   zero_rH   r=   rQ   fill_)r_   modules     r,   _init_weightsz!FNetPreTrainedModel._init_weights  s)   fbi(( 	* M&&CT[5R&SSS{& &&((((( '&-- 	*M&&CT[5R&SSS!-"6#56<<>>>>> .--- 	*K""$$$M$$S)))))	* 	*r.   N)	rm   rn   ro   rp   r!   config_classbase_model_prefixsupports_gradient_checkpointingr  r   r.   r,   r   r     s@         
 L&*#* * * * *r.   r   c                       e Zd ZU dZdZeej                 ed<   dZ	ej        ed<   dZ
ej        ed<   dZeeej                          ed<   dS )FNetForPreTrainingOutputa  
    Output type of [`FNetForPreTraining`].

    Args:
        loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
            Total loss as the sum of the masked language modeling loss and the next sequence prediction
            (classification) loss.
        prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
            before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer
            plus the initial embedding outputs.
    Nlossprediction_logitsseq_relationship_logitsr   )rm   rn   ro   rp   r  r   r%   FloatTensor__annotations__r  r  r   r   r   r.   r,   r  r    s{          $ )-D(5$
%,,,+/u(///15U.5558<M8E%"345<<<<<r.   r  aG  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`FNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)

        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
z^The bare FNet Model transformer outputting raw hidden-states without any specific head on top.c                   R    e Zd ZdZd fd	Zd Zd Z ee	                    d                     e
eee          	 	 	 	 	 	 dd	eej                 d
eej                 deej                 deej                 dee         dee         deeef         fd                        Z xZS )	FNetModelz

    The model can behave as an encoder, following the architecture described in [FNet: Mixing Tokens with Fourier
    Transforms](https://arxiv.org/abs/2105.03824) by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon.

    Tc                     t                                          |           || _        t          |          | _        t          |          | _        |rt          |          nd | _        | 	                                 d S r0   )
rF   rG   r`   r;   rk   r   encoderr   pooler	post_init)r_   r`   add_pooling_layerra   s      r,   rG   zFNetModel.__init__  sq       (00"6**,=Gj(((4 	r.   c                     | j         j        S r0   rk   rL   r   s    r,   get_input_embeddingszFNetModel.get_input_embeddings
  s    ..r.   c                     || j         _        d S r0   r  )r_   values     r,   set_input_embeddingszFNetModel.set_input_embeddings  s    */'''r.   batch_size, sequence_length
checkpointoutput_typer  Nrf   rC   r@   rg   r   r   r   c                 X   ||n| j         j        }||n| j         j        }||t          d          ||                                }|\  }}	n3|"|                                d d         }|\  }}	nt          d          | j         j        r%|	dk    r| j         j        |	k    rt          d          ||j        n|j        }
|gt          | j	        d          r1| j	        j
        d d d |	f         }|                    ||	          }|}n!t          j        |t          j        |
          }| 	                    ||||          }|                     |||	          }|d
         }| j        |                     |          nd }|s||f|dd          z   S t#          |||j                  S )NzDYou cannot specify both input_ids and inputs_embeds at the same timerA   z5You have to specify either input_ids or inputs_embedsr{   zThe `tpu_short_seq_length` in FNetConfig should be set equal to the sequence length being passed to the model when using TPU optimizations.rC   rc   )rf   r@   rC   rg   )r   r   r   r    )r   pooler_outputr   )r`   r   use_return_dict
ValueErrorr\   r~   r   rd   re   rk   rC   rZ   r%   r[   r]   r  r  r   r   )r_   rf   rC   r@   rg   r   r   rh   
batch_sizer+   rd   ri   rj   embedding_outputencoder_outputsr   r)  s                    r,   rl   zFNetModel.forward  s&     %9$D  $+Jj 	 &1%<kk$+B] ]%>cddd"#..**K%0"J

&',,..ss3K%0"J

TUUU K5	d""0J>>;  
 &/%:!!@T!t(899 [*./*HKZK*X'3J3Q3QR\^h3i3i0!A!&[
SY!Z!Z!Z??%)'	 + 
 
 ,,!5# ' 
 

 *!,8<8OO444UY 	J#]3oabb6III)-')7
 
 
 	
r.   )T)NNNNNN)rm   rn   ro   rp   rG   r   r#  r   FNET_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOCr   r%   
LongTensorr  boolr   r   rl   rq   rr   s   @r,   r  r    si       
 
 
 
 
 
 
/ / /0 0 0 +*+@+G+GHe+f+fgg&#$   15593759/3&*C
 C
E,-C
 !!12C
 u/0	C

   12C
 'tnC
 d^C
 
uo%	&C
 C
 C
  hgC
 C
 C
 C
 C
r.   r  z
    FNet Model with two heads on top as done during the pretraining: a `masked language modeling` head and a `next
    sentence prediction (classification)` head.
    c                       e Zd ZddgZ fdZd Zd Z ee	                    d                     e
ee          	 	 	 	 	 	 	 	 dd	eej                 d
eej                 deej                 deej                 deej                 deej                 dee         dee         deeef         fd                        Z xZS )FNetForPreTrainingcls.predictions.decoder.biascls.predictions.decoder.weightc                     t                                          |           t          |          | _        t	          |          | _        |                                  d S r0   )rF   rG   r  r  r   clsr  r^   s     r,   rG   zFNetForPreTraining.__init__f  sQ       f%%	'// 	r.   c                 $    | j         j        j        S r0   r:  r   r   r   s    r,   get_output_embeddingsz(FNetForPreTraining.get_output_embeddingso      x#++r.   c                 T    || j         j        _        |j        | j         j        _        d S r0   r:  r   r   r   r_   new_embeddingss     r,   set_output_embeddingsz(FNetForPreTraining.set_output_embeddingsr  %    '5$$2$7!!!r.   r$  r'  r  Nrf   rC   r@   rg   labelsnext_sentence_labelr   r   r   c	                    ||n| j         j        }|                     ||||||          }	|	dd         \  }
}|                     |
|          \  }}d}||t	                      } ||                    d| j         j                  |                    d                    } ||                    dd          |                    d                    }||z   }|s||f|	dd         z   }||f|z   n|S t          ||||	j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring) Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.
        kwargs (`Dict[str, any]`, *optional*, defaults to `{}`):
            Used to hide legacy arguments that have been deprecated.

        Returns:

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForPreTraining.from_pretrained("google/fnet-base")
        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> prediction_logits = outputs.prediction_logits
        >>> seq_relationship_logits = outputs.seq_relationship_logits
        ```NrC   r@   rg   r   r   rx   rA   )r  r  r  r   )	r`   r*  r  r:  r
   viewrI   r  r   )r_   rf   rC   r@   rg   rF  rG  r   r   r   r   r   r   r   
total_lossloss_fctmasked_lm_lossnext_sentence_lossr   s                      r,   rl   zFNetForPreTraining.forwardv  s_   T &1%<kk$+B])))%'!5#  
 
 *1!&48HH_m4\4\11
"5"A'))H%X&7&<&<RAW&X&XZ`ZeZefhZiZijjN!)*@*E*Eb!*L*LNaNfNfgiNjNj!k!k'*<<J 	R')?@7122;NF/9/EZMF**6Q'/$:!/	
 
 
 	
r.   NNNNNNNN)rm   rn   ro   _tied_weights_keysrG   r=  rC  r   r/  r0  r   r  r2  r   r%   r   r4  r   r   rl   rq   rr   s   @r,   r6  r6  \  s        9:Z[    , , ,8 8 8 +*+@+G+GHe+f+fgg+CRabbb -115/304)-6:/3&*F
 F
EL)F
 !.F
 u|,	F

  -F
 &F
 &el3F
 'tnF
 d^F
 
u..	/F
 F
 F
 cb hgF
 F
 F
 F
 F
r.   r6  z2FNet Model with a `language modeling` head on top.c                   p    e Zd ZddgZ fdZd Zd Z ee	                    d                     e
eee          	 	 	 	 	 	 	 dd	eej                 d
eej                 deej                 deej                 deej                 dee         dee         deeef         fd                        Z xZS )FNetForMaskedLMr7  r8  c                     t                                          |           t          |          | _        t	          |          | _        |                                  d S r0   )rF   rG   r  r  r   r:  r  r^   s     r,   rG   zFNetForMaskedLM.__init__  Q       f%%	"6** 	r.   c                 $    | j         j        j        S r0   r<  r   s    r,   r=  z%FNetForMaskedLM.get_output_embeddings  r>  r.   c                 T    || j         j        _        |j        | j         j        _        d S r0   r@  rA  s     r,   rC  z%FNetForMaskedLM.set_output_embeddings  rD  r.   r$  r%  Nrf   rC   r@   rg   rF  r   r   r   c                    ||n| j         j        }|                     ||||||          }|d         }	|                     |	          }
d}|Kt	                      } ||
                    d| j         j                  |                    d                    }|s|
f|dd         z   }||f|z   n|S t          ||
|j                  S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
        NrI  r   rA   rx   r  logitsr   )	r`   r*  r  r:  r
   rJ  rI   r   r   )r_   rf   rC   r@   rg   rF  r   r   r   r   r   rM  rL  r   s                 r,   rl   zFNetForMaskedLM.forward  s    , &1%<kk$+B])))%'!5#  
 
 "!* HH_55'))H%X&7&<&<RAW&X&XZ`ZeZefhZiZijjN 	Z')GABBK7F3A3M^%..SYY>:K[b[pqqqqr.   NNNNNNN)rm   rn   ro   rP  rG   r=  rC  r   r/  r0  r   r1  r   r2  r   r%   r   r4  r   r   rl   rq   rr   s   @r,   rR  rR    sw       8:Z[    , , ,8 8 8 +*+@+G+GHe+f+fgg&"$   -115/304)-/3&*'r 'rEL)'r !.'r u|,	'r
  -'r &'r 'tn'r d^'r 
un$	%'r 'r 'r  hg'r 'r 'r 'r 'rr.   rR  zJFNet Model with a `next sentence prediction (classification)` head on top.c                   Z    e Zd Z fdZ ee                    d                     eee	          	 	 	 	 	 	 	 dde
ej                 de
ej                 de
ej                 de
ej                 d	e
ej                 d
e
e         de
e         deeef         fd                        Z xZS )FNetForNextSentencePredictionc                     t                                          |           t          |          | _        t	          |          | _        |                                  d S r0   )rF   rG   r  r  r   r:  r  r^   s     r,   rG   z&FNetForNextSentencePrediction.__init__
  rT  r.   r$  rE  Nrf   rC   r@   rg   rF  r   r   r   c                    d|v r/t          j        dt                     |                    d          }||n| j        j        }|                     ||||||          }	|	d         }
|                     |
          }d}|At                      } ||	                    dd          |	                    d                    }|s|f|	dd         z   }||f|z   n|S t          |||	j                  S )	a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring). Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Returns:

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForNextSentencePrediction
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForNextSentencePrediction.from_pretrained("google/fnet-base")
        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
        >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
        >>> logits = outputs.logits
        >>> assert logits[0, 0] < logits[0, 1]  # next sentence was random
        ```rG  zoThe `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.NrI  r    rA   rx   rX  )warningswarnFutureWarningpopr`   r*  r  r:  r
   rJ  r   r   )r_   rf   rC   r@   rg   rF  r   r   kwargsr   r   seq_relationship_scoresrN  rL  r   s                  r,   rl   z%FNetForNextSentencePrediction.forward  s:   N !F**M%  
 ZZ 566F%0%<kk$+B])))%'!5#  
 
  
"&((="9"9!'))H!)*A*F*Fr1*M*Mv{{[]!_!_ 	b-/'!""+=F7I7U')F22[aa*#*!/
 
 
 	
r.   rZ  )rm   rn   ro   rG   r   r/  r0  r   r   r2  r   r%   r   r4  r   r   rl   rq   rr   s   @r,   r\  r\    sD       
     +*+@+G+GHe+f+fgg+FUdeee -115/304)-/3&*I
 I
EL)I
 !.I
 u|,	I

  -I
 &I
 'tnI
 d^I
 
u11	2I
 I
 I
 fe hgI
 I
 I
 I
 I
r.   r\  z
    FNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    c                   \    e Zd Z fdZ ee                    d                     eee	e
          	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 d	eej                 d
ee         dee         deee	f         fd                        Z xZS )FNetForSequenceClassificationc                 6   t                                          |           |j        | _        t          |          | _        t          j        |j                  | _        t          j	        |j
        |j                  | _        |                                  d S r0   rF   rG   
num_labelsr  r  r   rU   rV   rW   rS   rJ   
classifierr  r^   s     r,   rG   z&FNetForSequenceClassification.__init__i  sy        +f%%	z&"<==)F$68IJJ 	r.   r$  r%  Nrf   rC   r@   rg   rF  r   r   r   c                    ||n| j         j        }|                     ||||||          }|d         }	|                     |	          }	|                     |	          }
d}|Z| j         j        f| j        dk    rd| j         _        nN| j        dk    r7|j        t          j	        k    s|j        t          j
        k    rd| j         _        nd| j         _        | j         j        dk    rWt                      }| j        dk    r1 ||
                                |                                          }n ||
|          }n| j         j        dk    rGt                      } ||
                    d| j                  |                    d                    }n*| j         j        dk    rt                      } ||
|          }|s|
f|dd         z   }||f|z   n|S t!          ||
|j        	          S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        NrI  r    
regressionsingle_label_classificationmulti_label_classificationrA   rx   rX  )r`   r*  r  rW   rj  problem_typeri  rE   r%   r]   intr   squeezer
   rJ  r	   r   r   )r_   rf   rC   r@   rg   rF  r   r   r   r   rY  r  rL  r   s                 r,   rl   z%FNetForSequenceClassification.forwardt  s   , &1%<kk$+B])))%'!5#  
 
  
]33//{'/?a''/;DK,,_q((flej.H.HFL\a\eLeLe/LDK,,/KDK,{'<77"99?a''#8FNN$4$4fnn6F6FGGDD#8FF33DD)-JJJ+--xB @ @&++b//RR)-III,..x// 	FY,F)-)9TGf$$vE'T&PWPeffffr.   rZ  )rm   rn   ro   rG   r   r/  r0  r   r1  r   r2  r   r%   r   r4  r   r   rl   rq   rr   s   @r,   rf  rf  a  sN       	 	 	 	 	 +*+@+G+GHe+f+fgg&,$   -115/304)-/3&*9g 9gEL)9g !.9g u|,	9g
  -9g &9g 'tn9g d^9g 
u..	/9g 9g 9g  hg9g 9g 9g 9g 9gr.   rf  z
    FNet Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a
    softmax) e.g. for RocStories/SWAG tasks.
    c                   \    e Zd Z fdZ ee                    d                     eee	e
          	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 d	eej                 d
ee         dee         deee	f         fd                        Z xZS )FNetForMultipleChoicec                    t                                          |           t          |          | _        t	          j        |j                  | _        t	          j        |j	        d          | _
        |                                  d S r   )rF   rG   r  r  r   rU   rV   rW   rS   rJ   rj  r  r^   s     r,   rG   zFNetForMultipleChoice.__init__  sl       f%%	z&"<==)F$6:: 	r.   z(batch_size, num_choices, sequence_lengthr%  Nrf   rC   r@   rg   rF  r   r   r   c                 N   ||n| j         j        }||j        d         n|j        d         }|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|)|                    d|                    d                    nd}|=|                    d|                    d          |                    d                    nd}|                     ||||||          }	|	d         }
|                     |
          }
|                     |
          }|                    d|          }d}|t                      } |||          }|s|f|	dd         z   }||f|z   n|S t          |||	j
                  S )aJ  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr    rA   rI  rx   rX  )r`   r*  r#   rJ  r\   r  rW   rj  r
   r   r   )r_   rf   rC   r@   rg   rF  r   r   num_choicesr   r   rY  reshaped_logitsr  rL  r   s                   r,   rl   zFNetForMultipleChoice.forward  s   , &1%<kk$+B],5,Aioa((}GZ[\G]>G>SINN2y~~b'9'9:::Y]	M[Mg,,R1D1DR1H1HIIImqGSG_|((\->->r-B-BCCCei ( r=#5#5b#9#9=;M;Mb;Q;QRRR 	 )))%'!5#  
 
  
]33// ++b+66'))H8OV44D 	F%''!""+5F)-)9TGf$$vE(d?ZaZoppppr.   rZ  )rm   rn   ro   rG   r   r/  r0  r   r1  r   r2  r   r%   r   r4  r   r   rl   rq   rr   s   @r,   rs  rs    sN            +*+@+G+GHr+s+stt&-$   -115/304)-/3&*4q 4qEL)4q !.4q u|,	4q
  -4q &4q 'tn4q d^4q 
u//	04q 4q 4q  ut4q 4q 4q 4q 4qr.   rs  z
    FNet Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    Named-Entity-Recognition (NER) tasks.
    c                   \    e Zd Z fdZ ee                    d                     eee	e
          	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 d	eej                 d
ee         dee         deee	f         fd                        Z xZS )FNetForTokenClassificationc                 6   t                                          |           |j        | _        t          |          | _        t          j        |j                  | _        t          j	        |j
        |j                  | _        |                                  d S r0   rh  r^   s     r,   rG   z#FNetForTokenClassification.__init__  sy        +f%%	z&"<==)F$68IJJ 	r.   r$  r%  Nrf   rC   r@   rg   rF  r   r   r   c                    ||n| j         j        }|                     ||||||          }|d         }	|                     |	          }	|                     |	          }
d}|Ft                      } ||
                    d| j                  |                    d                    }|s|
f|dd         z   }||f|z   n|S t          ||
|j	                  S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        NrI  r   rA   rx   rX  )
r`   r*  r  rW   rj  r
   rJ  ri  r   r   )r_   rf   rC   r@   rg   rF  r   r   r   r   rY  r  rL  r   s                 r,   rl   z"FNetForTokenClassification.forward  s    ( &1%<kk$+B])))%'!5#  
 
 "!*,,7711'))H8FKKDO<<fkk"ooNND 	FY,F)-)9TGf$$vE$$vWMbccccr.   rZ  )rm   rn   ro   rG   r   r/  r0  r   r1  r   r2  r   r%   r   r4  r   r   rl   rq   rr   s   @r,   rz  rz    sN       
 
 
 
 
 +*+@+G+GHe+f+fgg&)$   -115/304)-/3&*(d (dEL)(d !.(d u|,	(d
  -(d &(d 'tn(d d^(d 
u++	,(d (d (d  hg(d (d (d (d (dr.   rz  z
    FNet Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
    layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
    c                   x    e Zd Z fdZ ee                    d                     eee	e
          	 	 	 	 	 	 	 	 ddeej                 deej                 deej                 deej                 d	eej                 d
eej                 dee         dee         deee	f         fd                        Z xZS )FNetForQuestionAnsweringc                     t                                          |           |j        | _        t          |          | _        t          j        |j        |j                  | _        | 	                                 d S r0   )
rF   rG   ri  r  r  r   rS   rJ   
qa_outputsr  r^   s     r,   rG   z!FNetForQuestionAnswering.__init__R  se        +f%%	)F$68IJJ 	r.   r$  r%  Nrf   rC   r@   rg   start_positionsend_positionsr   r   r   c	                    ||n| j         j        }|                     ||||||          }	|	d         }
|                     |
          }|                    dd          \  }}|                    d                                          }|                    d                                          }d}||t          |                                          dk    r|                    d          }t          |                                          dk    r|                    d          }|                    d          }|	                    d|          }|	                    d|          }t          |          } |||          } |||          }||z   dz  }|s||f|	dd         z   }||f|z   n|S t          ||||	j        	          S )
a  
        start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        NrI  r   r    rA   ry   )ignore_indexrx   )r  start_logits
end_logitsr   )r`   r*  r  r  splitrq  
contiguouslenr\   clampr
   r   r   )r_   rf   rC   r@   rg   r  r  r   r   r   r   rY  r  r  rK  ignored_indexrL  
start_lossend_lossr   s                       r,   rl   z FNetForQuestionAnswering.forward]  s   6 &1%<kk$+B])))%'!5#  
 
 "!*11#)<<r<#:#: j#++B//::<<''++6688

&=+D?''))**Q.."1"9"9""="==%%''((1,, - 5 5b 9 9(--a00M-33A}EEO)//=AAM']CCCH!,@@Jx
M::H$x/14J 	R"J/'!""+=F/9/EZMF**6Q+,:]d]r
 
 
 	
r.   rO  )rm   rn   ro   rG   r   r/  r0  r   r1  r   r2  r   r%   r   r4  r   r   rl   rq   rr   s   @r,   r~  r~  J  sQ       	 	 	 	 	 +*+@+G+GHe+f+fgg&0$   -115/3042604/3&*>
 >
EL)>
 !.>
 u|,	>

  ->
 "%,/>
  ->
 'tn>
 d^>
 
u22	3>
 >
 >
  hg>
 >
 >
 >
 >
r.   r~  )Orp   r_  dataclassesr   	functoolsr   typingr   r   r   r%   torch.utils.checkpointr   torch.nnr	   r
   r   utilsr   scipyr   activationsr   modeling_outputsr   r   r   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   r   r   r   configuration_fnetr!   
get_loggerrm   loggerr1  r2  r-   r1   r9   Moduler;   rt   r   r   r   r   r   r   r   r   r   r   r   r   r   r  FNET_START_DOCSTRINGr/  r  r6  rR  r\  rf  rs  rz  r~  r   r.   r,   <module>r     s      ! ! ! ! ! !       ) ) ) ) ) ) ) ) ) )            A A A A A A A A A A ' ' ' ' ' '   ! ! ! ! ! !
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 . - - - - - 6 6 6 6 6 6              + * * * * * 
	H	%	%( M M M> > >
   : : : : :RY : : :z# # # # #	 # # #L    bi   
 
 
 
 
29 
 
 
    ry               	   6a a a a a") a a a>            ")   "* * * * *29 * * *4! ! ! ! !bi ! ! !& & & & &bi & & &	9 	9 	9 	9 	929 	9 	9 	9* * * * */ * * *8 = = = = ={ = = =2	   F d c
 c
 c
 c
 c
# c
 c
	 c
L   [
 [
 [
 [
 [
, [
 [
 [
| NPdee@r @r @r @r @r) @r @r fe@rF T U
 U
 U
 U
 U
$7 U
 U
	 U
p   Kg Kg Kg Kg Kg$7 Kg Kg Kg\   Eq Eq Eq Eq Eq/ Eq Eq EqP   ;d ;d ;d ;d ;d!4 ;d ;d ;d|   P
 P
 P
 P
 P
2 P
 P
 P
 P
 P
r.   