
    gp                     J   d dl Z d dlmZmZ ddlmZ ddlmZmZ  ej	        e
          Z e            rd dlZ	 	 	 d dee         ded         d	ee         d
edef         fdZ	 	 	 d dee         ded         d	ee         d
edef         fdZ	 	 	 d dee         ded         d	ee         d
edef         fdZ	 d!deddd	ee         d
edef         fdZ	 d!deddd	ee         d
edef         fdZ	 d!deddd	ee         d
edef         fdZeeeeeedZ	 	 d"dedededee         dee         f
dZd!dedee         fdZd!dedee         fdZd!dedee         fdZd!dedee         fdZd!dedee         fdZd!dedee         fdZeeeeeedZd!dedee         fdZ dS )#    N)OptionalTuple   )PretrainedConfig)is_torch_availableloggingconfigdeviceztorch.deviceseq_lenreturnztorch.Tensorc                    | (t          |          dk    rt          d| d|            t          |          dk    r|d         }|d         }nR| P| j        }t          | d          r| j        nd}t          | d	| j        | j        z            }t          ||z            }d}d|t          j
        d|d
t          j                                                                      |          |z  z  z  }	|	|fS )a  
    Computes the inverse frequencies according to the original RoPE implementation
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    Nr   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_default_rope_parameters`, got `rope_kwargs`= and `config`=basedimpartial_rotary_factor      ?head_dim   dtype)len
ValueError
rope_thetahasattrr   getattrhidden_sizenum_attention_headsinttorcharangeint64floatto)
r	   r
   r   rope_kwargsr   r   r   r   attention_factorinv_freqs
             \/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/modeling_rope_utils.py _compute_default_rope_parametersr(      s2   * c+..22iEPi i`fi i
 
 	
 ;!6"% 		 @GPg@h@h q < <nq6:v/AVE_/_``(2233 du|AsAU[IIIOOQQTTU[\\_bbcdH%%%    c                     | (t          |          dk    rt          d| d|            t          |          dk    r	|d         }n| | j        d         }t          | ||fi |\  }}||z  }||fS )a  
    Computes the inverse frequencies with linear scaling. Credits to the Reddit user /u/kaiokendev
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    Nr   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_linear_scaling_rope_parameters`, got `rope_kwargs`=r   factor)r   r   rope_scalingr(   )r	   r
   r   r$   r+   r&   r%   s          r'   '_compute_linear_scaling_rope_parametersr-   G   s    * c+..22pLWp pgmp p
 
 	
 ;!X&		$X. "B&&RY!i!i]h!i!iH
 H%%%r)   c                    | (t          |          dk    rt          d| d|            t          |          dk    r!|d         }|d         }|d         }|d         }nf| d| j        }t          | d	          r| j        nd
}t          | d| j        | j        z            }	t          |	|z            }| j	        }| j
        d         }d
}
|||k    r|n|}|||z  |z  |dz
  z
  ||dz
  z  z  z  }d
|t          j        d|dt          j                                                                      |          |z  z  z  }||
fS )a4  
    Computes the inverse frequencies with NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length, used to update the dynamic RoPE at inference time.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    Nr   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_dynamic_ntk_parameters`, got `rope_kwargs`=r   r   r   max_position_embeddingsr+   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r/   r,   r   r    r!   r"   r#   )r	   r
   r   r$   r   r   r/   r+   r   r   r%   r&   s               r'   _compute_dynamic_ntk_parametersr0   p   s   , c+..22hDOh h_eh h
 
 	
 ;!6"% "-.G"HX&		 @GPg@h@h q < <nq6:v/AVE_/_``(2233"("@$X. !,;R1R1RggXoG FW$'>>6A:NTW[^ab[bTcddDdu|AsAU[IIIOOQQTTU[\\_bbcdH%%%r)   c                 \   t          |          dk    rt          d|           | j        }t          | d          r| j        nd}t          | d| j        | j        z            }t          ||z            }| j	        }| j
        d         }	| j
                            d          }
|
d	t          j        |	          z  dz   }
| j
                            d
          pd}| j
                            d          pd}d fd}d }|t          j        d|d                                                              |          |z  z  }d|z  }d|	|z  z  } ||||||          \  }}d ||||dz                                                                |          z
  }|d|z
  z  ||z  z   }||
fS )a  
    Computes the inverse frequencies with NTK scaling. Please refer to the
    [original paper](https://arxiv.org/abs/2309.00071)
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r   zYUnexpected arguments: `**rope_kwargs` should be unset in `_compute_yarn_parameters`, got r   r   r   r+   r%   Ng?	beta_fast    	beta_slowr   c                     |t          j        || dz  t           j        z  z            z  dt          j        |          z  z  S )zPInverse dimension formula to find the dimension based on the number of rotationsr   )mathlogpi)num_rotationsr   r   r/   s       r'   find_correction_dimz5_compute_yarn_parameters.<locals>.find_correction_dim   sA    dh6-!:Kdg:UVWWW\]`d`him`n`n\noor)   c                     t          j         | |||                    }t          j         ||||                    }t          |d          t	          ||dz
            fS )z.Find dimension range bounds based on rotationsr   r   )r6   floorceilmaxmin)low_rothigh_rotr   r   r/   lowhighr:   s          r'   find_correction_rangez7_compute_yarn_parameters.<locals>.find_correction_range   sh    j,,Wc4AXYYZZy,,XsDBYZZ[[3{{CcAg....r)   c                     | |k    r|dz  }t          j        |t           j                  | z
  || z
  z  }t          j        |dd          }|S )NgMbP?r   r   r   )r   r    float32clamp)r?   r>   r   linear_func	ramp_funcs        r'   linear_ramp_factorz4_compute_yarn_parameters.<locals>.linear_ramp_factor   sQ    #::5LC|Cu}===Cc	RKQ22	r)   r   )r   r   r   r   r   r   r   r   r   r/   r,   getr6   r7   r   r    r"   r#   )r	   r
   r   r$   r   r   r   r   r/   r+   r%   r2   r4   rD   rJ   	pos_freqsinv_freq_extrapolationinv_freq_interpolationrB   rC   inv_freq_extrapolation_factorr&   r:   s                         @r'   _compute_yarn_parametersrP      s,   ( ;!uhsuu
 
 	
 D<CFLc<d<dmF88jmvz6+=A[+[\\H
h..
/
/C$< *F *../ABB&!1!11C7 #''44:I#''449Ip p p/ / / / /   aa006688;;FCCcIJI 9_ FY$67%%iCG^__IC %&(:(:3cQh(O(O(U(U(W(W(Z(Z[a(b(b$b!!&C"CD
 #@
@	A 
 %%%r)   c                    t          |          dk    rt          d|           | j        }t          | d          r| j        nd}t          | d| j        | j        z            }t          ||z            }| j	        d         }| j	        d         }	| j	        
                    d          }
| j	        
                    d	          }t          | d
          r)|r|| j        k     r| j        }n| j        }| j        }||z  }
n| j        }||
z  }|G|
dk    rd}n>t          j        dt          j        |
          t          j        |          z  z             }||k    r"t!          j        |t           j        |          }n!t!          j        |	t           j        |          }t!          j        d|dt           j        |                                          |z  }d|||z  z  z  }||fS )a  
    Computes the inverse frequencies with LongRoPE scaling. Please refer to the
    [original implementation](https://github.com/microsoft/LongRoPE)
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r   z]Unexpected arguments: `**rope_kwargs` should be unset in `_compute_longrope_parameters`, got r   r   r   long_factorshort_factorr+   r%    original_max_position_embeddingsNr   )r   r
   r   )r   r   r   r   r   r   r   r   r   r,   rK   rT   r/   r6   sqrtr7   r   tensorrF   r    r!   r"   )r	   r
   r   r$   r   r   r   r   rR   rS   r+   r%    expanded_max_position_embeddingsr/   ext_factorsinv_freq_shaper&   s                    r'   _compute_longrope_parametersrZ      s   * ;! 
 
 	

 D<CFLc<d<dmF88jmvz6+=A[+[\\H
h..
/
/C%m4K&~6L $$X..F*../ABB
 v9:: 	L 	Nw!HHH/5/V,,/5/M,"("I14KK"("@+BV+K( S=="#yTXf-=-=I`@a@a-a)abb (*AAAl;emFSSSl<u}VTTT\!S!5;vNNNTTVVY\\NkD.$889H%%%r)   c                    t          | ||fi |\  }}| j        d         }| j        d         }| j        d         }| j        d         }	|	|z  }
|	|z  }dt          j        z  |z  }t	          j        ||
k    ||z  |          }|	|z  |z
  ||z
  z  }d|z
  |z  |z  ||z  z   }||k      ||
k     z  }t	          j        |||          }||fS )a  
    Computes the inverse frequencies for llama 3.1.

    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r+   low_freq_factorhigh_freq_factorrT   r   r   )r(   r,   r6   r8   r   where)r	   r
   r   r$   r&   r%   r+   r\   r]   old_context_lenlow_freq_wavelenhigh_freq_wavelenwaveleninv_freq_llamasmooth_factorsmoothed_inv_freqis_medium_freqs                    r'   _compute_llama3_parametersrg   6  s   ( "B&&RY!i!i]h!i!iH *F)*;<O*+=>)*LMO&8'*::$'kH$G [+;!;X=NPXYYN$w.@EUXgEghM]*n<vEXfHff!223BR8R6SSN[1BNSSN+++r)   )defaultlineardynamicyarnlongropellama3	rope_typereceived_keysrequired_keysoptional_keysignore_keysc                     d|v r|dhz  }|                     d           |||z  }||z
  }|rt          d|  d|           |	||z
  |z
  }n||z
  }|r"t                              d|  d|            dS dS )zYCompare the received keys in `config.rope_scaling` against the expected and optional keystypern   Nz9Missing required keys in `rope_scaling` for 'rope_type'='z': z5Unrecognized keys in `rope_scaling` for 'rope_type'=')addKeyErrorloggerwarning)rn   ro   rp   rq   rr   missing_keysunused_keyss          r'   _check_received_keysr{   n  s     &!+&&& $ =0L qoS\ooamooppp #m3mC#m3 ljyjj]hjjkkkkkl lr)   c                     | j         }|                    d|                    dd                     }dh}t          |                                          }t	          ||||           d S )Nrn   rt   rr   )r,   rK   setkeysr{   )r	   rr   r,   rn   rp   ro   s         r'   !_validate_default_rope_parametersr     sl    &L  l.>.>vt.L.LMMI MM))++,,MM=kZZZZZZr)   c                 `   | j         }|                    d|                    dd                     }ddh}t          |                                          }t	          ||||           |d         }|t          |t                    r|dk     rt                              d|            d S d S )Nrn   rt   r+   r}   r   8`rope_scaling`'s factor field must be a float >= 1, got 	r,   rK   r~   r   r{   
isinstancer"   rw   rx   )r	   rr   r,   rn   rp   ro   r+   s          r'   (_validate_linear_scaling_rope_parametersr     s    &L  l.>.>vt.L.LMMI (+M))++,,MM=kZZZZ(#F~Z66~&3,,ZRXZZ[[[[[ ;G,r)   c                 h   | j         }|                    d|                    dd                     }ddh}dh}t          |                                          }t	          |||||           |d         }|t          |t                    r|dk     rt                              d|            d S d S )Nrn   rt   r+   rT   r}   r   r   r   )r	   rr   r,   rn   rp   rq   ro   r+   s           r'   )_validate_dynamic_scaling_rope_parametersr     s    &L  l.>.>vt.L.LMMI (+M78M))++,,MM=-]hiiii(#F~Z66~&3,,ZRXZZ[[[[[ ;G,r)   c                    | j         }|                    d|                    dd                     }ddh}h d}t          |                                          }t	          |||||           |d         }|t          |t                    r|dk     rt                              d|            |                    d          }|8t          |t                    r|d	k     rt                              d
|            |                    d          }	|	2t          |	t                    st                              d|	            |                    d          }
|
2t          |
t                    st                              d|
            |	pd|
pdk     r#t                              d|	 d|
 d           d S d S )Nrn   rt   r+   >   r2   r4   r%   r}   r   r   r%   r   L`rope_scaling`'s attention_factor field must be a float greater than 0, got r2   z6`rope_scaling`'s beta_fast field must be a float, got r4   z6`rope_scaling`'s beta_slow field must be a float, got r3   r   zO`rope_scaling`'s beta_fast field must be greater than beta_slow, got beta_fast=z( (defaults to 32 if None) and beta_slow=z (defaults to 1 if None)r   )r	   rr   r,   rn   rp   rq   ro   r+   r%   r2   r4   s              r'   _validate_yarn_parametersr     s    &L  l.>.>vt.L.LMMI (+MBBBM))++,,MM=-]hiiii(#F~Z66~&3,,ZRXZZ[[[#''(:;;#Z8H%-P-P#TdghThThm[kmm	
 	
 	
   --IZ	5%A%A[PY[[\\\  --IZ	5%A%A[PY[[\\\RIN++Z^g Z Z6?Z Z Z	
 	
 	
 	
 	
 ,+r)   c                    | j         }|                    d|                    dd                     }h d}h d}t          |                                          }t	          |||||           t          | d          r| j        nd}t          | d| j        | j	        z            }t          ||z            }	|                    d	          }
t          |
t                    s6t          d
 |
D                       rt                              d|
            t!          |
          |	dz  k    s0t                              d|	dz   dt!          |
                      |                    d          }t          |t                    s6t          d |D                       rt                              d|            t!          |          |	dz  k    s0t                              d|	dz   dt!          |                      t          | d          rt                              d           d S |                    d          }|t                              d           n8t          |t$                    r|dk     rt                              d|            |                    d          }|:t          |t$                    r|dk     r!t                              d|            d S d S d S )Nrn   rt   >   rn   rR   rS   >   r+   r%   rT   r}   r   r   r   rS   c              3   N   K   | ] }t          |t          t          f          V  !d S Nr   r   r"   .0xs     r'   	<genexpr>z0_validate_longrope_parameters.<locals>.<genexpr>  s1      1d1dRS*Qe2M2M1d1d1d1d1d1dr)   zC`rope_scaling`'s short_factor field must be a list of numbers, got r   z5`rope_scaling`'s short_factor field must have length z, got rR   c              3   N   K   | ] }t          |t          t          f          V  !d S r   r   r   s     r'   r   z0_validate_longrope_parameters.<locals>.<genexpr>  s1      0b0bQRAU|1L1L0b0b0b0b0b0br)   zB`rope_scaling`'s long_factor field must be a list of numbers, got z4`rope_scaling`'s long_factor field must have length rT   aY  This model has set a `original_max_position_embeddings` field, to be used together with `max_position_embeddings` to determine a scaling factor. Please set the `factor` field of `rope_scaling`with this ratio instead -- we recommend the use of this field over `original_max_position_embeddings`, as it is compatible with most model architectures.r+   z1Missing required keys in `rope_scaling`: 'factor'r   r%   g        r   )r,   rK   r~   r   r{   r   r   r   r   r   r   r   listallrw   rx   r   warning_oncer"   )r	   rr   r,   rn   rp   rq   ro   r   r   r   rS   rR   r+   r%   s                 r'   _validate_longrope_parametersr     s3   &L  l.>.>vt.L.LMMI@@@MVVVM))++,,MM=-]hiiii<CFLc<d<dmF88jmvz6+=A[+[\\H
h..
/
/C##N33LlD)) mc1d1dWc1d1d1d.d.d mk]ikklll|q((rsVWxrr_bco_p_prrsss""=11Kk4(( kS0b0bVa0b0b0b-b-b ki\giijjj{sax''pcUVhpp^abm^n^nppqqq
 v9:: A	
 	
 	
 	
 	
 !!(++>NNNOOOOFE** 	`fsllNN^V\^^___'++,>??'.66 :JS:P:Pucsuu     (':P:Pr)   c                 t   | j         }|                    d|                    dd                     }h d}t          |                                          }t	          ||||           |d         }|t          |t                    r|dk     rt                              d|            |d         }|d	         }|t          |t                    st                              d
|            |t          |t                    st                              d|            ||k    r t                              d| d|            |d         }	|	t          |	t                    st                              d|	            |	| j
        k    r't                              d|	 d| j
                    d S d S )Nrn   rt   >   r+   rn   r\   r]   rT   r}   r+   r   r   r\   r]   z<`rope_scaling`'s low_freq_factor field must be a float, got z=`rope_scaling`'s high_freq_factor field must be a float, got zc`rope_scaling`'s high_freq_factor field must be greater than low_freq_factor, got high_freq_factor=z and low_freq_factor=rT   zP`rope_scaling`'s original_max_position_embeddings field must be an integer, got zg`rope_scaling`'s original_max_position_embeddings field must be less than max_position_embeddings, got z and max_position_embeddings=)r,   rK   r~   r   r{   r   r"   rw   rx   r   r/   )
r	   rr   r,   rn   rp   ro   r+   r\   r]   rT   s
             r'   _validate_llama3_parametersr     s   &L  l.>.>vt.L.LMMIvvvM))++,,MM=kZZZZ(#F~Z66~&3,,ZRXZZ[[["#45O#$67j%&H&HgVegghhhz2BE'J'JiWgiijjj?**HH H6EH H	
 	
 	

 (44V'W$'/zBbdg7h7h/2/2 2	
 	
 	
 (6+IIIo/o oNTNlo o	
 	
 	
 	
 	
 JIr)   c                    t          | dd          }|dS |                    d|                    dd                    }t                              |          }| || |           dS t                              d| d           dS )	zO
    Validate the RoPE config arguments, given a `PretrainedConfig` object
    r,   Nrn   rt   rh   r}   zTMissing validation function mapping in `ROPE_VALIDATION_FUNCTIONS` for 'rope_type'='')r   rK   ROPE_VALIDATION_FUNCTIONSrw   rx   )r	   rr   r,   rn   validation_fns        r'   rope_config_validationr   -  s     6>488L   l.>.>vy.Q.QRRI-11)<<M f+666666oclooo	
 	
 	
 	
 	
r)   )NNNr   )NN)!r6   typingr   r   configuration_utilsr   utilsr   r   
get_logger__name__rw   r   r   r"   r(   r-   r0   rP   rZ   rg   ROPE_INIT_FUNCTIONSstrr~   r{   r   r   r   r   r   r   r   r    r)   r'   <module>r      s    " " " " " " " " 1 1 1 1 1 1 . . . . . . . . 
	H	%	%  LLL *.'+!'& '&%&'&^$'& c]'&
 >5 !'& '& '& '&V *.'+!&& &&%&&&^$&& c]&&
 >5 !&& && && &&T *.'+!0& 0&%&0&^$0& c]0&
 >5 !0& 0& 0& 0&h PTL& L&L&&4L&?G}L&
>5 !L& L& L& L&` PTA& A&A&&4A&?G}A&
>5 !A& A& A& A&J PT(, (,(,&4(,?G}(,
>5 !(, (, (, (,^ 05.$,(   $(!%l lll l C=	l
 #l l l l:[ [.> [XVY] [ [ [ [	\ 	\5E 	\T\]`Ta 	\ 	\ 	\ 	\\ \6F \U]^aUb \ \ \ \
 
&6 
Xc] 
 
 
 
>/ /*: /RU / / / /d!
 !
(8 !
xPS} !
 !
 !
 !
L 168%-)  
 
#3 
(3- 
 
 
 
 
 
r)   