o
    ?"go                     @   sd  d dl Z d dlmZ ddlmZ ddlmZmZ ee	Z
e r$d dlZ			d.dee ded d	ee d
edef fddZ			d.dee ded d	ee d
edef fddZ			d.dee ded d	ee d
edef fddZ	d/deddd	ee d
edef fddZ	d/deddd	ee d
edef fddZ	d/deddd	ee d
edef fddZeeeeeedZ		d0dedededee dee f
ddZd/dedee fd d!Zd/dedee fd"d#Zd/dedee fd$d%Zd/dedee fd&d'Zd/dedee fd(d)Zd/dedee fd*d+ZeeeeeedZd/dedee fd,d-Z dS )1    N)Optional   )PretrainedConfig)is_torch_availableloggingconfigdeviceztorch.deviceseq_lenreturnztorch.Tensorc           
      K   s   | durt |dkrtd| d|  t |dkr#|d }|d }n!| durD| j}t| dr2| jnd}t| d	| j| j }t|| }d}d|t	j
d|d
t	jd ||   }	|	|fS )a  
    Computes the inverse frequencies according to the original RoPE implementation
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    Nr   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_default_rope_parameters`, got `rope_kwargs`= and `config`=basedimpartial_rotary_factor      ?head_dim   dtype)len
ValueError
rope_thetahasattrr   getattrhidden_sizenum_attention_headsinttorcharangeint64floatto)
r   r   r	   rope_kwargsr   r   r   r   attention_factorinv_freq r$   b/mnt/skqttb/ctump_chatbot/chatbot/lib/python3.10/site-packages/transformers/modeling_rope_utils.py _compute_default_rope_parameters   s&   
*r&   c                 K   sx   | durt |dkrtd| d|  t |dkr|d }n	| dur(| jd }t| ||fi |\}}|| }||fS )a  
    Computes the inverse frequencies with linear scaling. Credits to the Reddit user /u/kaiokendev
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    Nr   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_linear_scaling_rope_parameters`, got `rope_kwargs`=r   factor)r   r   rope_scalingr&   )r   r   r	   r!   r'   r#   r"   r$   r$   r%   '_compute_linear_scaling_rope_parametersG   s   

r)   c                 K   s  | durt |dkrtd| d|  t |dkr+|d }|d }|d }|d }n)| durT| j}t| d	r:| jnd
}t| d| j| j }	t|	| }| j	}| j
d }d
}
|dur`||kr`|n|}||| | |d  ||d    }d
|tjd|dtjd ||   }||
fS )a4  
    Computes the inverse frequencies with NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length, used to update the dynamic RoPE at inference time.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    Nr   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_dynamic_ntk_parameters`, got `rope_kwargs`=r   r   r   max_position_embeddingsr'   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r*   r(   r   r   r   r   r    )r   r   r	   r!   r   r   r*   r'   r   r   r"   r#   r$   r$   r%   _compute_dynamic_ntk_parametersp   s2   

$*r+   c                    sH  t |dkrtd| | j}t| dr| jnd}t| d| j| j }t|| }| j	}| j
d }	| j
d}
|
du rEd	t|	 d }
| j
d
pLd}| j
dpTd}dd   fdd}dd }|td|d ||  }d| }d|	|  }||||||\}}d||||d  | }|d|  ||  }||
fS )a  
    Computes the inverse frequencies with NTK scaling. Please refer to the
    [original paper](https://arxiv.org/abs/2309.00071)
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r   zYUnexpected arguments: `**rope_kwargs` should be unset in `_compute_yarn_parameters`, got r   r   r   r'   r"   Ng?	beta_fast    	beta_slowr   c                 S   s*   |t || d t j   dt |  S )zPInverse dimension formula to find the dimension based on the number of rotationsr   )mathlogpi)num_rotationsr   r   r*   r$   r$   r%   find_correction_dim   s   *z5_compute_yarn_parameters.<locals>.find_correction_dimc                    s@   t  | |||}t  ||||}t|dt||d fS )z.Find dimension range bounds based on rotationsr   r   )r/   floorceilmaxmin)low_rothigh_rotr   r   r*   lowhighr3   r$   r%   find_correction_range   s   z7_compute_yarn_parameters.<locals>.find_correction_rangec                 S   s>   | |kr|d7 }t j|t jd|  ||   }t |dd}|S )NgMbP?r   r   r   )r   r   float32clamp)r7   r6   r   linear_func	ramp_funcr$   r$   r%   linear_ramp_factor   s
   z4_compute_yarn_parameters.<locals>.linear_ramp_factorr   )r   r   r   r   r   r   r   r   r   r*   r(   getr/   r0   r   r   r   r    )r   r   r	   r!   r   r   r   r   r*   r'   r"   r,   r.   r=   rB   	pos_freqsinv_freq_extrapolationinv_freq_interpolationr:   r;   inv_freq_extrapolation_factorr#   r$   r<   r%   _compute_yarn_parameters   s8   
 

rH   c                 K   s@  t |dkrtd| | j}t| dr| jnd}t| d| j| j }t|| }| j	d }| j	d }	| j	
d}
| j	
d	}t| d
rO| j}| j| j }
n| j}|du rl|
dkr]d}ntdt|
t|  }|r|||kr|tj|tj|d}n	tj|	tj|d}tjd|dtj|d | }d|||   }||fS )a  
    Computes the inverse frequencies with LongRoPE scaling. Please refer to the
    [original implementation](https://github.com/microsoft/LongRoPE)
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r   z]Unexpected arguments: `**rope_kwargs` should be unset in `_compute_longrope_parameters`, got r   r   r   long_factorshort_factorr'   r"    original_max_position_embeddingsNr   )r   r   r   )r   r   r   r   r   r   r   r   r   r(   rC   rK   r*   r/   sqrtr0   r   tensorr>   r   r   r   )r   r   r	   r!   r   r   r   r   rI   rJ   r'   r"   rK   ext_factorsinv_freq_shaper#   r$   r$   r%   _compute_longrope_parameters   s8   


rP   c                 K   s   t | ||fi |\}}| jd }| jd }| jd }| jd }	|	| }
|	| }dtj | }t||
k|| |}|	| | ||  }d| | | ||  }||k  ||
k  }t|||}||fS )a  
    Computes the inverse frequencies for llama 3.1.

    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r'   low_freq_factorhigh_freq_factorrK   r   r   )r&   r(   r/   r1   r   where)r   r   r	   r!   r#   r"   r'   rQ   rR   old_context_lenlow_freq_wavelenhigh_freq_wavelenwaveleninv_freq_llamasmooth_factorsmoothed_inv_freqis_medium_freqr$   r$   r%   _compute_llama3_parameters1  s   



r\   )defaultlineardynamicyarnlongropellama3	rope_typereceived_keysrequired_keysoptional_keysignore_keysc                 C   s   d|v r|dh8 }| d |dur||8 }|| }|r&td|  d| |dur1|| | }n|| }|rDtd|  d|  dS dS )zYCompare the received keys in `config.rope_scaling` against the expected and optional keystyperc   Nz9Missing required keys in `rope_scaling` for 'rope_type'='z': z5Unrecognized keys in `rope_scaling` for 'rope_type'=')addKeyErrorloggerwarning)rc   rd   re   rf   rg   missing_keysunused_keysr$   r$   r%   _check_received_keysi  s   	

ro   c                 C   s@   | j }|d|dd }dh}t| }t||||d d S )Nrc   rh   rg   )r(   rC   setkeysro   )r   rg   r(   rc   re   rd   r$   r$   r%   !_validate_default_rope_parameters  s
   rs   c                 C   sx   | j }|d|dd }ddh}t| }t||||d |d }|d u s0t|tr0|dk r:td|  d S d S )Nrc   rh   r'   rp   r   8`rope_scaling`'s factor field must be a float >= 1, got 	r(   rC   rq   rr   ro   
isinstancer   rk   rl   )r   rg   r(   rc   re   rd   r'   r$   r$   r%   (_validate_linear_scaling_rope_parameters  s   rw   c                 C   s   | j }|d|dd }ddh}dh}t| }t|||||d |d }|d u s4t|tr4|dk r>td|  d S d S )Nrc   rh   r'   rK   rp   r   rt   ru   )r   rg   r(   rc   re   rf   rd   r'   r$   r$   r%   )_validate_dynamic_scaling_rope_parameters  s   rx   c                 C   s6  | j }|d|dd }ddh}h d}t| }t|||||d |d }|d u s5t|tr5|dk r=td|  |d}|d urWt|trO|d	k rWtd
|  |d}	|	d urmt|	tsmtd|	  |d}
|
d urt|
tstd|
  |	pd|
pdk rtd|	 d|
 d d S d S )Nrc   rh   r'   >   r,   r.   r"   rp   r   rt   r"   r   L`rope_scaling`'s attention_factor field must be a float greater than 0, got r,   z6`rope_scaling`'s beta_fast field must be a float, got r.   z6`rope_scaling`'s beta_slow field must be a float, got r-   r   zO`rope_scaling`'s beta_fast field must be greater than beta_slow, got beta_fast=z( (defaults to 32 if None) and beta_slow=z (defaults to 1 if None)ru   )r   rg   r(   rc   re   rf   rd   r'   r"   r,   r.   r$   r$   r%   _validate_yarn_parameters  s6   


rz   c                 C   s  | j }|d|dd }h d}h d}t| }t|||||d t| dr,| jnd}t| d| j| j	 }t
|| }	|d	}
t|
tsYtd
d |
D rYtd|
  t|
|	d ksptd|	d  dt|
  |d}t|tstdd |D rtd|  t||	d kstd|	d  dt|  t| drtd d S |d}|d u rtd nt|tr|dk rtd|  |d}|d urt|tr|dk rtd|  d S d S d S )Nrc   rh   >   rc   rI   rJ   >   r'   r"   rK   rp   r   r   r   rJ   c                 s       | ]
}t |ttfV  qd S Nrv   r   r   .0xr$   r$   r%   	<genexpr>      z0_validate_longrope_parameters.<locals>.<genexpr>zC`rope_scaling`'s short_factor field must be a list of numbers, got r   z5`rope_scaling`'s short_factor field must have length z, got rI   c                 s   r{   r|   r}   r~   r$   r$   r%   r     r   zB`rope_scaling`'s long_factor field must be a list of numbers, got z4`rope_scaling`'s long_factor field must have length rK   aY  This model has set a `original_max_position_embeddings` field, to be used together with `max_position_embeddings` to determine a scaling factor. Please set the `factor` field of `rope_scaling`with this ratio instead -- we recommend the use of this field over `original_max_position_embeddings`, as it is compatible with most model architectures.r'   z1Missing required keys in `rope_scaling`: 'factor'rt   r"   g        ry   )r(   rC   rq   rr   ro   r   r   r   r   r   r   rv   listallrk   rl   r   warning_oncer   )r   rg   r(   rc   re   rf   rd   r   r   r   rJ   rI   r'   r"   r$   r$   r%   _validate_longrope_parameters  sH   




r   c           
      C   s6  | j }|d|dd }h d}t| }t||||d |d }|d u s0t|tr0|dk r8td|  |d }|d	 }|d u sIt|tsQtd
|  |d u sZt|tsbtd|  ||krqtd| d|  |d }	|	d u s~t|	t	std|	  |	| j
krtd|	 d| j
  d S d S )Nrc   rh   >   r'   rc   rQ   rR   rK   rp   r'   r   rt   rQ   rR   z<`rope_scaling`'s low_freq_factor field must be a float, got z=`rope_scaling`'s high_freq_factor field must be a float, got zc`rope_scaling`'s high_freq_factor field must be greater than low_freq_factor, got high_freq_factor=z and low_freq_factor=rK   zP`rope_scaling`'s original_max_position_embeddings field must be an integer, got zg`rope_scaling`'s original_max_position_embeddings field must be less than max_position_embeddings, got z and max_position_embeddings=)r(   rC   rq   rr   ro   rv   r   rk   rl   r   r*   )
r   rg   r(   rc   re   rd   r'   rQ   rR   rK   r$   r$   r%   _validate_llama3_parameters  sL   
r   c                 C   sd   t | dd}|du rdS |d|dd}t|}|dur'|| |d dS td| d dS )	zO
    Validate the RoPE config arguments, given a `PretrainedConfig` object
    r(   Nrc   rh   r]   rp   zTMissing validation function mapping in `ROPE_VALIDATION_FUNCTIONS` for 'rope_type'='')r   rC   ROPE_VALIDATION_FUNCTIONSrk   rl   )r   rg   r(   rc   validation_fnr$   r$   r%   rope_config_validation(  s   

r   )NNNr|   )NN)!r/   typingr   configuration_utilsr   utilsr   r   
get_logger__name__rk   r   r   tupler   r&   r)   r+   rH   rP   r\   ROPE_INIT_FUNCTIONSstrrq   ro   rs   rw   rx   rz   r   r   r   r   r$   r$   r$   r%   <module>   s   


+

*

4

P

@

/
2&
