o
    S"gF                     @  s  d dl mZ d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlZd dlmZmZ eeZerad dlmZ zd dlmZ W n	 eyL   Y nw z
d dlmZmZ W n	 ey`   Y nw 					d,d-ddZ					d,d.ddZ									d/d0d#d$Z						%d1d2d*d+ZdS )3    )annotationsN)Path)TYPE_CHECKINGCallableLiteral)disable_datasets_cachingis_datasets_availableSentenceTransformer)OVQuantizationConfig)OptimizationConfigQuantizationConfigFmodelr
   optimization_config4OptimizationConfig | Literal['O1', 'O2', 'O3', 'O4']model_name_or_pathstrpush_to_hubbool	create_prfile_suffix
str | NonereturnNonec              
     s   ddl m} ddlm} zddlm}m}	 ddlm}
 W n t	y'   t	dw t
| |r@t| r@t
| d |r@t
| d j|sDtd| d j}|	|t
trf|
jvr\td p_ t|
  d	u rld
 t fddd||| dd d	S )a  
    Export an optimized ONNX model from a SentenceTransformer model.

    The O1-O4 optimization levels are defined by Optimum and are documented here:
    https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/optimization

    The optimization levels are:

    - O1: basic general optimizations.
    - O2: basic and extended general optimizations, transformers-specific fusions.
    - O3: same as O2 with GELU approximation.
    - O4: same as O3 with mixed precision (fp16, GPU-only)

    See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for more information & benchmarks.

    Args:
        model (SentenceTransformer): The SentenceTransformer model to be optimized. Must be loaded with `backend="onnx"`.
        optimization_config (OptimizationConfig | Literal["O1", "O2", "O3", "O4"]): The optimization configuration or level.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the optimized model will be saved.
        push_to_hub (bool, optional): Whether to push the optimized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the optimized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer model loaded with `backend="onnx"`.
        ValueError: If the provided optimization_config is not valid.

    Returns:
        None
    r   r	   Transformer)ORTModelForFeatureExtractionORTOptimizer)AutoOptimizationConfigPlease install Optimum and ONNX Runtime to use this function. You can install them with pip: `pip install optimum[onnxruntime]` or `pip install optimum[onnxruntime-gpu]`]The model must be a Transformer-based SentenceTransformer model loaded with `backend="onnx"`.z\optimization_config must be an OptimizationConfig instance or one of 'O1', 'O2', 'O3', 'O4'.N	optimizedc                      j |  dS N)r   )optimizesave_dirr   r   	optimizer _/mnt/skqttb/ctump_chatbot/chatbot/lib/python3.10/site-packages/sentence_transformers/backend.py<lambda>j       z-export_optimized_onnx_model.<locals>.<lambda>export_optimized_onnx_modelonnxexport_functionexport_function_nameconfigr   r   r   r   backend)sentence_transformersr
   (sentence_transformers.models.Transformerr   optimum.onnxruntimer   r   !optimum.onnxruntime.configurationr   ImportError
isinstancelen
auto_model
ValueErrorfrom_pretrainedr   _LEVELSgetattrsave_or_push_to_hub_model)r   r   r   r   r   r   r
   r   r   r   r   	ort_modelr)   r'   r*   r-      sT   '




r-   quantization_configFQuantizationConfig | Literal['arm64', 'avx2', 'avx512', 'avx512_vnni']c              
     s.  ddl m} ddlm} zddlm}m}	 ddlm}
 W n t	y'   t	dw t
| |r@t| r@t
| d |r@t
| d j|sDtd| d j}|	|t
trvdvr[td	d
d
 }t|
dd pujj  d|   d
u rjj  d t fddd||| dd d
S )a  
    Export a quantized ONNX model from a SentenceTransformer model.

    This function applies dynamic quantization, i.e. without a calibration dataset.
    Each of the default quantization configurations quantize the model to int8, allowing
    for faster inference on CPUs, but are likely slower on GPUs.

    See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for more information & benchmarks.

    Args:
        model (SentenceTransformer): The SentenceTransformer model to be quantized. Must be loaded with `backend="onnx"`.
        quantization_config (QuantizationConfig): The quantization configuration.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the quantized model will be saved.
        push_to_hub (bool, optional): Whether to push the quantized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the quantized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer model loaded with `backend="onnx"`.
        ValueError: If the provided quantization_config is not valid.

    Returns:
        None
    r   r	   r   )r   ORTQuantizer)AutoQuantizationConfigr   r    )arm64avx2avx512avx512_vnnizqquantization_config must be an QuantizationConfig instance or one of 'arm64', 'avx2', 'avx512', or 'avx512_vnni'.NF)	is_static_
_quantizedc                   r"   r#   quantizer%   r   rB   	quantizerr)   r*   r+      r,   z5export_dynamic_quantized_onnx_model.<locals>.<lambda>#export_dynamic_quantized_onnx_modelr.   r/   )r4   r
   r5   r   r6   r   rD   r7   rE   r8   r9   r:   r;   r<   r=   r   r?   weights_dtypenamelowerr@   )r   rB   r   r   r   r   r
   r   r   rD   rE   rA   quantization_config_namer)   rO   r*   rQ   u   sV   !



rQ   qint8_quantized"OVQuantizationConfig | dict | Nonedataset_namedataset_config_namedataset_splitcolumn_namec
              
     s  ddl m}
 ddlm} zddlm}m}m}m} W n t	y%   t	dw t
 s-t	dt|
rEtrEtd |rEtd j|sItd|du rP| }d j}||d	||td
d |||fD r}td
d |||fD s}tdfdd|dur|nd}|dur|nd}|dur|nd}durndt  j||fdd|dur|jnd|d W d   n1 sw   Y  t fddd|||||	dd dS )a  
    Export a quantized OpenVINO model from a SentenceTransformer model.

    This function applies Post-Training Static Quantization (PTQ) using a calibration dataset, which calibrates
    quantization constants without requiring model retraining. Each default quantization configuration converts
    the model to int8 precision, enabling faster inference while maintaining accuracy.

    See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for more information & benchmarks.

    Args:
        model (SentenceTransformer): The SentenceTransformer model to be quantized. Must be loaded with `backend="openvino"`.
        quantization_config (OVQuantizationConfig | dict | None): The quantization configuration. If None, default values are used.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the quantized model will be saved.
        dataset_name(str, optional): The name of the dataset to load for calibration.
            If not specified, the `sst2` subset of the `glue` dataset will be used by default.
        dataset_config_name (str, optional): The specific configuration of the dataset to load.
        dataset_split (str, optional): The split of the dataset to load (e.g., 'train', 'test'). Defaults to None.
        column_name (str, optional): The column name in the dataset to use for calibration. Defaults to None.
        push_to_hub (bool, optional): Whether to push the quantized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str, optional): The suffix to add to the quantized model file name. Defaults to `qint8_quantized`.

    Raises:
        ImportError: If the required packages `optimum` and `openvino` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer model loaded with `backend="openvino"`.
        ValueError: If the provided quantization_config is not valid.

    Returns:
        None
    r   r	   r   )OVConfigOVModelForFeatureExtractionr   OVQuantizerzPlease install datasets, optimum-intel and openvino to use this function. You can install them with pip: `pip install datasets optimum[openvino]`zaPlease install datasets to use this function. You can install it with pip: `pip install datasets`zaThe model must be a Transformer-based SentenceTransformer model loaded with `backend="openvino"`.N)rB   c                 s  s    | ]}|d uV  qd S Nr)   ).0paramr)   r)   r*   	<genexpr>  s    z9export_static_quantized_openvino_model.<locals>.<genexpr>zEither specify all of `dataset_name`, `dataset_config_name`, `dataset_split`, and `column_name`, or leave them all unspecified.c                   s    j | ddddS )N
max_lengthi  T)paddingrc   
truncation)	tokenizerexamples)r   r)   r*   preprocess_function  s   zCexport_static_quantized_openvino_model.<locals>.preprocess_functiongluesst2trainsentencec                   s   |   S r_   r)   rg   )r[   ri   r)   r*   r+   %  s    z8export_static_quantized_openvino_model.<locals>.<lambda>i,  )rX   rY   ri   num_samplesrZ   c                   s   j  | dS )N)save_directory	ov_configrM   r%   )calibration_datasetrp   rP   r)   r*   r+   +  s    &export_static_quantized_openvino_modelopenvinor/   )r4   r
   r5   r   optimum.intelr\   r]   r   r^   r8   r   r9   r:   r;   r<   r=   anyallr   get_calibration_datasetrn   r@   )r   rB   r   rX   rY   rZ   r[   r   r   r   r
   r   r\   r]   r   r^   ov_modelr)   )rq   r[   r   rp   ri   rP   r*   rr      sv   *


"
	
rr   r.   r0   r   r1   r3   c                 C  s  |dkrd| d}n
|dkrd| d}t  }	| |	 |dkrCt|	| }	t|	d |	|  t|	d |	| d	 |	 }	|dkrht|	| }
|
jd
d
d t|	| }|
| }t|| |
 }	|rd}|rt|	dd	dd	dd}d| d| d| d| d| d| d}t
j|	||dd| d|||d nIt|| }
|
jd
d
d t|	| }|
| }t|| |dkrt|	| d	}t|
| d	}t|| W d    d S W d    d S W d    d S 1 sw   Y  d S )Nr.   model_z.onnxrs   openvino_model_z.xmlzopenvino_model.xmlzopenvino_model.binz.binT)parentsexist_ok (z(
	z, z,
	)z
)zGHello!

*This pull request has been automatically generated from the [`zT`](https://sbert.net/docs/package_reference/util.html#sentence_transformers.backend.zI) function from the Sentence Transformers library.*

## Config
```python
a  
```

## Tip:
Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
```python
from sentence_transformers import SentenceTransformer

# TODO: Fill in the PR number
pr_number = 2
model = SentenceTransformer(
    "z5",
    revision=f"refs/pr/{pr_number}",
    backend="z#",
    model_kwargs={"file_name": "a  "},
)

# Verify that everything works as expected
embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
print(embeddings.shape)

similarities = model.similarity(embeddings, embeddings)
print(similarities)
```
r   zAdd exported z model )folder_pathpath_in_reporepo_id	repo_typecommit_messagecommit_descriptionr   )tempfileTemporaryDirectoryr   shutilmovewith_suffixas_posixmkdirreprreplacehuggingface_hubupload_foldercopy)r0   r1   r2   r   r   r   r   r3   	file_namer&   dst_dirsourcedestinationr   opt_config_string
bin_sourcebin_destinationr)   r)   r*   r@   8  st   

 !N:"r@   )FFN)r   r
   r   r   r   r   r   r   r   r   r   r   r   r   )r   r
   rB   rC   r   r   r   r   r   r   r   r   r   r   )NNNNFFrV   )r   r
   rB   rW   r   r   rX   r   rY   r   rZ   r   r[   r   r   r   r   r   r   r   r   r   )FFNr.   )r0   r   r1   r   r   r   r   r   r   r   r   r   r3   r   )
__future__r   loggingr   r   pathlibr   typingr   r   r   r   sentence_transformers.utilr   r   	getLogger__name__logger)sentence_transformers.SentenceTransformerr
   rt   r   r8   r7   r   r   r-   rQ   rr   r@   r)   r)   r)   r*   <module>   sT    
]Xt