
    NgF                    ,   d dl mZ d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlZd dlmZmZ  ej        e          Zer.d dlmZ 	 d dlmZ n# e$ r Y nw xY w	 d dlmZmZ n# e$ r Y nw xY w	 	 	 d(d)dZ	 	 	 d(d*dZ	 	 	 	 	 	 	 d+d,d!Z	 	 	 	 d-d.d'ZdS )/    )annotationsN)Path)TYPE_CHECKINGCallableLiteral)disable_datasets_cachingis_datasets_availableSentenceTransformer)OVQuantizationConfig)OptimizationConfigQuantizationConfigFmodelr   optimization_config4OptimizationConfig | Literal['O1', 'O2', 'O3', 'O4']model_name_or_pathstrpush_to_hubbool	create_prfile_suffix
str | NonereturnNonec           
     R   ddl m} ddlm} 	 ddlm}m}	 ddlm}
 n# t          $ r t          d          w xY wt          | |          r@t          |           r1t          | d         |          rt          | d         j        |          st          d          | d         j        }|	                    |          t          t                    r4|
j        vrt          d          p t#          |
                      d
t%          fdd|||d           d	S )a  
    Export an optimized ONNX model from a SentenceTransformer model.

    The O1-O4 optimization levels are defined by Optimum and are documented here:
    https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/optimization

    The optimization levels are:

    - O1: basic general optimizations.
    - O2: basic and extended general optimizations, transformers-specific fusions.
    - O3: same as O2 with GELU approximation.
    - O4: same as O3 with mixed precision (fp16, GPU-only)

    See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for more information & benchmarks.

    Args:
        model (SentenceTransformer): The SentenceTransformer model to be optimized. Must be loaded with `backend="onnx"`.
        optimization_config (OptimizationConfig | Literal["O1", "O2", "O3", "O4"]): The optimization configuration or level.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the optimized model will be saved.
        push_to_hub (bool, optional): Whether to push the optimized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the optimized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer model loaded with `backend="onnx"`.
        ValueError: If the provided optimization_config is not valid.

    Returns:
        None
    r   r
   Transformer)ORTModelForFeatureExtractionORTOptimizer)AutoOptimizationConfigPlease install Optimum and ONNX Runtime to use this function. You can install them with pip: `pip install optimum[onnxruntime]` or `pip install optimum[onnxruntime-gpu]`]The model must be a Transformer-based SentenceTransformer model loaded with `backend="onnx"`.z\optimization_config must be an OptimizationConfig instance or one of 'O1', 'O2', 'O3', 'O4'.N	optimizedc                4                         |           S N)r   )optimize)save_dirr   r   	optimizers    Y/var/www/html/ai-engine/env/lib/python3.11/site-packages/sentence_transformers/backend.py<lambda>z-export_optimized_onnx_model.<locals>.<lambda>j       ););<OQYgr);)s)s     export_optimized_onnx_modelonnxexport_functionexport_function_nameconfigr   r   r   r   backend)sentence_transformersr   (sentence_transformers.models.Transformerr   optimum.onnxruntimer   r   !optimum.onnxruntime.configurationr    ImportError
isinstancelen
auto_model
ValueErrorfrom_pretrainedr   _LEVELSgetattrsave_or_push_to_hub_model)r   r   r   r   r   r   r   r   r   r   r    	ort_modelr(   s    `   `      @r)   r-   r-      s   N :99999DDDDDD
RRRRRRRRLLLLLLL 
 
 
8
 
 	

 u122
5zz
 %(K00
 %(-/KLL	
 k
 
 	
 /4Ah.AI,,Y77I%s++ U&<&DDDn   "8%8Rg&<>QRRTT!ssssss:"-	 	 	 	 	 	     :quantization_configFQuantizationConfig | Literal['arm64', 'avx2', 'avx512', 'avx512_vnni']c           
        ddl m} ddlm} 	 ddlm}m}	 ddlm}
 n# t          $ r t          d          w xY wt          | |          r@t          |           r1t          | d         |          rt          | d         j        |          st          d          | d         j        }|	                    |          t          t                    r\dvrt          d	          d
d
         } t!          |
          d          p"j        j                                         d| !j        j                                         dt)          fdd|||d           d
S )a  
    Export a quantized ONNX model from a SentenceTransformer model.

    This function applies dynamic quantization, i.e. without a calibration dataset.
    Each of the default quantization configurations quantize the model to int8, allowing
    for faster inference on CPUs, but are likely slower on GPUs.

    See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for more information & benchmarks.

    Args:
        model (SentenceTransformer): The SentenceTransformer model to be quantized. Must be loaded with `backend="onnx"`.
        quantization_config (QuantizationConfig): The quantization configuration.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the quantized model will be saved.
        push_to_hub (bool, optional): Whether to push the quantized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the quantized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer model loaded with `backend="onnx"`.
        ValueError: If the provided quantization_config is not valid.

    Returns:
        None
    r   r
   r   )r   ORTQuantizer)AutoQuantizationConfigr!   r"   )arm64avx2avx512avx512_vnnizqquantization_config must be an QuantizationConfig instance or one of 'arm64', 'avx2', 'avx512', or 'avx512_vnni'.NF)	is_static_
_quantizedc                4                         |           S r%   quantize)r'   r   rC   	quantizers    r)   r*   z5export_dynamic_quantized_onnx_model.<locals>.<lambda>   r+   r,   #export_dynamic_quantized_onnx_modelr.   r/   )r4   r   r5   r   r6   r   rF   r7   rG   r8   r9   r:   r;   r<   r=   r   r?   weights_dtypenamelowerr@   )r   rC   r   r   r   r   r   r   r   rF   rG   rA   quantization_config_namerR   s    `   `       @r)   rS   rS   u   s"   B :99999DDDDDD
RRRRRRRRLLLLLLL 
 
 
8
 
 	

 u122
5zz
 %(K00
 %(-/KLL	
 k
 
 	
 /4Ah.AI,,Y77I%s++ t&PPP D   $7qqq#9 Rg&<>QRR]bccc!s(;(I(N(T(T(V(V%s%sYq%s%s,:?EEGGSSSssssssB"-	 	 	 	 	 	rB   qint8_quantized"OVQuantizationConfig | dict | Nonedataset_namedataset_config_namedataset_splitcolumn_namec
           
         ddl m}
 ddlm} 	 ddlm}m}m}m} n# t          $ r t          d          w xY wt                      st          d          t           |
          r@t                     r1t           d         |          rt           d         j        |          st          d          |
 |            } d         j        } ||	          |                    |          t!          d
 |||fD                       r,t#          d |||fD                       st          d           fd||nd}||nd}||nd}ndt%                      5                      ||fd||j        nd|          ddd           n# 1 swxY w Y   t+          fdd|||||	d           dS )a  
    Export a quantized OpenVINO model from a SentenceTransformer model.

    This function applies Post-Training Static Quantization (PTQ) using a calibration dataset, which calibrates
    quantization constants without requiring model retraining. Each default quantization configuration converts
    the model to int8 precision, enabling faster inference while maintaining accuracy.

    See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for more information & benchmarks.

    Args:
        model (SentenceTransformer): The SentenceTransformer model to be quantized. Must be loaded with `backend="openvino"`.
        quantization_config (OVQuantizationConfig | dict | None): The quantization configuration. If None, default values are used.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the quantized model will be saved.
        dataset_name(str, optional): The name of the dataset to load for calibration.
            If not specified, the `sst2` subset of the `glue` dataset will be used by default.
        dataset_config_name (str, optional): The specific configuration of the dataset to load.
        dataset_split (str, optional): The split of the dataset to load (e.g., 'train', 'test'). Defaults to None.
        column_name (str, optional): The column name in the dataset to use for calibration. Defaults to None.
        push_to_hub (bool, optional): Whether to push the quantized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str, optional): The suffix to add to the quantized model file name. Defaults to `qint8_quantized`.

    Raises:
        ImportError: If the required packages `optimum` and `openvino` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer model loaded with `backend="openvino"`.
        ValueError: If the provided quantization_config is not valid.

    Returns:
        None
    r   r
   r   )OVConfigOVModelForFeatureExtractionr   OVQuantizerzPlease install datasets, optimum-intel and openvino to use this function. You can install them with pip: `pip install datasets optimum[openvino]`zaPlease install datasets to use this function. You can install it with pip: `pip install datasets`zaThe model must be a Transformer-based SentenceTransformer model loaded with `backend="openvino"`.N)rC   c              3     K   | ]}|d uV  	d S N .0params     r)   	<genexpr>z9export_static_quantized_openvino_model.<locals>.<genexpr>  s'      
j
j5
j
j
j
j
j
jr,   c              3     K   | ]}|d uV  	d S rc   rd   re   s     r)   rh   z9export_static_quantized_openvino_model.<locals>.<genexpr>  s@       w w#Tw w w w w wr,   zEither specify all of `dataset_name`, `dataset_config_name`, `dataset_split`, and `column_name`, or leave them all unspecified.c                6                         | ddd          S )N
max_lengthi  T)paddingrk   
truncation)	tokenizer)examplesr   s    r)   preprocess_functionzCexport_static_quantized_openvino_model.<locals>.preprocess_function  s    x#Z^___r,   gluesst2trainsentencec                &     |                    S rc   rd   )ro   r]   rp   s    r)   r*   z8export_static_quantized_openvino_model.<locals>.<lambda>%  s    1D1DXkEZ1[1[ r,   i,  )rZ   r[   rp   num_samplesr\   c                4                         |           S )N)save_directory	ov_configrP   )r'   calibration_datasetry   rR   s    r)   r*   z8export_static_quantized_openvino_model.<locals>.<lambda>+  s#    ););I *< *
 *
 r,   &export_static_quantized_openvino_modelopenvinor/   )r4   r   r5   r   optimum.intelr_   r`   r   ra   r8   r	   r9   r:   r;   r<   r=   anyallr   get_calibration_datasetrv   r@   )r   rC   r   rZ   r[   r\   r]   r   r   r   r   r   r_   r`   r   ra   ov_modelrz   ry   rp   rR   s   `     `          @@@@r)   r{   r{      sJ   T :99999DDDDDD
jjjjjjjjjjjjj 
 
 
V
 
 	


 !"" 
o
 
 	

 u122
5zz
 %(K00
 %(-/JKK	
 o
 
 	
 "2244,1!H,?H-@AAAI++H55I

j
j<9Lm]h*i
j
j
jjj 
sv w w(46I=Ze'fw w w t t 
  N
 
 	
` ` ` ` ` $0#;<<L1D1P--V\%2%>MMGM!,!8++jK	!	#	# 
 
'??% 3 [ [ [ [ [;N;Z+77`c' @ 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 F"-     s   ! ;((FF #F r.   r0   r   r1   r3   c                   |dk    rd| d}n|dk    rd| d}t          j                    5 }	 | |	           |dk    rot          |	          |z  }	t          j        |	dz  |	|z             t          j        |	dz  |	|z                      d	                     |	                                }	|dk    rit          |	          |z  }
|
                    d
d
           t          |	          |z  }|
|z  }t          j        ||           |
                                }	|rd}|r`t          |          	                    dd          	                    dd          	                    dd          }d| d| d| d| d| d| d}t          j        |	||dd| d|||           nt          |          |z  }
|
                    d
d
           t          |	          |z  }|
|z  }t          j        ||           |dk    r_t          |	          |z                      d	          }t          |
          |z                      d	          }t          j        ||           d d d            d S # 1 swxY w Y   d S )Nr.   model_z.onnxr|   openvino_model_z.xmlzopenvino_model.xmlzopenvino_model.binz.binT)parentsexist_ok (z(
	z, z,
	)z
)zGHello!

*This pull request has been automatically generated from the [`zT`](https://sbert.net/docs/package_reference/util.html#sentence_transformers.backend.zI) function from the Sentence Transformers library.*

## Config
```python
a  
```

## Tip:
Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
```python
from sentence_transformers import SentenceTransformer

# TODO: Fill in the PR number
pr_number = 2
model = SentenceTransformer(
    "z5",
    revision=f"refs/pr/{pr_number}",
    backend="z#",
    model_kwargs={"file_name": "a  "},
)

# Verify that everything works as expected
embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
print(embeddings.shape)

similarities = model.similarity(embeddings, embeddings)
print(similarities)
```
r   zAdd exported z model )folder_pathpath_in_reporepo_id	repo_typecommit_messagecommit_descriptionr   )tempfileTemporaryDirectoryr   shutilmovewith_suffixas_posixmkdirreprreplacehuggingface_hubupload_foldercopy)r0   r1   r2   r   r   r   r   r3   	file_namer'   dst_dirsourcedestinationr   opt_config_string
bin_sourcebin_destinations                    r)   r@   r@   8  sp    &/[///			J		7k777			$	&	& Q9(!!! j  H~~/HK#77I9MNNNK#77(Y:N9[9[\b9c9cddd((**H f8nnw.GMM$M666(^^i/F!I-KK,,,''))H <	9!# !$(LL$8$8g$F$F$N$NtU\$]$]$e$efikp$q$q!& AU& & k& & & &$ %& &( )& &* #,+& & &"B )$$*!LwLLyLL#5#     -..8GMM$M666(^^i/F!I-KK,,, *$$"8nny8EEfMM
#'==9#<"I"I&"Q"QJ888cQ9 Q9 Q9 Q9 Q9 Q9 Q9 Q9 Q9 Q9 Q9 Q9 Q9 Q9 Q9 Q9 Q9 Q9s   H2I,,I03I0)FFN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   rC   rD   r   r   r   r   r   r   r   r   r   r   )NNNNFFrX   )r   r   rC   rY   r   r   rZ   r   r[   r   r\   r   r]   r   r   r   r   r   r   r   r   r   )FFNr.   )r0   r   r1   r   r   r   r   r   r   r   r   r   r3   r   )
__future__r   loggingr   r   pathlibr   typingr   r   r   r   sentence_transformers.utilr   r	   	getLogger__name__logger)sentence_transformers.SentenceTransformerr   r}   r   r8   r7   r   r   r-   rS   r{   r@   rd   r,   r)   <module>r      s   " " " " " "          3 3 3 3 3 3 3 3 3 3     V V V V V V V V		8	$	$ 
MMMMMM6666666   \\\\\\\\\    "V V V V Vz "Q Q Q Q Qp  $&* $"(l l l l lh "`9 `9 `9 `9 `9 `9 `9s$   A AAA$ $A,+A,