
    Ng`                     ~    d dl mZmZmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ  G d de
          Zd	S )
    )AnyCallableDictIteratorListOptional)CallbackManagerForLLMRun)LLM)GenerationChunk)pre_init)Fieldc                   R   e Zd ZU dZdZeed<   eed<   dZeed<   dZ	eed<   dZ
eed<   dZeed<   dZeed	<   eZeed
<    eg           Zee         ed<   	  ed          Zeed<   	  ed          Zeed<   	  ed          Zeed<   	  ed          Zeee                  ed<   	 edeeef         deeef         fd            Zedefd            ZdedefdZ	 	 ddedeee                  dee          dedef
dZ!	 	 ddedeee                  dee          dede"e#         f
dZ$dS )	ExLlamaV2a+  ExllamaV2 API.

    - working only with GPTQ models for now.
    - Lora models are not supported yet.

    To use, you should have the exllamav2 library installed, and provide the
    path to the Llama model as a named parameter to the constructor.
    Check out:

    Example:
        .. code-block:: python

            from langchain_community.llms import Exllamav2

            llm = Exllamav2(model_path="/path/to/llama/model")

    #TODO:
    - Add loras support
    - Add support for custom settings
    - Add support for custom stop sequences
    Nclient
model_pathexllama_cacheconfig	generator	tokenizersettingslogfuncstop_sequences   max_new_tokensT	streamingverbosedisallowed_tokensvaluesreturnc                    	 dd l }n"# t          $ r}t          d          |d }~ww xY w|j                                        st	          d          	 ddlm}m}m}m	} ddl
m}m}	 n# t          $ r t          d          w xY w|d         }
|
sd |d	<   |d	         }|d
         r|d
         } ||j                   nt          d           |            }|d         |_        |                                  ||          } ||d          }|                    |            ||          }|d         r |	|||          }n ||||          }d |d         D             |d<   t%          |d|d                     |d|d                     |                    d          }|r|                    ||           ||d<   ||d<   ||d<   ||d<   ||d<   |S )Nr   z@Unable to import torch, please install with `pip install torch`.z/CUDA is not available. ExllamaV2 requires CUDA.)r   ExLlamaV2CacheExLlamaV2ConfigExLlamaV2Tokenizer)ExLlamaV2BaseGeneratorExLlamaV2StreamingGeneratorzCould not import exllamav2 library. Please install the exllamav2 library with (cuda 12.1 is required)example : !python -m pip install https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whlr   c                      d S )N )argskwargss     ^/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/llms/exllamav2.py<lambda>z0ExLlamaV2.validate_environment.<locals>.<lambda>_   s         r   r   z<settings is required. Custom settings are not supported yet.r   T)lazyr   c                 Z    g | ](}|                                                                 )S r'   )striplower).0xs     r*   
<listcomp>z2ExLlamaV2.validate_environment.<locals>.<listcomp>z   s*    #X#X#X!AGGIIOO$5$5#X#X#Xr,   r   zstop_sequences r   r   r   r   r   r   )torchImportErrorcudais_availableEnvironmentError	exllamav2r   r!   r"   r#   exllamav2.generatorr$   r%   __dict__NotImplementedError	model_dirprepareload_autosplitsetattrgetdisallow_tokens)clsr   r4   er   r!   r"   r#   r$   r%   r   r   r   r   modelr   r   r   
disalloweds                      r*   validate_environmentzExLlamaV2.validate_environment>   s   	LLLL 	 	 	R 	
 z&&(( 	V"#TUUU	                     	 	 	^  	 # 	= < <F9#* 	j)HGH%&&&&%N   !""!,/	&!!&u4888]+++&&v..	+ 	P33E=)TTII..umYOOI $Y#XvFV?W#X#X#X *F3C,DEEE<&)9":<<===ZZ 344
 	<$$Y
;;; x'{!x'{"/s    
&!&A' 'Bc                     dS )zReturn type of llm.r   r'   )selfs    r*   	_llm_typezExLlamaV2._llm_type   s	     {r,   textc                 @    | j         j                            |          S )z-Get the number of tokens present in the text.)r   r   
num_tokens)rI   rK   s     r*   get_num_tokenszExLlamaV2.get_num_tokens   s    ~'224888r,   promptstoprun_managerr)   c                     | j         }| j        r2d}|                     ||||          D ]}|t          |          z  }|S |                    || j        | j                  }|t          |          d          }|S )N )rO   rP   rQ   r)   )rO   gen_settingsrM   )r   r   _streamstrgenerate_simpler   r   len)	rI   rO   rP   rQ   r)   r   combined_text_outputchunkoutputs	            r*   _callzExLlamaV2._call   s     N	> 	#% Dk& &   3 3 %E

2$$''..!]. /  F CKKMM*FMr,   c              +     K   | j                             |          }| j                                         | j                            g            | j                            || j                   d}	 | j                                        \  }}}	|dz  }|r|                    || j	                   |V  |s|| j
        k    rnSd S )Nr   T   )tokenr   )r   encoder   warmupset_stop_conditionsbegin_streamr   streamon_llm_new_tokenr   r   )
rI   rO   rP   rQ   r)   	input_idsgenerated_tokensrZ   eos_s
             r*   rU   zExLlamaV2._stream   s       N))&11	**2...##It}===	 N1133ME3! ,, L -    KKK &$*===	 	r,   )NN)%__name__
__module____qualname____doc__r   r   __annotations__rV   r   r   r   r   r   printr   r   r   r   r   r   intr   boolr   r   r   r   r   rG   propertyrJ   rN   r	   r\   r   r   rU   r'   r,   r*   r   r   
   s         , FCOOOM3FCIsIs Hc GX %b		NDI)))=%**NC$$$/eDkkIt!!!8E$KKGT- .3U4[[xS	*8887I$sCx. IT#s(^ I I I XIV 3    X93 93 9 9 9 9 %):>	  tCy! 67	
  
   : %):>	  tCy! 67	
  
/	"     r,   r   N)typingr   r   r   r   r   r   langchain_core.callbacksr	   langchain_core.language_modelsr
   langchain_core.outputsr   langchain_core.utilsr   pydanticr   r   r'   r,   r*   <module>ry      s    @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ = = = = = = . . . . . . 2 2 2 2 2 2 ) ) ) ) ) )      ~ ~ ~ ~ ~ ~ ~ ~ ~ ~r,   