
from __future__ import annotations

import json
from io import StringIO
from typing import Any, Dict, Iterator, List, Optional

import requests
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.utils import get_pydantic_field_names
from pydantic import ConfigDict


class Llamafile(LLM):
    """Llamafile lets you distribute and run large language models with a
    single file.

    To get started, see: https://github.com/Mozilla-Ocho/llamafile

    To use this class, you will first need to:

    1. Download a llamafile.
    2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
    3. Start the llamafile in server mode:

        `./path/to/model.llamafile --server --nobrowser`
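
    If the server is listening somewhere other than the default address, point
    the client at it explicitly (the port below is only an illustration):

        .. code-block:: python

            llm = Llamafile(base_url="http://localhost:8081")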

    Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile()
            llm.invoke("Tell me a joke.")
    zhttp://localhost:8080strbase_urlNzOptional[int]request_timeoutFbool	streamingintseedg?floattemperature(   top_kgffffff?top_pg?min_p	n_predictr   n_keepg      ?tfs_z	typical_pg?repeat_penalty@   repeat_last_nTpenalize_nlg        presence_penaltyfrequency_penaltymirostatg      @mirostat_taug?mirostat_etaforbid)extrareturnc                    dS )N	llamafile )selfs    ^/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/llms/llamafile.py	_llm_typezLlamafile._llm_typex   s    {    	List[str]c                P    g dfdt          | j                  D             }|S )N)r   cachecallback_manager	callbacksmetadatanamer   r   tagsverbosecustom_get_token_idsc                    g | ]}|v|	S r1   r1   ).0kignore_keyss     r3   
<listcomp>z/Llamafile._param_fieldnames.<locals>.<listcomp>   s*     
 
 
1KCWCWACWCWCWr5   )r   	__class__)r2   attrsrC   s     @r3   _param_fieldnameszLlamafile._param_fieldnames|   sM    
 
 

 
 
 
/??
 
 
 r5   Dict[str, Any]c                D    i }| j         D ]}t          | |          ||<   |S N)rG   getattr)r2   params	fieldnames      r3   _default_paramszLlamafile._default_params   s5    / 	9 	9I 'i 8 8F9r5   stopOptional[List[str]]kwargsr   c                    | j         }|                                D ]\  }}||v r|||<   |t          |          dk    r||d<   | j        rd|d<   |S )Nr   rO   Tstream)rN   itemslenr   )r2   rO   rQ   rL   rB   vs         r3   _get_parameterszLlamafile._get_parameters   sr     %
    def _get_parameters(
        self, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> Dict[str, Any]:
        params = self._default_params

        # Only update keys that are already present in params, so that
        # unknown/unsupported options are never posted to the llamafile server.
        for k, v in kwargs.items():
            if k in params:
                params[k] = v

        if stop is not None and len(stop) > 0:
            params["stop"] = stop

        if self.streaming:
            params["stream"] = True

        return params

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Request prompt completion from the llamafile server and return the
        output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.
            run_manager: Optional callback manager for the LLM run.
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            The string generated by the model.

        """
        if self.streaming:
            # Stream internally and concatenate the chunks into a single string.
            with StringIO() as buff:
                for chunk in self._stream(
                    prompt, stop=stop, run_manager=run_manager, **kwargs
                ):
                    buff.write(chunk.text)

                text = buff.getvalue()

            return text
        else:
            params = self._get_parameters(stop=stop, **kwargs)
            payload = {"prompt": prompt, **params}

            try:
                response = requests.post(
                    url=f"{self.base_url}/completion",
                    headers={"Content-Type": "application/json"},
                    json=payload,
                    stream=False,
                    timeout=self.request_timeout,
                )
            except requests.exceptions.ConnectionError:
                raise requests.exceptions.ConnectionError(
                    f"Could not connect to Llamafile server. Please make sure "
                    f"that a server is running at {self.base_url}."
                )

            response.raise_for_status()
            response.encoding = "utf-8"

            text = response.json()["content"]

            return text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Yields results objects as they are generated in real time.

        It also calls the callback manager's on_llm_new_token event with
        similar parameters to the OpenAI LLM class method of the same name.

        Args:
            prompt: The prompts to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager for the LLM run.
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            A generator representing the stream of tokens being generated.

        Yields:
            Dictionary-like objects each containing a token

        Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile(
                temperature = 0.0
            )
            for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
                    stop=["'","\n"]):
                result = chunk["choices"][0]
                print(result["text"], end='', flush=True)

        """
        params = self._get_parameters(stop=stop, **kwargs)
        if "stream" not in params:
            params["stream"] = True

        payload = {"prompt": prompt, **params}

        try:
            response = requests.post(
                url=f"{self.base_url}/completion",
                headers={"Content-Type": "application/json"},
                json=payload,
                stream=True,
                timeout=self.request_timeout,
            )
        except requests.exceptions.ConnectionError:
            raise requests.exceptions.ConnectionError(
                f"Could not connect to Llamafile server. Please make sure "
                f"that a server is running at {self.base_url}."
            )

        response.encoding = "utf8"

        for raw_chunk in response.iter_lines(decode_unicode=True):
            content = self._get_chunk_content(raw_chunk)
            chunk = GenerationChunk(text=content)

            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk

    def _get_chunk_content(self, chunk: str) -> str:
        """When streaming is turned on, llamafile server returns lines like:

        'data: {"content":" They","multimodal":true,"slot_id":0,"stop":false}'

        Here, we convert this to a dict and return the value of the 'content'
        field.
        """
        if chunk.startswith("data:"):
            cleaned = chunk.lstrip("data: ")
            data = json.loads(cleaned)
            return data["content"]
        else:
            return chunk