
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Mapping, Optional, Union

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM

if TYPE_CHECKING:
    from xinference.client import RESTfulChatModelHandle, RESTfulGenerateModelHandle
    from xinference.model.llm.core import LlamaCppGenerateConfig


class Xinference(LLM):
    """`Xinference` large-scale model inference service.

    To use, you should have the xinference library installed:

    .. code-block:: bash

       pip install "xinference[all]"

    If you're simply using the services provided by Xinference, you can utilize the xinference_client package:

    .. code-block:: bash

        pip install xinference_client

    Check out: https://github.com/xorbitsai/inference
    To run, you need to start a Xinference supervisor on one server and Xinference workers on the other servers.

    Example:
        To start a local instance of Xinference, run

        .. code-block:: bash

           $ xinference

        You can also deploy Xinference in a distributed cluster. Here are the steps:

        Starting the supervisor:

        .. code-block:: bash

           $ xinference-supervisor

        Starting the worker:

        .. code-block:: bash

           $ xinference-worker

    Then, launch a model using the command line interface (CLI).

    Example:

    .. code-block:: bash

       $ xinference launch -n orca -s 3 -q q4_0

    It will return a model UID. Then, you can use Xinference with LangChain.

    Example:

    .. code-block:: python

        from langchain_community.llms import Xinference

        llm = Xinference(
            server_url="http://0.0.0.0:9997",
            model_uid = {model_uid} # replace model_uid with the model UID returned from launching the model
        )

        llm.invoke(
            prompt="Q: where can we visit in the capital of France? A:",
            generate_config={"max_tokens": 1024, "stream": True},
        )
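
    You can also compose the model with other LangChain runnables, for example by
    piping a prompt template into the LLM. The snippet below is a minimal sketch
    that reuses the `llm` instance from the example above; the template text and
    the question are purely illustrative:

    .. code-block:: python

        from langchain_core.prompts import PromptTemplate

        prompt = PromptTemplate.from_template("Q: {question} A:")
        chain = prompt | llm
        chain.invoke({"question": "where can we visit in the capital of France?"})

    Because `Xinference` subclasses `LLM`, it can be used anywhere a LangChain LLM
    is expected.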

    To view all the supported builtin models, run:

    .. code-block:: bash

        $ xinference list --all

    """

    client: Any
    server_url: Optional[str]
    """URL of the xinference server"""
    model_uid: Optional[str]
    """UID of the launched model"""
    model_kwargs: Dict[str, Any]
    """Keyword arguments to be passed to xinference.LLM"""

    def __init__(
        self,
        server_url: Optional[str] = None,
        model_uid: Optional[str] = None,
        **model_kwargs: Any,
    ):
        try:
            from xinference.client import RESTfulClient
        except ImportError:
            try:
                from xinference_client import RESTfulClient
            except ImportError as e:
                raise ImportError(
                    "Could not import RESTfulClient from xinference. Please install it"
                    " with `pip install xinference` or `pip install xinference_client`."
                ) from e

        model_kwargs = model_kwargs or {}

        super().__init__(
            **{
                "server_url": server_url,
                "model_uid": model_uid,
                "model_kwargs": model_kwargs,
            }
        )

        if self.server_url is None:
            raise ValueError("Please provide server URL")

        if self.model_uid is None:
            raise ValueError("Please provide the model UID")

        # REST client bound to the Xinference endpoint.
        self.client = RESTfulClient(server_url)

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "xinference"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            **{"server_url": self.server_url},
            **{"model_uid": self.model_uid},
            **{"model_kwargs": self.model_kwargs},
        }

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call the xinference model and return the output.

        Args:
            prompt: The prompt to use for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Returns:
            The generated string by the model.
        """
        model = self.client.get_model(self.model_uid)

        generate_config: "LlamaCppGenerateConfig" = kwargs.get("generate_config", {})

        # Per-call options override the defaults supplied at construction time.
        generate_config = {**self.model_kwargs, **generate_config}

        if stop:
            generate_config["stop"] = stop

        if generate_config and generate_config.get("stream"):
            combined_text_output = ""
            for token in self._stream_generate(
                model=model,
                prompt=prompt,
                run_manager=run_manager,
                generate_config=generate_config,
            ):
                combined_text_output += token
            return combined_text_output
        else:
            completion = model.generate(prompt=prompt, generate_config=generate_config)
            return completion["choices"][0]["text"]

    def _stream_generate(
        self,
        model: Union["RESTfulGenerateModelHandle", "RESTfulChatModelHandle"],
        prompt: str,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        generate_config: Optional["LlamaCppGenerateConfig"] = None,
    ) -> Generator[str, None, None]:
        """
        Args:
            prompt: The prompt to use for generation.
            model: The model used for generation.
            stop: Optional list of stop words to use when generating.
            generate_config: Optional dictionary for the configuration used for
                generation.

        Yields:
            A string token.
        """
        streaming_response = model.generate(
            prompt=prompt, generate_config=generate_config
        )
        for chunk in streaming_response:
            if isinstance(chunk, dict):
                choices = chunk.get("choices", [])
                if choices:
                    choice = choices[0]
                    if isinstance(choice, dict):
                        token = choice.get("text", "")
                        log_probs = choice.get("logprobs")
                        if run_manager:
                            # Notify registered callbacks about each new token.
                            run_manager.on_llm_new_token(
                                token=token, verbose=self.verbose, log_probs=log_probs
                            )
                        yield token