
from enum import Enum
from typing import Any, Iterator, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from pydantic import BaseModel, ConfigDict

from langchain_community.llms.utils import enforce_stop_tokens


class Device(str, Enum):
    """The device to use for inference, cuda or cpu"""

    cuda = "cuda"
    cpu = "cpu"


class ReaderConfig(BaseModel):
    """Configuration for the reader to be deployed in Titan Takeoff API."""

    model_config = ConfigDict(protected_namespaces=())

    model_name: str
    device: Device = Device.cuda
    consumer_group: str = "primary"
    tensor_parallel: Optional[int] = None
    max_seq_length: int = 512
    max_batch_size: int = 4


class TitanTakeoff(LLM):
    """Titan Takeoff API LLMs.

    Titan Takeoff is a wrapper to interface with Takeoff Inference API for
    generative text to text language models.

    You can use this wrapper to send requests to a generative language model
    and to deploy readers with Takeoff.

    Examples:
        This is an example of how to deploy a generative language model and send
        requests.

        .. code-block:: python

            # Import the TitanTakeoff class from community package
            import time
            from langchain_community.llms import TitanTakeoff

            # Specify the reader you'd like to deploy
            reader_1 = {
                "model_name": "TheBloke/Llama-2-7b-Chat-AWQ",
                "device": "cuda",
                "tensor_parallel": 1,
                "consumer_group": "llama"
            }

            # For every reader you pass into models arg Takeoff will spin
            # up a reader according to the specs you provide. If you don't
            # specify the arg no models are spun up and it assumes you have
            # already done this separately.
            llm = TitanTakeoff(models=[reader_1])

            # Wait for the reader to be deployed, time needed depends on the
            # model size and your internet speed
            time.sleep(60)

            # Returns the generated text for the prompt sent to the `llama`
            # consumer group, where we just spun up the Llama 7B model
            print(llm.invoke(
                "Where can I see football?", consumer_group="llama"
            ))

            # You can also send generation parameters to the model, any of the
            # following can be passed in as kwargs:
            # https://docs.titanml.co/docs/next/apis/Takeoff%20inference_REST_API/generate#request
            # for instance:
            print(llm.invoke(
                "Where can I see football?", consumer_group="llama", max_new_tokens=100
            ))
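
        Streaming is also supported. As a sketch (assuming the `llama` reader
        above is already live), you can call `stream` directly, or construct
        the wrapper with `streaming=True` so tokens are reported to callbacks
        as they are generated:

        .. code-block:: python

            for token in llm.stream(
                "Where can I see football?", consumer_group="llama"
            ):
                print(token, end="", flush=True)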
    zhttp://localhostbase_urli  porti  	mgmt_portF	streamingNclientmodelsc                    t                                          ||||           	 ddlm} n# t          $ r t	          d          w xY w || j        | j        | j                  | _        |D ]}| j        	                    |           dS )a  Initialize the Titan Takeoff language wrapper.

        Args:
            base_url (str, optional): The base URL where the Takeoff
                Inference Server is listening. Defaults to `http://localhost`.
            port (int, optional): What port is Takeoff Inference API
                listening on. Defaults to 3000.
            mgmt_port (int, optional): What port is Takeoff Management API
                listening on. Defaults to 3001.
            streaming (bool, optional): Whether you want to by default use the
                generate_stream endpoint over generate to stream responses.
                Defaults to False. In reality, this is not significantly different
                as the streamed response is buffered and returned similar to the
                non-streamed response, but the run manager is applied per token
                generated.
            models (List[ReaderConfig], optional): Any readers you'd like to
                spin up on. Defaults to [].

        Raises:
            ImportError: If you haven't installed takeoff-client, you will
            get an ImportError. To remedy run `pip install 'takeoff-client==0.4.0'`
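
        Example:
            A minimal sketch of deploying a reader with a typed `ReaderConfig`
            rather than a plain dict (assumes a Takeoff server is reachable on
            the default ports):

            .. code-block:: python

                from langchain_community.llms import TitanTakeoff
                from langchain_community.llms.titan_takeoff import ReaderConfig

                reader = ReaderConfig(
                    model_name="TheBloke/Llama-2-7b-Chat-AWQ",
                    device="cuda",
                    consumer_group="llama",
                )
                llm = TitanTakeoff(models=[reader])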
        """
        super().__init__(
            base_url=base_url, port=port, mgmt_port=mgmt_port, streaming=streaming
        )
        try:
            from takeoff_client import TakeoffClient
        except ImportError:
            raise ImportError(
                "takeoff-client is required for TitanTakeoff. "
                "Please install it with `pip install 'takeoff-client>=0.4.0'`."
            )
        self.client = TakeoffClient(
            self.base_url, port=self.port, mgmt_port=self.mgmt_port
        )
        for model in models:
            self.client.create_reader(model)

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "titan_takeoff"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to Titan Takeoff (Pro) generate endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager to use when streaming.

        Returns:
            The string generated by the model.

        Example:
            .. code-block:: python

                model = TitanTakeoff()

                prompt = "What is the capital of the United Kingdom?"

                # Use of model(prompt), ie `__call__` was deprecated in LangChain 0.1.7,
                # use model.invoke(prompt) instead.
                response = model.invoke(prompt)
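
                # Stop sequences and Takeoff generation parameters can also be
                # passed per call; a sketch (see the Takeoff generate API
                # request docs for the available fields):
                response = model.invoke(prompt, stop=["\n\n"], max_new_tokens=50)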

         )r>   r?   r@   text)r-   _streamrD   r.   generater   )	r7   r>   r?   r@   rA   text_outputchunkresponserD   s	            r   _callzTitanTakeoff._call   s    : > 	K' &   * *
 uz)'4;'99&99&tT22Dr   c              +   .  K    | j         j        |fi |}d}|D ]}||j        z  }d|v r|                    d          rd}t	          |                    dd                    dk    r.|                    dd          \  }}	|                    d          }|r3t          |          }
d}|r|                    |
j	                   |
V  |rGt          |
                    dd                    }
|r|                    |
j	                   |
V  d	S d	S )
a  Call out to Titan Takeoff (Pro) stream endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Optional callback manager to use when streaming.

        Yields:
            A GenerationChunk containing a string token.

        Example:
            .. code-block:: python

                model = TitanTakeoff()

                prompt = "What is the capital of the United Kingdom?"
                response = model.stream(prompt)

                # OR

                model = TitanTakeoff(streaming=True)

                response = model.invoke(prompt)

        """
        response = self.client.generate_stream([prompt], **kwargs)
        buffer = ""
        for text in response:
            buffer += text.data
            if "data:" in buffer:
                # Remove the first instance of "data:" from the buffer.
                if buffer.startswith("data:"):
                    buffer = ""
                if len(buffer.split("data:", 1)) == 2:
                    content, _ = buffer.split("data:", 1)
                    buffer = content.rstrip("\n")
                # Trim the buffer to only have content after the "data:" part.
                if buffer:  # Ensure that there's content to process.
                    chunk = GenerationChunk(text=buffer)
                    buffer = ""  # Reset buffer for the next set of data.
                    if run_manager:
                        run_manager.on_llm_new_token(token=chunk.text)
                    yield chunk

        # Yield any remaining content in the buffer, stripping the
        # end-of-sequence token.
        if buffer:
            chunk = GenerationChunk(text=buffer.replace("</s>", ""))
            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk