
import importlib.util
import logging
import pickle
from typing import Any, Callable, List, Mapping, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from pydantic import ConfigDict

from langchain_community.llms.utils import enforce_stop_tokens

logger = logging.getLogger(__name__)


def _generate_text(
    pipeline: Any,
    prompt: str,
    *args: Any,
    stop: Optional[List[str]] = None,
    **kwargs: Any,
) -> str:
    """Inference function to send to the remote hardware.

    Accepts a pipeline callable (or, more likely,
    a key pointing to the model on the cluster's object store)
    and returns text predictions for each document
    in the batch.
    r   )r   r   r   r   r   texts         `/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/llms/self_hosted.py_generate_textr      s=     8F,T,,,V,,D"4..K    devicec                 .   t          | t                    r<t          | d          5 }t          j        |          } ddd           n# 1 swxY w Y   t
          j                            d          ddl}|j	        
                                }|dk     s||k    rt          d| d| d          |dk     r!|dk    rt                              d	|           |                    |          | _        | j                            | j                  | _        | S )
z+Send a pipeline to a device on the cluster.rbNtorchr   zGot device==z', device is required to be within [-1, )zDevice has %d GPUs available. Provide device={deviceId} to `from_model_id` to use availableGPUs for execution. deviceId is -1 for CPU and can be a positive integer associated with CUDA device id.)
isinstancestropenpickleload	importlibutil	find_specr   cudadevice_count
ValueErrorloggerwarningr   modelto)r   r   fr   cuda_device_counts        r   _send_pipeline_to_devicer/   #   ss   (C   &(D!! 	&Q {1~~H	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&
 ~((4!J3355B;;6%666Mv M M8IM M M   A::+a//NNL "    ,,v..!**8?;;Os   AAAc                       e Zd ZU dZdZeed<   dZeed<   eZ	e
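
# Note: the device convention above follows the usual transformers one:
# device=-1 keeps the model on CPU, while device=0, 1, ... select CUDA
# devices. A hypothetical local sanity check (not part of this module's
# public API) could look like:
#
#     from transformers import pipeline as hf_pipeline
#     pipe = hf_pipeline("text-generation", model="gpt2")
#     pipe = _send_pipeline_to_device(pipe, device=-1)  # stays on CPU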
ed<   	 dZeed<   	 e
ed<   	 dZee         ed<   	 d	d
gZee         ed<   	 dZeed<   	  ed          Zdef fdZe	 	 ddededeee                  dededefd            Zedeeef         fd            Zedefd            Z	 	 ddedeee                  dee         dedef
dZ  xZ!S )SelfHostedPipelinea	  Model inference on self-hosted remote hardware.

    Supported hardware includes auto-launched instances on AWS, GCP, Azure,
    and Lambda, as well as servers specified
    by IP address and SSH credentials (such as on-prem, or another
    cloud like Paperspace, Coreweave, etc.).

    To use, you should have the ``runhouse`` python package installed.

    Example for custom pipeline and inference functions:
        .. code-block:: python

            from langchain_community.llms import SelfHostedPipeline
            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
            import runhouse as rh

            def load_pipeline():
                tokenizer = AutoTokenizer.from_pretrained("gpt2")
                model = AutoModelForCausalLM.from_pretrained("gpt2")
                return pipeline(
                    "text-generation", model=model, tokenizer=tokenizer,
                    max_new_tokens=10
                )
            def inference_fn(pipeline, prompt, stop=None):
                return pipeline(prompt)[0]["generated_text"]

            gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
            llm = SelfHostedPipeline(
                model_load_fn=load_pipeline,
                hardware=gpu,
                model_reqs=["./", "torch", "transformers"],
                inference_fn=inference_fn,
                allow_dangerous_deserialization=True,
            )
    Example for <2GB model (can be serialized and sent directly to the server):
        .. code-block:: python

            from langchain_community.llms import SelfHostedPipeline
            import runhouse as rh
            gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
            my_model = ...
            llm = SelfHostedPipeline.from_pipeline(
                pipeline=my_model,
                hardware=gpu,
                model_reqs=["./", "torch", "transformers"],
                allow_dangerous_deserialization=True,
            )
    Example passing model path for larger models:
        .. code-block:: python

            from langchain_community.llms import SelfHostedPipeline
            import runhouse as rh
            import pickle
            from transformers import pipeline

            gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
            generator = pipeline(model="gpt2")
            rh.blob(pickle.dumps(generator), path="models/pipeline.pkl"
                ).save().to(gpu, path="models")
            llm = SelfHostedPipeline.from_pipeline(
                pipeline="models/pipeline.pkl",
                hardware=gpu,
                model_reqs=["./", "torch", "transformers"],
                allow_dangerous_deserialization=True,
            )
    """

    pipeline_ref: Any = None
    """Remote reference to the pipeline loaded on the cluster (set in __init__)."""
    client: Any = None
    """Remote callable wrapping the inference function (set in __init__)."""
    inference_fn: Callable = _generate_text
    """Inference function to send to the remote hardware."""
    hardware: Any = None
    """Remote hardware to send the inference function to."""
    model_load_fn: Callable
    """Function to load the model remotely on the server."""
    load_fn_kwargs: Optional[dict] = None
    """Keyword arguments to pass to the model load function."""
    model_reqs: List[str] = ["./", "torch"]
    """Requirements to install on hardware to inference the model."""
    allow_dangerous_deserialization: bool = False
    """Opt-in flag: loading a pipeline from a path relies on pickle, which can
    execute arbitrary code if the payload has been tampered with."""

    model_config = ConfigDict(
        extra="forbid",
    )

    def __init__(self, **kwargs: Any):
        """Init the pipeline with an auxiliary function.

        The load function must be in global scope to be imported
        and run on the server, i.e. in a module and not a REPL or closure.
        Then, initialize the remote inference function.
        """
        if not kwargs.get("allow_dangerous_deserialization"):
            raise ValueError(
                "SelfHostedPipeline relies on the pickle module. "
                "You will need to set allow_dangerous_deserialization=True "
                "if you want to opt-in to allow deserialization of data using "
                "pickle. Data can be compromised by a malicious actor if not "
                "handled properly to include a malicious payload that, when "
                "deserialized with pickle, can execute arbitrary code."
            )
        super().__init__(**kwargs)
        try:
            import runhouse as rh
        except ImportError:
            raise ImportError(
                "Could not import runhouse python package. "
                "Please install it with `pip install runhouse`."
            )

        # Send the (module-level) load function to the cluster, run it there,
        # and keep only a remote reference to the loaded pipeline.
        remote_load_fn = rh.function(fn=self.model_load_fn).to(
            self.hardware, reqs=self.model_reqs
        )
        _load_fn_kwargs = self.load_fn_kwargs or {}
        self.pipeline_ref = remote_load_fn.remote(**_load_fn_kwargs)

        # The inference function is likewise shipped to the hardware; calling
        # `self.client` executes remotely against `self.pipeline_ref`.
        self.client = rh.function(fn=self.inference_fn).to(
            self.hardware, reqs=self.model_reqs
        )
)2&0J4D"E	
 

 
 
 	
r   c                     i d| j         iS )zGet the identifying parameters.r5   )r5   rG   s    r   _identifying_paramsz&SelfHostedPipeline._identifying_params   s    
4=)
 	
r   c                     dS )Nself_hosted_llmr?   rQ   s    r   	_llm_typezSelfHostedPipeline._llm_type   s      r   r   r   run_managerc                 .     | j         d| j        ||d|S )N)r   r   r   r?   )r3   r2   )rG   r   r   rV   r   s        r   _callzSelfHostedPipeline._call   s6     t{ 
&vD
 
DJ
 
 	
r   )Nr   )NN)"__name__
__module____qualname____doc__r2   r   __annotations__r3   r   r4   r   r5   r7   r   dictr8   r   r   r9   boolr
   model_configrB   classmethodintr	   rO   propertyr   rR   rU   r   rX   __classcell__)rK   s   @r   r1   r1   B   s3        < <| L#FC+L(+++<Hc<<%)NHTN)))?!7OJS	+++E,1#T111 :  L#
 #
 #
 #
 #
 #
 #
J 
 +/
 

 
 T#Y'	

 
 
 

 
 
 [
6 
WS#X%6 
 
 
 X
 !3 ! ! ! X! %):>		
 	
	
 tCy!	
 67		

 	
 
	
 	
 	
 	
 	
 	
 	
 	
r   r1   )importlib.utilr#   loggingr!   typingr   r   r   r   r   langchain_core.callbacksr   #langchain_core.language_models.llmsr	   pydanticr
   langchain_community.llms.utilsr   	getLoggerrY   r)   r   r   rb   r/   r1   r?   r   r   <module>rm      sr         9 9 9 9 9 9 9 9 9 9 9 9 9 9 = = = = = = 3 3 3 3 3 3       > > > > > >		8	$	$ !%	    49
	
  	   (s C C    >j
 j
 j
 j
 j
 j
 j
 j
 j
 j
r   
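

# A minimal end-to-end sketch, guarded so it never runs at import time. It
# assumes a reachable runhouse cluster; the cluster name and instance type
# below are illustrative placeholders mirroring the docstring examples.
if __name__ == "__main__":
    import runhouse as rh
    from transformers import pipeline as hf_pipeline

    gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
    llm = SelfHostedPipeline.from_pipeline(
        pipeline=hf_pipeline(model="gpt2"),
        hardware=gpu,
        model_reqs=["./", "torch", "transformers"],
        allow_dangerous_deserialization=True,
    )
    print(llm.invoke("Once upon a time"))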