
import torch

from ..utils.import_utils import is_torch_available


if is_torch_available():
    from transformers import PreTrainedModel, StaticCache
    from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_3


class TorchExportableModuleWithStaticCache(torch.nn.Module):
    """
    A wrapper module designed to make a `PreTrainedModel` exportable with `torch.export`,
    specifically for use with static caching. This module ensures that the exported model
    is compatible with further lowering and execution in `ExecuTorch`.

    Note:
        This class is specifically designed to support the export process using `torch.export`
        in a way that ensures the model can be further lowered and run efficiently in `ExecuTorch`.
    """

    def __init__(self, model: PreTrainedModel):
        """
        Initializes the wrapper module with the pretrained model.

        Args:
            model (`PreTrainedModel`): The pretrained model to wrap. The model must have caching
            enabled and use a 'static' caching implementation.

        Raises:
            AssertionError: If the pretrained model does not have caching enabled or if it does
            not use a 'static' caching implementation in `model.generation_config`.
        """
        super().__init__()

        # Sanity checks on the generation config before building the static cache.
        if model.generation_config is None:
            raise AssertionError(
                "The model must have a generation config to be exported with static caching. "
                "Please set `generation_config`."
            )

        if not model.generation_config.use_cache:
            raise AssertionError(
                "The model must have caching enabled to be exported with static caching. "
                "Please set `generation_config.use_cache=True`."
            )

        if model.generation_config.cache_implementation != "static":
            raise AssertionError(
                "The model must use a 'static' caching implementation to be exported with static caching. "
                "Please set `generation_config.cache_implementation='static'`."
            )

        self.model = model
        self.static_cache = StaticCache(
            config=self.model.config,
            batch_size=self.model.generation_config.cache_config.batch_size,
            max_cache_len=self.model.generation_config.cache_config.max_cache_len,
            dtype=self.model.dtype,
        )
        self.is_causal = any("CausalLM" in arch for arch in self.model.config.architectures)
        if self.is_causal:
            # Precompute a lower-triangular causal mask sized to the static cache.
            causal_mask = torch.tril(
                torch.ones(
                    self.static_cache.max_cache_len,
                    self.static_cache.max_cache_len,
                    dtype=torch.bool,
                )
            )
            self.register_buffer("mask", causal_mask, persistent=False)

    def forward(self, input_ids: torch.Tensor, cache_position: torch.Tensor):
        """
        Forward pass of the module, which is compatible with the ExecuTorch runtime.

        Args:
            input_ids (`torch.Tensor`): Tensor representing the current input token ids to the module.
            cache_position (`torch.Tensor`): Tensor representing the current input position in the cache.

        Returns:
            torch.Tensor: Logits output from the model.

        This forward adapter serves two primary purposes:

        1. **Making the Model `torch.export`-Compatible**:
            The adapter hides unsupported objects, such as the `Cache`, from the graph inputs and outputs,
            enabling the model to be exportable using `torch.export` without encountering issues.

        2. **Ensuring Compatibility with the `ExecuTorch` runtime**:
            The adapter matches the model's forward signature with that in `executorch/extension/llm/runner`,
            ensuring that the exported model can be executed in `ExecuTorch` out-of-the-box.
        """
        _, seqlen = input_ids.shape
        attn_mask = self.mask[cache_position, :seqlen] if self.is_causal else None
        outs = self.model(
            input_ids=input_ids,
            attention_mask=attn_mask,
            position_ids=cache_position.unsqueeze(0),
            cache_position=cache_position,
            past_key_values=self.static_cache,
            use_cache=True,
        )
        return outs.logits

    @staticmethod
    def generate(
        exported_program: torch.export.ExportedProgram, prompt_token_ids: torch.Tensor, max_new_tokens: int
    ) -> torch.Tensor:
        """
        Generate a sequence of tokens using an exported program.

        This util function is designed to test exported models by simulating the generation process.
        It processes the input prompt tokens sequentially (no parallel prefill).
        This generate function is not intended to replace the original `generate` method, and support
        for leveraging the original `generate` is potentially planned!

        Args:
            exported_program (`torch.export.ExportedProgram`): The exported program generated via `torch.export`.
            prompt_token_ids (`torch.Tensor`): Tensor representing the input prompt token IDs.
            max_new_tokens (`int`): Maximum number of new tokens to generate. Note that the total generation
                length is limited by both `max_new_tokens` and the model's cache size.

        Returns:
            torch.Tensor: A tensor containing the generated sequence of token IDs, including the original prompt tokens.
        """
        prompt_token_len = prompt_token_ids.shape[-1]
        max_generation_length = prompt_token_len + max_new_tokens
        # Cap the generation length at the static cache size baked into the exported program.
        for buffer_name, buffer in exported_program.named_buffers():
            if buffer_name.startswith("static_cache.key_cache"):
                max_cache_len = buffer.shape[2]
                max_generation_length = min(max_generation_length, max_cache_len)
                break

        # Prefill: feed the prompt one token at a time to populate the cache.
        response_tokens = []
        for input_pos in range(min(max_generation_length, prompt_token_len)):
            result = exported_program.module().forward(
                input_ids=prompt_token_ids[:, input_pos : input_pos + 1],
                cache_position=torch.tensor([input_pos], dtype=torch.long),
            )
            response_tokens.append(prompt_token_ids[0][input_pos].item())

        current_token = torch.argmax(result[:, -1, :], dim=-1).item()
        response_tokens.append(current_token)

        # Decode: greedily sample one token at a time until the length budget is exhausted.
        while len(response_tokens) < max_generation_length:
            result = exported_program.module().forward(
                input_ids=torch.tensor([[current_token]], dtype=torch.long),
                cache_position=torch.tensor([len(response_tokens)], dtype=torch.long),
            )
            current_token = torch.argmax(result[:, -1, :], dim=-1).item()
            response_tokens.append(current_token)

        return torch.tensor([response_tokens], dtype=torch.long)


def convert_and_export_with_cache(
    model: PreTrainedModel,
    example_input_ids: torch.Tensor = None,
    example_cache_position: torch.Tensor = None,
):
    """
    Convert a `PreTrainedModel` into an exportable module and export it using `torch.export`,
    ensuring the exported model is compatible with `ExecuTorch`.

    Args:
        model (`PreTrainedModel`): The pretrained model to be exported.
        example_input_ids (`torch.Tensor`): Example input token ids used by `torch.export`.
        example_cache_position (`torch.Tensor`): Example current cache position used by `torch.export`.

    Returns:
        Exported program (`torch.export.ExportedProgram`): The exported program generated via `torch.export`.
    """
    if not is_torch_greater_or_equal_than_2_3:
        raise ImportError("torch >= 2.3 is required.")

    import torch.export._trace

    with torch.no_grad():
        # Default example inputs cover the text-decoder case: a single token id at cache position 0.
        example_input_ids = (
            example_input_ids if example_input_ids is not None else torch.tensor([[1]], dtype=torch.long)
        )
        example_cache_position = (
            example_cache_position if example_cache_position is not None else torch.tensor([0], dtype=torch.long)
        )

        exported_program = torch.export._trace._export(
            TorchExportableModuleWithStaticCache(model),
            args=(example_input_ids,),
            kwargs={"cache_position": example_cache_position},
            pre_dispatch=False,
            strict=True,
        )
        return exported_program