
     NgFB                     :   d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlZd dlZd dlmZmZ eeee
ee         f         f         Z ej        e          Z G d d          Z G d d          Z G d	 d
          Z G d de          Z G d d          ZdS )    N)OrderedDict)AnyDictListMappingOptionalTupleUnion)InferenceSession
RunOptionsc                      e Zd Zedededefd            Zededefd            Zedefd            Zedefd            Z	ed	e
j        fd
            Zedej        fd            Zededeee
j        f         fd            ZdS )
TypeHelperort_sessionnamereturnc                     t          |                                           D ]\  }}|j        |k    r	|j        c S t	          d| d          )Nzinput name 
 not found)	enumerate
get_inputsr   type
ValueError)r   r   _iinputs       f/var/www/html/ai-engine/env/lib/python3.11/site-packages/onnxruntime/transformers/io_binding_helper.pyget_input_typezTypeHelper.get_input_type   sc    ";#9#9#;#;<< 	" 	"IBzT!!z!!! "7t777888    c                     t          |                                           D ]\  }}|j        |k    r	|j        c S t	          d| d          )Nzoutput name r   )r   get_outputsr   r   r   )r   r   r   outputs       r   get_output_typezTypeHelper.get_output_type   se    #K$;$;$=$=>> 	# 	#JB{d""{""" # 8888999r   ort_typec                     t           j        t           j        t           j        t           j        t
          d}| |vrt          |  d          ||          S N)ztensor(int64)ztensor(int32)ztensor(float)ztensor(float16)ztensor(bool) not found in map)numpylonglongintcfloat32float16boolr   )r!   ort_type_to_numpy_type_maps     r   ort_type_to_numpy_typez!TypeHelper.ort_type_to_numpy_type!   sX     #^"Z"]$} &
 &
" 555;;;<<<)(33r   c                     t           j        t           j        t           j        t           j        t           j        d}| |vrt          |  d          ||          S r#   )torchint64int32r(   r)   r*   r   )r!   ort_type_to_torch_type_maps     r   ort_type_to_torch_typez!TypeHelper.ort_type_to_torch_type/   sZ     #["["]$}!J&
 &
" 555;;;<<<)(33r   
numpy_typec                 @   t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t          t          j        i}| |vrt          |  d          ||          S Nr$   )
r%   r&   r.   r/   r'   r0   r(   r)   r*   r   )r3   numpy_type_to_torch_type_maps     r   numpy_type_to_torch_typez#TypeHelper.numpy_type_to_torch_type=   sp     NEKJKM5=M5=%*(
$ 999
===>>>+J77r   
torch_typec           
         t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          i}| |vrt          |  d          ||          S r5   )
r.   r/   r%   r&   r0   r'   r(   r)   r*   r   )r8   torch_type_to_numpy_type_maps     r   torch_type_to_numpy_typez#TypeHelper.torch_type_to_numpy_typeL   sg     KKM5=M5=J(
$ 999
===>>>+J77r   c                    i }|                                  D ])}t                              |j                  ||j        <   *|                                 D ])}t                              |j                  ||j        <   *|S )z:Create a mapping from input/output name to numpy data type)r   r   r,   r   r   r   )r   name_to_numpy_typer   r   s       r   get_io_numpy_type_mapz TypeHelper.get_io_numpy_type_mapZ   s       ++-- 	[ 	[E-7-N-Nuz-Z-Zuz**!--// 	] 	]F.8.O.OPVP[.\.\v{++!!r   N)__name__
__module____qualname__staticmethodr   strr   r    r,   r2   r%   dtyper7   r.   r;   r   r>    r   r   r   r      sQ       9$4 9C 9C 9 9 9 \9 :3 :3 : : : \: 4 4 4 4 \4 4 4 4 4 \4 8U[ 8 8 8 \8 8U[ 8 8 8 \8 "+; "S%+EU@V " " " \" " "r   r   c                       e Zd Zedefd            Ze	 ddej        dej        dej        deej                 fd            Z	edd
            Z
dS )IOBindingHelperr   c                     i }|                                 D ]f\  }}t                              | |          }t                              |          }t	          j        t          j        |          ||          ||<   g|S )zpReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.)rD   device)itemsr   r    r2   r.   emptyr%   prod)r   output_shapesrI   output_buffersr   shaper!   r8   s           r   get_output_buffersz"IOBindingHelper.get_output_buffersg   s     (..00 	c 	cKD%!11+tDDH#::8DDJ#(;uz%/@/@
[a#b#b#bN4  r   N	input_idsposition_idsattention_maskpastc                    |t                               |           }|                                 }|                                sJ |                    d|j        j        d|d         t          |                                          |	                                           |t          |          D ]\  }	}
|
                                sJ |
	                                }|dk    r|	                                }|                    d|	 |
j        j        d|d|	          t          |
                                          |           |q|                                sJ |                    d|j        j        d|d         t          |                                          |	                                           |q|                                sJ |                    d|j        j        d|d         t          |                                          |	                                           |                                 D ]}|j        }||         }t                              | d|j        j         dt          |                                                      |                    ||j        j        d||         ||         |	                                           |S )	z)Returnas IO binding object for a session.NrQ   r   past_rS   rR   z device type=z shape=)r   r>   
io_bindingis_contiguous
bind_inputrI   r   listsizedata_ptrr   r   r   loggerdebugbind_output)r   rQ   rR   rS   rT   rN   rM   name_to_np_typerW   ipast_ir\   r   output_nameoutput_buffers                  r   prepare_io_bindingz"IOBindingHelper.prepare_io_bindingq   s    "(>>{KKO !++--
 &&(((((!K(!!""  	
 	
 	
 &t__  	6++-----!??,,q==  )1133H%%AKKM&#KAKK0''    %!//11111!! %* 01^((**++''))   #--/////!!#(/\&&(())%%''   "--// 	 	F +K*;7MLLKttm6J6OttX\]j]o]o]q]qXrXrttuuu""$),k*&&((    r   Tc                    g }|                                  D ]}|j        }||         }||         }|dt          j        |                                       |                                                                          }	|r:|                    |	                                                                           |                    |	           |S )z3Copy results to cpu. Returns a list of numpy array.r   )	r   r   r%   rL   reshapeclonedetachappendcpu)
r   rN   rM   return_numpyort_outputsr   rc   bufferrO   copy_tensors
             r   "get_outputs_from_io_binding_bufferz2IOBindingHelper.get_outputs_from_io_binding_buffer   s     !--// 	0 	0F +K#K0F!+.E UZ%6%6!67??FFLLNNUUWWK 0"";??#4#4#:#:#<#<===="";////r   N)T)r?   r@   rA   rB   r   rP   r.   Tensorr   re   rp   rE   r   r   rG   rG   f   s        (8    \  S S<S lS 	S
 5< S S S \Sj    \  r   rG   c                       e Zd ZdZddedej        fdZdedefdZ	d	 Z
d
edej        fdZdefdZddeeej        f         dedefdZeddedededeeef         fd            ZdS )CudaSessionzLInference Session with IO Binding for ONNX Runtime CUDA or TensorRT providerFr   rI   c                    || _         d | j                                         D             | _        d | j                                         D             | _        t
                              | j                   | _        | j                                         | _        || _	        t                      | _        t                      | _        || _        i | _        d S )Nc                     g | ]	}|j         
S rE   r   ).0r   s     r   
<listcomp>z(CudaSession.__init__.<locals>.<listcomp>   s    RRR5EJRRRr   c                     g | ]	}|j         
S rE   rw   )rx   r   s     r   ry   z(CudaSession.__init__.<locals>.<listcomp>   s    VVVVV[VVVr   )r   r   input_namesr   output_namesr   r>   io_name_to_numpy_typerW   enable_cuda_graphr   input_tensorsoutput_tensorsrI   buffer_sharing)selfr   rI   r~   s       r   __init__zCudaSession.__init__   s    &RRD4D4O4O4Q4QRRRVVt7G7S7S7U7UVVV%/%E%EdFV%W%W"*5577!2(]])mm /1r   
input_namerc   c                 Z    || j         v sJ || j        v sJ || j        |<   || j        |<   d S rq   )r{   r|   r   )r   r   rc   s      r   set_buffer_sharingzCudaSession.set_buffer_sharing   sJ    T-----d/////*5J'+5K(((r   c                     | ` | `| `d S rq   )r   r   rW   )r   s    r   __del__zCudaSession.__del__   s    OOOr   r   tensorc           	         |j         j        |j         j        nd}t          |j                  dk    rdgnt	          |j                  }| j                            ||j         j        || j        |         ||	                                           || j
        v rh| j                            | j
        |         |j         j        || j        |         ||	                                           || j        | j
        |         <   d S d S )Nr      )rI   indexlenrO   rZ   rW   rY   r   r}   r\   r   r_   r   )r   r   r   	device_idtensor_shapes        r   bind_input_and_buffer_sharingz)CudaSession.bind_input_and_buffer_sharing   s	   +1=+>+JFM''PQ	!&,//144ss$v|:L:L""M&t,OO	
 	
 	
 4&&&O''#D)"*40!!   >DD 3D 9::: '&r   
shape_dictc                 |   | j         r|                                D ]\  }}|| j        v r|| j        v r@t	          | j        |         j                  t	          |          k    rHt          d          | j        |         }t          j	        t	          |          t                              |                                        | j                  }|| j        |<   |                     ||           |                                D ]1\  }}|| j        v r!|| j        v r1t	          | j        |         j                  t	          |          k    rJ|| j        v rT| j        |         }t          j	        t	          |          t                              |                                        | j                  }|| j        |<   | j                            ||j        j        |j        j        |j        j        nd|t-          |                                          |                                           3dS )z Allocate tensors for I/O Bindingz(Expect static input shape for cuda graph)rD   )rI   Nr   )r~   rJ   r{   r   tuplerO   RuntimeErrorr}   r.   rK   r   r7   torI   r   r|   r   r   rW   r_   r   r   rZ   r[   r\   )r   r   r   rO   numpy_dtyper   s         r   allocate_bufferszCudaSession.allocate_buffers  s9   ! 	E)//11 E Ee4+++t111 !3D!9!?@@E%LLPP$*+UVVV"&"<T"BK"[uZ=`=`al=m=mnnnqq#{ r  F 06D&t,66tVDDD%++-- 	 	KD%t(((4...59LT9R9X3Y3Y]bch]i]i3i3i4..."8>U5\\9\9\]h9i9ijjjmm; n   -3#D)++M&+1=+>+JFM''PQ''OO%%  	 	r   NT	feed_dictrun_optionssynchronizec                    |                                 D ]\  }}t          |t          j                  r|                                sJ || j        v r| j        r| j        |                                         |                                k    sJ | j        |         j	        |j	        k    sJ |j
        j        dk    sJ | j        |                             |           |                     ||           |rS| j                                         | j                            | j        |           | j                                         n | j                            | j        |           | j        S )z$Bind input tensors and run inferencecuda)rJ   
isinstancer.   rr   rX   r{   r~   r   nelementrD   rI   r   copy_r   rW   synchronize_inputsr   run_with_iobindingsynchronize_outputsr   )r   r   r   r   r   r   s         r   inferzCudaSession.infer6  sp   %OO-- 		E 		ELD&fel33N8L8L8N8NNNNt''') E-d3<<>>&//BSBSSSSS-d39V\IIII!=-7777&t,226::::66tVDDD 	NO..000//MMMO//1111//MMM""r   r   r   r~   streamr   c                 B    | d|d}|dk    rt          |          |d<   |S )NkSameAsRequested)r   arena_extend_strategyr~   r   user_compute_stream)rC   )r   r~   r   optionss       r   get_cuda_provider_optionsz%CudaSession.get_cuda_provider_optionsL  s:     #%7!2
 
 Q;;-0[[G)*r   F)NT)r   )r?   r@   rA   __doc__r   r.   rI   r   rC   r   r   rr   r   	ShapeDictr   r   r   r*   r   rB   intr   r   rE   r   r   rt   rt      sE       VV1 1$4 1el 1 1 1 16S 6s 6 6 6 6  
D# Du| D D D D0(9 ( ( ( (T# #tC$56 #Z #ei # # # #,  S T SV _cdgildl_m    \  r   rt   c                        e Zd Z	 	 	 	 ddedej        deded	ed
ede	e
eef                  f fdZddedefdZdde
eej        f         def fdZ xZS )
GpuBindingFr   Nr   rI   r   enable_gpu_graphgpu_graph_idr   r   c                 @   t                                          |||           |r0|                                D ]\  }}	|                     ||	           |                     |           || _        |rt          j        |          nd | _        || _	        d | _
        d S rq   )superr   rJ   r   r   r   copydeepcopyr   r   last_run_gpu_graph_id)r   r   rI   r   r   r   r   r   r   rc   	__class__s             r   r   zGpuBinding.__init__\  s     	f.>??? 	A+9+?+?+A+A A A'
K''
K@@@@j)))(7GQ$-
333T%)"""r   disable_cuda_graph_in_runr   c                     t                      }|rdn| j        }|                    dt          |                     || _        |S )Nr   r   )r   r   add_run_config_entryrC   r   )r   r   r   r   s       r   get_run_optionszGpuBinding.get_run_optionss  sG    ,,6MrrD<M$$^S5F5FGGG%1"r   r   c                     |                      |          }| j        r|                    dd           t                                          ||          S )N'disable_synchronize_execution_providers1)r   r   r   r   r   )r   r   r   r   r   s       r   r   zGpuBinding.infer~  sR    **+DEE; 	],,-VX[\\\ww}}Y444r   )Fr   r   Nr   )r?   r@   rA   r   r.   rI   r   r*   r   r   r   rC   r   r   r   rr   r   __classcell__)r   s   @r   r   r   [  s        "'37* *%* * 	*
 * * * !c3h0* * * * * *.	 	 	* 	 	 	 	5 5tC$56 5SW 5 5 5 5 5 5 5 5 5 5r   r   c            
       p    e Zd ZdZddedej        dedefdZ	 	 dde	de
deeeef                  defdZd
S )GpuBindingManagerzA manager for I/O bindings that support multiple CUDA Graphs.
    One cuda graph is reused for same input shape. Automatically add a new cuda graph for new input shape.
    r   r   r   rI   r   max_cuda_graphsc                 Z    || _         || _        g | _        d | _        || _        || _        d S rq   )r   rI   graph_bindingsno_graph_bindingr   r   )r   r   rI   r   r   s        r   r   zGpuBindingManager.__init__  s:    & ! !%.r   FNr   use_cuda_graphr   r   c           	         | j         D ]}|j        |k    r|c S t          | j                   | j        k    s|sQ| j        )t          | j        | j        || j        |          | _        n| j        	                    |           | j        S t          | j        | j        |dt          | j                   | j        |          }| j         
                    |           |S )N)r   r   T)r   r   r   r   )r   r   r   r   r   r   r   rI   r   r   rj   )r   r   r   r   gpu_graph_bindings        r   get_bindingzGpuBindingManager.get_binding  s    "&!4 	) 	) +z99(((( : t"##t';;;N;$,(2$dk:dkbp) ) )%% %66zBBB(( 'K!T011;)
 
 
 	""#4555  r   )r   r   )FN)r?   r@   rA   r   r   r.   rI   r   r   r   r*   r   r   rC   r   r   rE   r   r   r   r     s         / /$4 /el /TW /nq / / / /"  %37	 !  ! !  ! !c3h0	 !
 
 !  !  !  !  !  !r   r   )r   loggingcollectionsr   typingr   r   r   r   r   r	   r
   r%   r.   onnxruntimer   r   rC   r   r   	getLoggerr?   r]   r   rG   rt   r   r   rE   r   r   <module>r      s     # # # # # # C C C C C C C C C C C C C C C C C C   4 4 4 4 4 4 4 4 Cud3i/001			8	$	$R" R" R" R" R" R" R" R"jn n n n n n n nbA A A A A A A AH)5 )5 )5 )5 )5 )5 )5 )5X3! 3! 3! 3! 3! 3! 3! 3! 3! 3!r   