
    קg-                         d dl mZmZ d dlZd dlmc mZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZmZmZ d d
lmZmZ d ZddZdej        fdZdej        fdZd Z d Z!d Z"d Z# e"            Z$ e#            Z%d Z&dS )    )OptionalTupleN)_prims)DispatchKey)autograd_not_implemented)HigherOrderOperator)CUDARngStateHelpermake_contiguous_strides_for)FakeTensorMode)disable_proxy_modes_tracingProxyTorchDispatchModetrack_tensor_tree)_device_dtypec           	      R    t          d| j         d| j         d| j         d          )Nz"You are trying to functionalize a z RNG operator but zE does not use Philox/counter-based RNG. Therefore, functionalizing a zo RNG operator is not supported. We are discussing the possibility of a Philox-based RNG implementation for CPU.)RuntimeErrortype)devices    R/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/_prims/rng_prims.pythrow_on_non_cudar      sQ    
	iV[ 	i 	iFK 	i 	iFLk	i 	i 	i      c                 f   t           j                            d| z   |d|          }|                    |           t	          t           j        j        j        |           }|j        }|r||_	        ||fD ]<}	||	_
        t           j        j        j        |	_        | |z   |	_        ||	_        ||	_        =d S )Nz
rngprims:: )mutates_argsschema)torchlibrary	custom_opregister_fakegetattr_opsopsrngprimsdefault_tags__doc___prims_commonRETURN_TYPENEWreturn_typer   	impl_atenprim_meta_impl)
namer   r+   	impl_metadoctagsrngprim_defprim_packetprimps
             r   register_rng_primr5      s    -))tYR *  K i(((%*.1488KD 
4  % %	+7;&=$% %r   shapec                 f    t          j        t          j        dt          j                            S )Nr   dtype)r   
TensorLiker   tensorint64)r6   s    r   philox_rand_offset_metar=   2   s&     U\!5;???@@@r   c                 b   d}| D ]}||z  }t          j        |t           j                  }d}d}d}t           j                            t           j                                                  }|j        |z  }||z   dz
  |z  }	t          |	|j        |z            }	|dz
  ||	z  |z  z  dz   |z  }
|
S )N   r8         )	r   scalar_tensorr<   cudaget_device_propertiescurrent_devicemax_threads_per_multi_processorminmulti_processor_count)r6   numel_scalardim_sizenumel
block_sizeunrollcurand4_engine_callsdevice_propertyblocks_per_sm	grid_sizeoffsets              r   philox_rand_offsetrS   8   s     L ! ! EK@@@EJFj66uz7P7P7R7RSSO#CzQM#a'J6IID}TUUI	
Y.781<F Mr   c                     d} d}dt           j        dt           j        dt           j        dt          t          t
          df                  dt          d	t          fd
}dt           j        dt           j        dt           j        dt          t          t
          df                  dt          d	t          fd}t          | |||dt           j	        j
        f           d S )Nphilox_randz{(SymInt[] size, Tensor seed, Tensor offset, int[]? stride, Device? device=None, ScalarType? dtype=None) -> (Tensor, Tensor)r6   seedrR   stride.r   r9   c                 ~    |J t          |           }t          j        | |||          }t          |           }||fS )N)r6   stridesr9   r   )r
   r   
TensorMetar=   )r6   rV   rR   rW   r   r9   random_valuess          r   _philox_rand_metaz/register_philox_rand.<locals>._philox_rand_metaT   sT     ~~~,U33)uV
 
 
 )//v&&r   c                 H   |J |j         dk    rg }n|g}|j         dk    rt          |          t          j                            |          5  t          j        ||           t          j        | ||          }d d d            n# 1 swxY w Y   |t          |           fS )NcpurC   )r   r9   )	r   r   r   randomfork_rngr	   set_torch_state_tensorrandrS   )r6   rV   rR   rW   r   r9   devicesr[   s           r   _philox_randz*register_philox_rand.<locals>._philox_rande   s     ~~~;%GGhG;&  #F+++\""7++ 	J 	J5dFCCC!JuV5IIIM	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 	J 07777s   -BBBz$Philox based stateless rand operator)r-   r   r+   r.   r/   r0   )r   SizeTensorr   r   intr   r   r5   Tagnondeterministic_seeded)r-   r   r\   rd   s       r   register_philox_randrj   P   s
   D KF'z'l' ' sCx)	'
 ' ' ' ' '"8z8l8 8 sCx)	8
 8 8 8 8 80 #2i/1     r   c                    |                     d          rE|                     d          }t          |t                    rt          j        |          }|j        S d | D             }t          d |D                       rdS t          d |D                       rdS t          d |D                       rdS t          d	 |D                       rd
S d S )Nr   c                 Z    h | ](}t          |t          j                  |j        j        )S r   )
isinstancer   rf   r   r   ).0args     r   	<setcomp>zget_device.<locals>.<setcomp>   s-    PPP3*S%,2O2OPszPPPr   c              3   "   K   | ]
}|d k    V  dS )rC   Nr   rn   devs     r   	<genexpr>zget_device.<locals>.<genexpr>   s&      
,
,S3&=
,
,
,
,
,
,r   rC   c              3   "   K   | ]
}|d k    V  dS )xpuNr   rr   s     r   rt   zget_device.<locals>.<genexpr>   &      --cSE\------r   rv   c              3   "   K   | ]
}|d k    V  dS )hpuNr   rr   s     r   rt   zget_device.<locals>.<genexpr>   rw   r   ry   c              3   "   K   | ]
}|d k    V  dS )r^   Nr   rr   s     r   rt   zget_device.<locals>.<genexpr>   rw   r   r^   )getrm   strr   r   r   any)argskwargsr   rc   s       r   
get_devicer      s    zz( H%%fc"" 	*\&))F{PP$PPPG

,
,G
,
,
,,, v	--W---	-	- u	--W---	-	- u	--W---	-	- u4r   c                      G d dt                     }  |                                  t          j                  t	          d                                         t          j                  d                                 t          j                  d                                 t          j                  d                                 t          j                  d                                 t          j	                  fd	                                t                    fd
            }                    t                    fd            }S )Nc                   (     e Zd Z fdZ fdZ xZS )>register_run_and_save_rng_state_op.<locals>.RunAndSaveRngStatec                 J    t                                          d           d S )Nrun_and_save_rng_statesuper__init__self	__class__s    r   r   zGregister_run_and_save_rng_state_op.<locals>.RunAndSaveRngState.__init__   s"    GG566666r   c                 >     t                      j        |g|R i |S Nr   __call__)r   opr~   r   r   s       r   r   zGregister_run_and_save_rng_state_op.<locals>.RunAndSaveRngState.__call__   s*    #577#B8888888r   __name__
__module____qualname__r   r   __classcell__r   s   @r   RunAndSaveRngStater      sQ        	7 	7 	7 	7 	7	9 	9 	9 	9 	9 	9 	9 	9 	9r   r   Tdeferred_errorc                 N    t           j                                         | |i |fS r   )r   rC   get_rng_stater   r~   r   s      r   	impl_cudaz5register_run_and_save_rng_state_op.<locals>.impl_cuda   s*    z''))22t+>v+>+>>>r   c                 8    t          j                     | |i |fS r   )r   r   r   s      r   impl_cpuz4register_run_and_save_rng_state_op.<locals>.impl_cpu   s%    "$$bb$&9&&9&999r   c                     t          t          d          r&t          j                                         | |i |fS t	          d          Nry   z2functionalize a hpu RNG operator is not supported.)hasattrr   ry   r   r   r   s      r   impl_hpuz4register_run_and_save_rng_state_op.<locals>.impl_hpu   sL    5%   	B9**,,bb$.A&.A.AAAOPPPr   c                 N    t           j                                         | |i |fS r   )r   rv   r   r   s      r   impl_xpuz4register_run_and_save_rng_state_op.<locals>.impl_xpu   s*    y&&((""d*=f*=*===r   c                 |    	d}t          ||          }||v sJ d|             ||         } || g|R i |S N)rC   r^   ry   rv   zBackend not supported for r   )
r   r~   r   impl_mapr   implr   r   r   r   s
         r   impl_backend_selectz?register_run_and_save_rng_state_op.<locals>.impl_backend_select   sz     	
 
 D&))!!!#H#H#H!!!tB(((((((r   c                 T    | 5   |g|R i |cd d d            S # 1 swxY w Y   d S r   r   )moder   r~   r   r   s       r   impl_fake_tensor_modezAregister_run_and_save_rng_state_op.<locals>.impl_fake_tensor_mode   s      	< 	<&&r;D;;;F;;	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	< 	<s   !!c                 
    |g|R i |}t          j        | j        j        |g|R           }t          j        | j        j        |          }| j                            d	||          }t          ||d | j                  S Ncall_function)constanttracer)pytreetree_mapr   unwrap_proxycreate_proxyr   )
r   r   r~   r   out
proxy_argsproxy_kwargs	out_proxyr   r   s
           r   impl_proxy_dispatch_modezDregister_run_and_save_rng_state_op.<locals>.impl_proxy_dispatch_mode   s    !!"6t666v66_T[%={T{{KK
t{'?HHK,,3Z
 
	 !i$t{SSSSr   )r   py_implr   Autogradr   CUDACPUHPUXPUBackendSelectr   r   )	r   r   r   r   r   r   r   r   r   s	      @@@@@@r   "register_run_and_save_rng_state_opr      s   9 9 9 9 90 9 9 9 0/118"";#788 !7MMM   ##K$455? ? 65? ##KO44: : 54: ##KO44Q Q 54Q
 ##KO44> > 54> ##K$=>>
) 
) 
) 
) 
) 
) 
) ?>
) ##N33< < < < 43<
 ##$:;;T T T T T <;T "!r   c                    	  G d dt                     }  |             	 	                    t          j                  t	          	d                     	                    t          j                  d             	                    t          j                  d             	                    t          j                  d             	                    t          j                  d             	                    t                    	fd	            }	                    t          j
                  fd
            }	                    t                    d             }	j        	fd            }	S )Nc                   (     e Zd Z fdZ fdZ xZS )7register_run_with_rng_state_op.<locals>.RunWithRngStatec                 J    t                                          d           d S )Nrun_with_rng_stater   r   s    r   r   z@register_run_with_rng_state_op.<locals>.RunWithRngState.__init__   s"    GG122222r   c                 @     t                      j        ||g|R i |S r   r   )r   	rng_stater   r~   r   r   s        r   r   z@register_run_with_rng_state_op.<locals>.RunWithRngState.__call__   s,    #577#IrCDCCCFCCCr   r   r   s   @r   RunWithRngStater      sZ        	3 	3 	3 	3 	3	D 	D 	D 	D 	D 	D 	D 	D 	Dr   r   Tr   c                     t           j                                        }t           j                            |                                             ||i |}t           j                            |           |S r   )r   rC   r   set_rng_stater^   r   r   r~   r   current_stater   s         r   r   z1register_run_with_rng_state_op.<locals>.impl_cuda   sb    
0022
  111b$!&!!
  ///
r   c                     t          j                    }t          j        |             ||i |}t          j        |           |S r   )r   r   r   r   s         r   r   z0register_run_with_rng_state_op.<locals>.impl_cpu   sK    +--I&&&b$!&!!M***
r   c                    t          t          d          rft          j                                        }t          j                            |             ||i |}t          j                            |           |S t          d          r   )r   r   ry   r   r   r   r   s         r   r   z0register_run_with_rng_state_op.<locals>.impl_hpu   s{    5%   	!I3355MI##I..."d%f%%CI##M222JOPPPr   c                     t           j                                        }t           j                            |             ||i |}t           j                            |           |S r   )r   rv   r   r   r   s         r   r   z0register_run_with_rng_state_op.<locals>.impl_xpu  sZ    	//11		***b$!&!!	...
r   c                 Z   t                      5   	||g|R i |}d d d            n# 1 swxY w Y   t          j        | j        j        ||g|R           }t          j        | j        j        |          }| j                            d	||          }t          ||d | j                  S r   )r   r   r   r   r   r   r   )
r   r   r   r~   r   r   r   r   r   r   s
            r   r   z@register_run_with_rng_state_op.<locals>.impl_proxy_dispatch_mode
  s    )** 	E 	E$$YDTDDDVDDC	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E_T[%=	2?UPT?U?UVV
t{'?HHK,,/\
 
	 !i$t{SSSSs   *..c                 ~    	
d}t          ||          }||v sJ d|             ||         } || |g|R i |S r   r   )r   r   r~   r   r   r   r   r   r   r   r   s          r   r   z;register_run_with_rng_state_op.<locals>.impl_backend_select  s|     	
 
 D&))!!!#H#H#H!!!tIr3D333F333r   c                 J    | 5   ||i |cd d d            S # 1 swxY w Y   d S r   r   )r   r   r   r~   r   s        r   r   z=register_run_with_rng_state_op.<locals>.impl_fake_tensor_mode$  s      	' 	'2t&v&&	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	's   c                 "   |                      |          }|                      |          }|                      |          }|                                 5   	||g|R i |}|                     |          cd d d            S # 1 swxY w Y   d S r   )unwrap_tensorsredispatch_to_nextwrap_tensors)
ctxr   r   r~   r   unwrapped_rng_stateunwrapped_argsunwrapped_kwargsr   r   s
            r   impl_functionalz7register_run_with_rng_state_op.<locals>.impl_functional+  s    !00;;++D11--f55##%% 	) 	)$$#R*8  <L C ##C((		) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	)s   "BBB)r   r   r   r   r   r   r   r   r   r   r   r   py_functionalize_impl)
r   r   r   r   r   r   r   r   r   r   s
        @@@@@r   register_run_with_rng_state_opr      s   D D D D D- D D D )**4{344 !3DIII    011  21 00  10 00Q Q 10Q 00  10  677
T 
T 
T 
T 87
T  9::
4 
4 
4 
4 
4 
4 
4 ;:
4 //' ' 0/' -	) 	) 	) 	) .-	) r   c                  "    t                       d S r   )rj   r   r   r   register_rng_primsr   >  s    r   r   )'typingr   r   r   torch.utils._pytreeutils_pytreer   r   torch._Cr   torch._higher_order_ops.utilsr   
torch._opsr   torch._prims_commonr	   r
   torch._subclasses.fake_tensorr   "torch.fx.experimental.proxy_tensorr   r   r   torch.typesr   r   r   r5   re   r=   rS   rj   r   r   r   r   r   r   r   r   r   <module>r      s   " " " " " " " "  $ $ $ $ $ $ $ $ $                   B B B B B B * * * * * * O O O O O O O O 8 8 8 8 8 8         
 ( ' ' ' ' ' ' '  % % % %,A:A A A A:   04 4 4n  &=" =" ="@] ] ]@ <;== 3355     r   