
    Χg[                        U d dl Z d dlmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
  G d de          Z G d d	e          Z G d
 de          Z G d de          Zdaeeeej                                   ed<   dej        fdZdS )    N)ListOptional_get_device_index)Function)commc                   :    e Zd Zed             Zed             ZdS )	Broadcastc                    t          d |D                       s
J d            d |D             }|| _        t          |          dk    rdS t          |          | _        |d                                         | _        t          j        || j                  }g }t          | j	        dd                    D ]'\  }}|s |D ]}|
                    ||                    ( | j        |  t          d |D                       S )Nc              3   6   K   | ]}|j         j        d k    V  dS cpuNdevicetype.0is     X/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/nn/parallel/_functions.py	<genexpr>z$Broadcast.forward.<locals>.<genexpr>   <       
 
'(AHMU"
 
 
 
 
 
    z2Broadcast function not implemented for CPU tensorsc                 .    g | ]}t          |d           S Tr   r   xs     r   
<listcomp>z%Broadcast.forward.<locals>.<listcomp>   #    GGGa(D11GGGr   r       c                     g | ]	}|D ]}|
S r   r   )r   tensorsts      r   r   z%Broadcast.forward.<locals>.<listcomp>   s%    @@@G@@1a@@@@r   )alltarget_gpuslen
num_inputs
get_deviceinput_devicer   broadcast_coalesced	enumerateneeds_input_gradappendmark_non_differentiabletuple)ctxr%   inputsoutputsnon_differentiablesidxinput_requires_gradoutputs           r   forwardzBroadcast.forward   sH    
 
,2
 
 
 
 
 	@ 	@?	@ 	@ 	@ HG;GGG%v;;!2V!!9//11*63?CC (1#2Fqrr2J(K(K 	< 	<$C$& <% < <F'..vc{;;;;##%899@@w@@@AAAr   c                 B    dt          j        | j        | j        g|R  z   S )NN)ReduceAddCoalescedapplyr)   r'   r0   grad_outputss     r   backwardzBroadcast.backward   s6    +1cn
/;
 
 
 
 	
r   N__name__
__module____qualname__staticmethodr7   r>   r   r   r   r
   r
   
   sK        B B \B& 
 
 \
 
 
r   r
   c                   :    e Zd Zed             Zed             ZdS )r:   c                     fdt          dt                              D             | _        fdt          dt                              D             }t          j        ||          S )Nc                 D    g | ]}|                                          S r   r(   )r   r   gradss     r   r   z.ReduceAddCoalesced.forward.<locals>.<listcomp>)   s6     
 
 
&'E!H!!
 
 
r   r   c                 *    g | ]}||z            S r   r   )r   r   rH   r'   s     r   r   z.ReduceAddCoalesced.forward.<locals>.<listcomp>-   s'    VVV%A
N*+VVVr   )ranger&   r%   r   reduce_add_coalesced)r0   destinationr'   rH   grads_s     `` r   r7   zReduceAddCoalesced.forward'   s    
 
 
 
+0CJJ
+K+K
 
 
 WVVVVU1c%jj*5U5UVVV(===r   c                 6    dt          j        | j        g|R  z   S )NNN)r
   r;   r%   r<   s     r   r>   zReduceAddCoalesced.backward0   s*    
 OCO;l;;;< 	<r   Nr?   r   r   r   r:   r:   &   sH        > > \> < < \< < <r   r:   c                   :    e Zd Zed             Zed             ZdS )Gatherc                     t          d |D                       s
J d            |dk    rd _        nt          |d          }| _        | _        t	          d |D                        _        t          d |D                       r;|dk    r5t	          d |D                       }t          j        d	           d _        nd
 _        t	           fd|D                        _	        t          j        | j         j                  S )Nc              3   6   K   | ]}|j         j        d k    V  dS r   r   r   s     r   r   z!Gather.forward.<locals>.<genexpr>;   r   r   z/Gather function not implemented for CPU tensorsr   Tc              3   >   K   | ]}|                                 V  d S r9   rG   r   s     r   r   z!Gather.forward.<locals>.<genexpr>D   s*      >>!q||~~>>>>>>r   c              3   F   K   | ]}|                                 d k    V  dS r   N)dimr   r#   s     r   r   z!Gather.forward.<locals>.<genexpr>E   s.      ,,quuww!|,,,,,,r   r   c              3   @   K   | ]}|                     d           V  dS )r    N)viewrX   s     r   r   z!Gather.forward.<locals>.<genexpr>F   s,      55166!99555555r   zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.Fc              3   L   K   | ]}|                     j                  V  d S r9   )sizerW   )r   r   r0   s     r   r   z!Gather.forward.<locals>.<genexpr>O   s/      @@Asw@@@@@@r   )r$   target_devicer   rW   r/   
input_gpuswarningswarnunsqueezed_scalarinput_sizesr   gather)r0   r]   rW   r1   s   `   r   r7   zGather.forward9   sJ    
 
,2
 
 
 
 
 	= 	=<	= 	= 	= E!! %C-mTBBM -C>>v>>>>>,,V,,,,, 		*55f55555FM'  
 %)C!!$)C!@@@@@@@@@{637C,=>>>r   c                     t                               | j        | j        | j        |          }| j        rt          d |D                       }d|z   S )Nc              3   &   K   | ]}|d          V  dS rV   r   )r   gs     r   r   z"Gather.backward.<locals>.<genexpr>X   s&      #B#BQAaD#B#B#B#B#B#Br   rO   )Scatterr;   r^   rb   rW   ra   r/   )r0   grad_outputscattered_gradss      r   r>   zGather.backwardR   sY    !--NCOSWk
 
   	C##B#B/#B#B#BBBOo--r   Nr?   r   r   r   rQ   rQ   8   sH        ? ? \?0 . . \. . .r   rQ   c                   :    e Zd Zed             Zed             ZdS )rg   c                 n   d |D             }|| _         |j        j        dk    r|                                nd| _        d }t
          j                                        r| j        dk    rd |D             }t          j	        |||| j         |          }|t          |          D ]\  }}t
          j                            ||                   5  t
          j                                        }	|	                    ||                    |                    |	           d d d            n# 1 swxY w Y   |S )Nc                 .    g | ]}t          |d           S r   r   r   s     r   r   z#Scatter.forward.<locals>.<listcomp>_   r   r   r   c                 R    g | ]$}t          t          j        d |                    %S )cuda)_get_streamtorchr   )r   r   s     r   r   z#Scatter.forward.<locals>.<listcomp>e   s9       >DEL8899  r   )rW   r   r   r(   r)   rq   ro   is_availabler   scatterr+   current_streamwait_streamrecord_stream)
r0   r%   chunk_sizesrW   inputstreamsr2   r   r6   main_streams
             r   r7   zScatter.forward]   s}   GG;GGG161Be1K1K5++---QS:""$$ 	)9R)?)? HS  G ,uk;QQ&w// 6 6	6Z&&{1~66 6 6"'*";";"="=K++GAJ777((5556 6 6 6 6 6 6 6 6 6 6 6 6 6 6 s   AD))D-	0D-	c                 D    d d d t          j        | j        | j        g|R  fS r9   )rQ   r;   r)   rW   )r0   rh   s     r   r>   zScatter.backwardr   s*    T4c.>!V+!V!V!VVVr   Nr?   r   r   r   rg   rg   \   sM          \( W W \W W Wr   rg   _streamsr   c                 .   | j         dk    rdS t          t          | j         d          }|dS t          dg|                                z  at          | j                 '|                    | j                  t          | j        <   t          | j                 S )zBGet a background stream for copying between CPU and target device.r   N)r   getattrrq   r|   device_countindexStream)r   
device_mods     r   rp   rp   {   s     {etT22Jt6J33555%!+!2!26<!@!@FL!!r   )r_   typingr   r   rq   torch._utilsr   torch.autogradr   torch.nn.parallelr   r
   r:   rQ   rg   r|   r   __annotations__r   rp   r   r   r   <module>r      sb    ! ! ! ! ! ! ! !  * * * * * * # # # # # # " " " " " "
 
 
 
 
 
 
 
8< < < < < < < <$!. !. !. !. !.X !. !. !.HW W W W Wh W W W8 48(4./
0 7 7 7" " " " " " "r   