
    ΧgH/                         d dl Z d dlmZ d dlmZ  G d de          Z G d de          Z G d de j        j                  Z	dS )	    N)Functionc                   :    e Zd Zed             Zed             ZdS )SyncBatchNormc
           
         |                     t          j                  s4|                     t          j                  s|                                }||                                }t          |                                |                    d          z            }
|
dk    r|	dk     rt          d|
           |j	        d         }|                                dk    r|t          j
        ||          \  }}t          j        d|                                |                    d          z  |j        |j                  }t          j        |||gd          n't          j        d|z  dz   |j        |j                  |                                d	k    r                                }t          j        d||	z  j        j                  }t%          j        ||d
           t          j        ||	|f          t          j        |d          \  }}}ndfdt-          |	          D             }t%          j        ||d
           t          j        |d          t          j        |d          \  }}}t          j                                        rt          j                                        s1|                    d          dk    }||         }||         }||         }|                    d          }|*|j        |j        k    r|                    |j                  }t          j        ||||||||          \  }}|                      |||||                    t          j!                             || _"        |                                dk    rt          j#        ||||||          S t          j$        |          S )Nmemory_format      zEExpected more than 1 value per channel when training, got input size r   )r	   dtypedevicedimglooFasync_opc                 8    g | ]}t          j                  S  )torch
empty_like).0_combineds     W/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/nn/modules/_functions.py
<listcomp>z)SyncBatchNorm.forward.<locals>.<listcomp>Q   s$    SSSAU-h77SSS    )%is_contiguousr   channels_lastchannels_last_3d
contiguousintnumelsize
ValueErrorshapebatch_norm_statsfullr   r   catzeros_get_backend_nameemptydistall_gather_into_tensorreshapesplitrange
all_gatherstackcudais_availableis_current_stream_capturingsqueezeviewto#batch_norm_gather_stats_with_countssave_for_backwardint32process_groupbatch_norm_elemtr   )selfinputweightbiasrunning_meanrunning_varepsmomentumr=   
world_sizer$   num_channelsmeaninvstdcountcombined_sizecombined_flatmean_all
invstd_all	count_allcombined_listmaskcountsr   s                          @r   forwardzSyncBatchNorm.forward   s    e.ABB	'""1G"HH	' $$&&E&&((F5;;==EJJqMM122199a^X\^^   {1~;;==1 1%==LD&JA.j{	  E y$!6A>>>HH {L 1$EK  H **,,66$NN,,M!K
*n	  M 'x    }]Z4OPPH.3k(LVW.X.X.X+Hj)) TSSSzARARSSSMOM8]USSSS{=a888H.3k(LVW.X.X.X+Hj)
'')) 	*ej.T.T.V.V 	* $$R((A-D!$I~H#D)J ###8J(J(JYY|122F@	
 	
f 	ufdFILL<U<UVVV* ;;==1)%tVSQQQ#E***r   c                 h   |                     t          j                  s4|                     t          j                  s|                                }| j        \  }}}}}d x}x}}	| j        }
|                                dk    r,t          j        |||||| j	        d         | j	        d         | j	        d                   \  }}}}	| j	        d         r|j
        d         }t          j        ||gd          }t          j                            |t          j        j        j        |
d           t          j        ||          \  }}|*|j        |j        k    r|                    |j                  }t          j        ||||||||          }|| j	        d         sd }|| j	        d         sd }	nu|j
        d         }| j	        d         r[t          j        d|z  |j        |j                  }t          j                            |t          j        j        j        |
d           |||	d d d d d d f	S )	Nr   r   r	   r
   r   Fr   r   )r   r   r   r    r!   saved_tensorsr=   r#   batch_norm_backward_reduceneeds_input_gradr&   r)   distributed
all_reduceReduceOpSUMr0   r   r9   batch_norm_backward_elemtr*   r   )r?   grad_outputsaved_inputrA   rI   rJ   count_tensor
grad_inputgrad_weight	grad_biasr=   sum_dy
sum_dy_xmurH   r   s                  r   backwardzSyncBatchNorm.backward}   s    %%E4G%HH	3((u7M(NN	3 &0022K:>:L7VT6</33
3[9*"" 0%a(%a(%a(	 	 $Q' %|A 9fj%9qAAA!,,%.2!"	 -    &+[<%H%H"
 %&,$**D*D#YYtz22F"< 	 	
 ~T%:1%=~"~T%:1%=~ 	 ',Q/L$Q' 
 ;$K,=kFX   !,,%.2!"	 -    ;	4tT4QUUUr   N__name__
__module____qualname__staticmethodrT   rf   r   r   r   r   r      sV        r+ r+ \r+h RV RV \RV RV RVr   r   c                   <    e Zd Zedd            Zed             ZdS )CrossMapLRN2d-C6?      ?r	   c                 X   || _         || _        || _        || _        d | _        |                                dk    r%t          d|                                 d          | j        p|                                | _        |                                }|                     d          }|                     d          }|                     d          }	|                     d          }
|                    |           | j                            |           |}t          j
        |d|           t          | j         dz
  dz  dz             }t          ||          }| j                            dd          }|                                 t          |          D ]+}|                    |                    d|                     ,t          d|          D ]}| j                            d|dz
            }| j                            d|          }|                    |           |||z
  dz   k     r3|                    d||z   dz
            }|                    |d	           ||k    r0|                    d||z
            }|                    |d
	           | j                            | j        | j         z                                | j                   t          j
        | j        | j         |           |                    |           |                     ||           |S )N   z,CrossMapLRN2d: Expected input to be 4D, got z
D instead.r   r	   r
      outalphar   )r$   rv   betakscaler   r%   new
resize_as_r   powr"   minselectzero_r1   add_copy_mul_r;   )ctxr@   r$   rv   rw   rx   output
batch_sizechannelsinput_heightinput_widthinput_squarepre_padpre_pad_cropscale_firstcscale_previousscale_currentsquare_nextsquare_previouss                       r   rT   zCrossMapLRN2d.forward   s   		99;;!Vuyy{{VVV   I,	ZZ]]
::a==zz!}}jjmm%   	U### 	%----sx!|q(1,--7H--i&&q!,,|$$ 	8 	8A\00A667777 q(## 
	> 
	>A Y--aQ77NI,,Q22M///8g%)))*11!Q[1_EE"";a"8887{{"."5"5aW"E"E""?""===	sy38+,,11#%888	#)chYF3333EeV,,,r   c           	         | j         \  }}|                                }|                    d          }|                    d          }|                    d          }|                    d          }|                    || j        z   dz
  ||          }	|                    ||          }
d| j        z  | j        z  | j        z  }t          | j        | j        dz
  dz  z
            }|                    |           t          j        | j	        | j         |          
                    |           |	                                 |	                    d||          }t          |          D ]}t          j        ||         ||         |           |                    | j	        |                    t          j        |	                    dd| j        dz
            dd|
           t          |          D ]v}|
                    |	|| j        z   dz
                      ||         |                             ||         |         |
|            |
                    |	|         d	
           w|d d d d fS )Nr   r	   r
   rr   rs   F)keepdimrt   )valuer   ru   )rV   rz   r$   rv   rw   r"   r{   r   r|   ry   r   r   narrowr1   muldiv_sumr   addcmul_)r   r^   r@   r   ra   r   r   r   r   paddded_ratioaccum_ratiocache_ratio_valueinversePrePadpadded_ratio_centernr   s                   r   rf   zCrossMapLRN2d.backward  sl   )v __&&
ZZ]]
::a==zz!}}jjmm		(SX"5"9<UUiik::	MCH4sx?CH1'99::e$$$	#)chYJ777<<[III+221mXNNz"" 	= 	=AIk!nfQi5HIIII$$SYq\222I$$Q38a<88	    8__ = =  q38|a/?!@AAA1a ))!HQK5F4F *      q!1 <<<<= 4tT11r   N)rn   ro   r	   rg   r   r   r   rm   rm      sN        8 8 8 \8t $2 $2 \$2 $2 $2r   rm   c                   :    e Zd Zed             Zed             ZdS )BackwardHookFunctionc                 .     | j         d |D               |S )Nc                      g | ]}|j         	|S r   )requires_grad)r   args     r   r   z0BackwardHookFunction.forward.<locals>.<listcomp>:  s     %S%S%ScAR%Sc%S%S%Sr   )mark_non_differentiabler   argss     r   rT   zBackwardHookFunction.forward8  s&    ##%S%ST%S%S%STTr   c                     |S )Nr   r   s     r   rf   zBackwardHookFunction.backward=  s    r   Nrg   r   r   r   r   r   7  sH          \   \  r   r   )
r   torch.distributedrY   r-   torch.autograd.functionr   r   rm   autogradr   r   r   r   <module>r      s                 , , , , , ,IV IV IV IV IVH IV IV IVXa2 a2 a2 a2 a2H a2 a2 a2H    5>2     r   