
    Ng%                         d Z ddlZddlZddlZddlZddlZddlmZ	 dZ
	 defdZdefdZdefdZdefdZdefdZd	 Z ej                    d
             Zd Zd ZddZddZd ZddZddZddZdS )z PyTorch distributed helpers

Some of this lifted from Detectron2 with other fns added by myself. Some of the Detectron2 fns
were intended for use with GLOO PG. I am using NCCL here with default PG so not everything will work
as is -RW
    Nreturnc                  |    t          j                    sdS t          j                    sdS t          j                    S )N   )distis_availableis_initializedget_world_size     N/var/www/html/ai-engine/env/lib/python3.11/site-packages/effdet/distributed.pyr	   r	      s=     q   q   r   c                  |    t          j                    sdS t          j                    sdS t          j                    S Nr   )r   r   r   get_rankr
   r   r   r   r      s:     q   q=??r   c                      t          j                    sdS t          j                    sdS t          J t          j        t                    S )zh
    Returns:
        The rank of the current process within the local (per-machine) process group.
    r   Ngroup)r   r   r   _LOCAL_PROCESS_GROUPr   r
   r   r   get_local_rankr   %   sP    
  q   q+++=34444r   c                      t          j                    sdS t          j                    sdS t          j        t                    S )zw
    Returns:
        The size of the per-machine process group,
        i.e. the number of processes per machine.
    r   r   )r   r   r   r	   r   r
   r   r   get_local_sizer   2   sE      q   q%9::::r   c                  &    t                      dk    S r   )r   r
   r   r   is_main_processr   ?   s    ::?r   c                      t          j                    sdS t          j                    sdS t          j                    } | dk    rdS t          j                     dS )zj
    Helper function to synchronize (barrier) among all processes when
    using distributed training
    Nr   )r   r   r   r	   barrier)
world_sizes    r   synchronizer   C   s[    
     $&&JQLNNNNNr   c                  |    t          j                    dk    rt          j        d          S t           j        j        S )zj
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    ncclgloo)backend)r   get_backend	new_groupr   WORLDr
   r   r   _get_global_gloo_groupr$   R   s6     V##~f----zr   c                    t          j        |          }|dv sJ t          j        |dk    rdnd          }t	          j        |           }t          |          dk    r_t          j        t                    }|
                    d                    t                      t          |          dz  |                     t          j                            |          }t          j        |                              |          }|S )N)r   r   r   cpucudai   @z;Rank {} trying to all-gather {:.2f} GB of data on device {})device)r   r!   torchr(   pickledumpslenlogging	getLogger__name__warningformatr   ByteStoragefrom_buffer
ByteTensorto)datar   r    r(   bufferloggerstoragetensors           r   _serialize_to_tensorr;   ^   s    u%%G&&&&&\7f#4#4%%&AAF\$F
6{{Y"8,,IPP

CKK95v 	
 	
 	

 ++F33Gg&&)))88FMr   c                     t          j        |          }|dk    s
J d            t          j                                         gt          j         j                  } fdt          |          D             }t          j        |||           d |D             }t          |          }||k    rBt          j
        ||z
  ft          j         j                  }t          j         |fd           | fS )	zz
    Returns:
        list[int]: size of the tensor, on each rank
        Tensor: padded tensor that has the max size
    r   r   zHcomm.gather/all_gather must be called from ranks within the given group!dtyper(   c                 ^    g | ])}t          j        d gt           j        j                  *S )r   r=   )r)   zerosint64r(   .0_r:   s     r   
<listcomp>z*_pad_to_largest_tensor.<locals>.<listcomp>{   s=       FGQCu{6=AAA  r   c                 P    g | ]#}t          |                                          $S r
   )intitem)rC   sizes     r   rE   z*_pad_to_largest_tensor.<locals>.<listcomp>   s(    888dTYY[[!!888r   r   dim)r   r	   r)   r:   numelrA   r(   range
all_gathermaxr@   uint8cat)r:   r   r   
local_size	size_listmax_sizepaddings   `      r   _pad_to_largest_tensorrV   p   s    $5111JaQ 	v||~~.ek&-XXXJ   KPQ[K\K\  I 	OIz777788i888I9~~H X+x*46ekRXR_```FG,!444fr   c                 8   t                      dk    r| gS |t                      }t          j         |          dk    r| gS t          | |          t	          |          \  }t          |          fd|D             }t          j        ||           g }t          ||          D ]l\  }                                	                                
                                d|         }|                    t          j        |                     m|S )a;  
    Run all_gather on arbitrary picklable data (not necessarily tensors).
    Args:
        data: any picklable object
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.
    Returns:
        list[data]: list of data gathered from each rank
    r   Nc                 ^    g | ])}t          j        ft           j        j                   *S r=   r)   emptyrP   r(   rC   rD   rT   r:   s     r   rE   zall_gather.<locals>.<listcomp>   s1    hhhYZ5;{%+fmTTThhhr   r   )r	   r$   r   r;   rV   rO   rN   zipr&   numpytobytesappendr*   loads)	r6   r   rS   tensor_list	data_listrI   r7   rT   r:   s	          @@r   rN   rN      s'    1v}&((5!!Q&&v!$..F.vu==Iv9~~H ihhhh^ghhhKOKu5555II{33 / /f##%%--//6f--....r   c                   	
 t                      dk    r| gS |t                      }t          j         |          dk    r| gS t          j        |          }t	          | |          
t          
|          \  }
||k    rt          |          		
fd|D             }t          j        
|||           g }t          ||          D ]l\  }

	                                
                                                                d|         }|                    t          j        |                     m|S t          j        
g ||           g S )a  
    Run gather on arbitrary picklable data (not necessarily tensors).
    Args:
        data: any picklable object
        dst (int): destination rank
        group: a torch process group. By default, will use a group which
            contains all ranks on gloo backend.
    Returns:
        list[data]: on dst, a list of data gathered from each rank. Otherwise,
            an empty list.
    r   Nr   c                 ^    g | ])}t          j        ft           j        j                   *S rY   rZ   r\   s     r   rE   zgather.<locals>.<listcomp>   s1    lll]^u{H;ek&-XXXlllr   dstr   )r	   r$   r   r   r;   rV   rO   gatherr]   r&   r^   r_   r`   r*   ra   )r6   rg   r   rankrS   rb   rc   rI   r7   rT   r:   s            @@r   rh   rh      sd    1v}&(('''1,,v=u%%%D!$..F.vu==Iv s{{y>>lllllbklllFKS>>>>		;77 	3 	3LD&ZZ\\''))1133ETE:FV\&112222FBCu5555	r   c                  n    t           j                            d          } t          |           }|d         S )z
    Returns:
        int: a random number that is the same across all workers.
            If workers need a shared RNG, they can use this shared seed to
            create one.
    All workers must call this function, otherwise it will deadlock.
    l        r   )nprandomrandintrN   )intsall_intss     r   shared_random_seedrp      s0     9W%%D$HA;r   Tc                     t                      }|dk     r| S t          j                    5  g }g }t          |                                           D ]2}|                    |           |                    | |                    3t          j        |d          }t          j        |d           t          j	                    dk    r|r||z  }d t          ||          D             }ddd           n# 1 swxY w Y   |S )ac  
    Reduce the values in the dictionary from all processes so that process with rank
    0 has the reduced results.
    Args:
        input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor.
        average (bool): whether to do average or sum
    Returns:
        a dict with the same keys as input_dict, after reduction.
       r   rJ   )rg   c                     i | ]\  }}||	S r
   r
   )rC   kvs      r   
<dictcomp>zreduce_dict.<locals>.<dictcomp>   s    <<<A1<<<r   N)r	   r)   no_gradsortedkeysr`   stackr   reducer   r]   )
input_dictaverager   namesvaluesrt   reduced_dicts          r   reduce_dictr      sB     !!JA~~	 = =
))** 	) 	)ALLOOOMM*Q-((((V+++F""""=??aG j F<<UF););<<<= = = = = = = = = = = = = = = s   B=C33C7:C7c                    pt           j        j        t          j                  fdt	          | t
                    r:t                      }|                                 D ]\  }} |          }|||<   |S t	          | t          t          f          r4fd| D             }t	          | t                    rt          |          }|S t	          | t          j
                  sJ  |           S )Nc                       fdt                    D             }t          j        |            t          j        |          S )Nc                 8    g | ]}t          j                  S r
   r)   
empty_likerB   s     r   rE   z<all_gather_container.<locals>._do_gather.<locals>.<listcomp>  s$    KKKAu'//KKKr   r   rJ   )rM   r   rN   r)   rQ   )r:   rb   cat_dimr   r   s   ` r   
_do_gatherz(all_gather_container.<locals>._do_gather  sQ    KKKKz9J9JKKKV59999y'2222r   c                 &    g | ]} |          S r
   r
   rC   ru   r   s     r   rE   z(all_gather_container.<locals>.<listcomp>  !    555aJJqMM555r   )r   r   r#   r	   
isinstancedictitemslisttupler)   Tensor)	containerr   r   gatheredrt   ru   r   r   s    ``   @@r   all_gather_containerr      s   %TZ%E$U++J3 3 3 3 3 3 3
 )T"" %66OO%% 	 	DAq
1AHQKK	Ie}	-	- %55559555i'' 	'XH )U\22222z)$$$r   c                 "  	 pt           j        j        t          j                  	t          j                  	fdt          | t                    r:t                      }|                                 D ]\  }} |          }|||<   |S t          | t          t          f          r4fd| D             }t          | t                    rt          |          }|S t          | t          j                  sJ  |           S )Nc                      k    r fdt                    D             }nd }t          j         |           t          j        |          S )Nc                 8    g | ]}t          j                  S r
   r   rB   s     r   rE   z8gather_container.<locals>._do_gather.<locals>.<listcomp>   s$    OOO5+F33OOOr   rf   rJ   )rM   r   rh   r)   rQ   )r:   rb   r   rg   r   	this_rankr   s   ` r   r   z$gather_container.<locals>._do_gather  se    OOOOU:=N=NOOOKKKFKS>>>>y'2222r   c                 &    g | ]} |          S r
   r
   r   s     r   rE   z$gather_container.<locals>.<listcomp>-  r   r   )r   r   r#   r	   r   r   r   r   r   r   r)   r   )
r   rg   r   r   r   rt   ru   r   r   r   s
    ```   @@@r   gather_containerr     s5   %TZ%E$U++Je$$I3 3 3 3 3 3 3 3 3 )T"" %66OO%% 	 	DAq
1AHQKK	Ie}	-	- %55559555i'' 	'XH )U\22222z)$$$r   )N)r   N)Tr   )__doc__	functoolsr-   r^   rk   r*   r)   torch.distributeddistributedr   r   rG   r	   r   r   r   boolr   r   	lru_cacher$   r;   rV   rN   rh   rp   r   r   r   r
   r   r   <module>r      s                           ! ! ! ! !#    
5 
5 
5 
5 
5
; 
; 
; 
; 
;              $  6   D$ $ $ $N
 
 
   <% % % %4% % % % % %r   