
    קgs8                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZmZmZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ dd	lmZm Z m!Z!m"Z" dd
l#m$Z$m%Z% ddgZ& ed          Z' ed          Z(deee'e"f                  dee)e"f         fdZ*	 d-deeef         deej+                 dee         fdZ, G d d          Z-dede$defdZ.dej/        de$dej/        fdZ0de%de$defdZ1dee)         dee)         dee)         fdZ2dee)         dee)         dee)         fd Z3 G d! d"ej4                  Z5d#ej4        d$e)d%e)dej4        fd&Z6d'e7d(e)de7fd)Z8d*Z9ed+             Z:d, Z;dS ).    N)contextmanager)wraps)Stats)	AnyCallablecastDictListOptionalSequenceTypeVarUnion)ShardedTensor)Shard   )_is_wrapped_exception_wrap_exceptionCheckpointExceptionWRAPPED_EXCEPTION)MetadataIndexSTATE_DICT_TYPEfind_tensor_shardfind_state_dict_objectTRresultsreturnc                 ~    t          t          t          t          f         d t	          |           D                       S )Nc                 8    i | ]\  }}t          |          ||S  )r   ).0ierrs      ^/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/distributed/checkpoint/utils.py
<dictcomp>z%_get_failure_dict.<locals>.<dictcomp>&   s,    OOOFAs4I#4N4NOCOOO    )r   r	   intr   	enumerate)r   s    r$   _get_failure_dictr)   !   s=     S##$OOi00OOO  r&   
local_dictgroupc                    t          |                                           }dgt          j        |          z  }t          j        |||           t          t          t          j        	                    |                              S )z*Gathers all keys, and returns them sorted.N)r+   )
listkeysdistget_world_sizeall_gather_objectsortedset	itertoolschainfrom_iterable)r*   r+   r.   gathered_keyss       r$   _all_gather_keysr8   *   sp     
!!""D&*Vd.A%.H.H%HM=$e<<<<#io33MBBCCDDDr&   c            	          e Zd ZdZdeej                 dedefdZ	defdZ
defdZd	ee         defd
Zd	edeee                  fdZd	edee         fdZdeee                  defdZdedeg ef         deee         gee         f         defdZdedeg ef         deee         gef         defdZdedeg ef         dee         fdZdedeg ef         defdZdS )_DistWrapperaH  
    This is a wrapper around PG that provides a series of features around object collectives.

    It works without distributed initialized, where most collectives turns into nops.

    All variants that take functions are exception robust, meaning that if one or more
    ranks raise errors, all ranks will observe those.
    r+   use_distcoordinator_rankc                     || _         || _        || _        | j        r+t          j        |          | _        | j        |k    | _        d S d| _        d| _        d S )Nr   T)r+   r;   r<   r/   get_rankrankis_coordinator)selfr+   r;   r<   s       r$   __init__z_DistWrapper.__init__?   s`     
  0= 	'e,,DI"&)/?"?DDI"&Dr&   r   c                     | j         S N)r?   rA   s    r$   r>   z_DistWrapper.get_rankO   s
    yr&   c                 F    | j         rt          j        | j                  S dS )Nr   )r;   r/   r0   r+   rE   s    r$   r0   z_DistWrapper.get_world_sizeR   s$    = 	3&tz222qr&   objectc                     |g}| j         r!t          j        || j        | j                   t          t          |d                   S )z_Implement functionality similar to c10d::broadcast_object_list but without distributed enabled.)object_listr+   srcr   )r;   r/   broadcast_object_listr+   r<   r   r   )rA   rG   rI   s      r$   broadcast_objectz_DistWrapper.broadcast_objectW   sQ    h= 	&'j)   
 A{1~&&&r&   c                     | j         rr| j        r;t          t          t                   dgt          j        | j                  z            nd}t          j        || j        r|nd| j	        | j                   |}n|g}|S )zWImplement functionality similar to c10d::gather_object but without distributed enabled.N)objobject_gather_listdstr+   )
r;   r@   r   r
   r   r/   r0   r+   gather_objectr<   )rA   rG   gather_objsresults       r$   rQ   z_DistWrapper.gather_objectb   s    = 	 &T!Wtft':4:'F'FFGGG  262E#O;;4)j	    !FFXFr&   c                     | j         rXt          t          t                   dgt	          j        | j                  z            }t	          j        ||| j                   n|g}|S )z[Implement functionality similar to c10d::all_gather_object but without distributed enabled.N)rI   rN   r+   )r;   r   r
   r   r/   r0   r+   r1   )rA   rG   rR   s      r$   r1   z_DistWrapper.all_gather_objectv   sj    = 	#tAw1DTZ1P1P(PQQK"'V4:     "(Kr&   rI   c                     | j         rUt          t          t                   dg          }t	          j        || j        r|nd| j        | j                   |d         }n|J |d         }|S )zXImplement functionality similar to c10d::scatter_object but without distributed enabled.N)scatter_object_output_listscatter_object_input_listrJ   r+   r   )	r;   r   r
   r   r/   scatter_object_listr@   r<   r+   )rA   rI   gather_resultlocal_replys       r$   scatter_objectz_DistWrapper.scatter_object   s    = 	) a4&11M$+89=9L*V++RV)j	    (*KK***%a.Kr&   stepmap_fun
reduce_func           	         	  |            }n&# t           $ r}t          |          }Y d}~nd}~ww xY w|                     |          }d}| j        r|J t	          |          }t          |          dk    r	 t          t          t          t          t          f                   |t          t          t                   |                              }n.# t           $ r!}t          |          || j        <   Y d}~nd}~ww xY wt          |          dk    r&t          ||          g|                                 z  }|                     |          }	t          |	t                    r|	|	S )a^  
        Compute a value on each rank, then do centralized reduce on a single rank, followed by a scatter.

        This method operates in the following way:
            Run ``map_fun`` on all ranks
            Gather results on rank 0
            Call ``reduce_fun`` on all those values
            Scatter to each rank part of the result.
        Nr   )BaseExceptionr   rQ   r@   r)   lenr   r
   r   r   r   r   r?   r0   r[   
isinstance)
rA   r\   r]   r^   
local_dataeall_dataall_resultsnode_failuresrS   s
             r$   reduce_scatterz_DistWrapper.reduce_scatter   s    	, JJ 	, 	, 	,(++JJJJJJ	, %%j11EI 	*'''-h77M=!!Q&&B"&U1&9#9:;"
4Q#:#:;;# #KK % B B B/>q/A/AM$),,,,,,B =!!A%%'m<<''))* $$[11f122 	Ls)   
 
0+08AC 
C=C88C=c                 h   	  |            }n&# t           $ r}t          |          }Y d}~nd}~ww xY w|                     |          }d}| j        r|J t	          |          }t          |          dk    rY	  |t          t          t                   |                    }n.# t           $ r!}t          |          || j	        <   Y d}~nd}~ww xY wt          |          dk    rt          ||          }|                     |          }	t          |	t                    r|	t          t          |	          S )aa  
        Compute a value on each rank, then do centralized reduce on a single rank, followed by a broadcast.

        This method operates in the following way:
            Run ``map_fun`` on all ranks
            Gather results on rank 0
            Call ``reduce_fun`` on all those values
            Broadcast the reduced value to all ranks.
        Nr   )r`   r   rQ   r@   r)   ra   r   r
   r   r?   r   rL   rb   r   )
rA   r\   r]   r^   rc   rd   re   rS   rg   final_results
             r$   
all_reducez_DistWrapper.all_reduce   sg    	, JJ 	, 	, 	,(++JJJJJJ	, %%j11:> 
	B'''-h77M=!!Q&&B'ZT!Wh(?(?@@FF$ B B B/>q/A/AM$),,,,,,B =!!A%%,T=AA,,V44l$788 	A|$$$s(   
 
0+08)B" "
C,CCc                 4   	  |            }n&# t           $ r}t          |          }Y d}~nd}~ww xY w|                     |          }t          |          }t	          |          dk    rt          ||          t          t          t                   |          S )z
        Compute a value on each rank, then all_gather them.

        This method operates in the following way:
            Run ``map_cp`` on all ranks
            all_gather the values to all ranks
        Nr   )	r`   r   r1   r)   ra   r   r   r
   r   )rA   r\   r]   rS   rd   rf   rg   s          r$   
all_gatherz_DistWrapper.all_gather   s    	(WYYFF 	( 	( 	($Q''FFFFFF	( ,,V44)+66}!!%dM:::DG[)))s   
 
0+0c                 $   d}| j         rG	  |            }n;# t          $ r.}t          || j        t	          |          i          }Y d}~nd}~ww xY w|                     |          }t          |t                    r|t          t          |          S )z
        Compute a value on rank 0 and broadcast it.

        This method operates in the following way:
            Run ``map_cp`` on rank 0
            broadcast the value
        N)	r@   r`   r   r?   r   rL   rb   r   r   )rA   r\   r]   rS   rd   rj   s         r$   	broadcastz_DistWrapper.broadcast  s     ;? 	TT   T T T,TDIq?Q?Q3RSST,,V44l$788 	A|$$$s   
 
A$A		AN)__name__
__module____qualname____doc__r   r/   ProcessGroupboolr'   rB   r>   r0   r   rL   r
   rQ   r1   r[   strr   r   rh   rk   rm   ro   r    r&   r$   r:   r:   5   sK        ')*' ' 	' ' ' ' #        
	'x{ 	'q 	' 	' 	' 	'A (47*;    (
 
d1g 
 
 
 
(47*;     "-- "a%- d1gYQ/0	-
 
- - - -^&%&% "a%&% d1gY\*	&%
 
&% &% &% &%P** "a%* 
a	* * * *2%% "a%% 
	% % % % % %r&   r:   tensorindexc                    |j         t          d|j         d          |                                 }|j        Wt          |          |j        k    r?t          j        ||j                 j        j	                  |j         k    r||j                 S |D ]-}t          j        |j        j	                  |j         k    r|c S .t          d|j          d|j         d          )NzCannot lookup z5 since its a ShardedTensor and no offset was providedzCould not find shard at 'z' for FQN: '')
offset
ValueErrorfqnlocal_shardsrx   ra   torchSizemetadatashard_offsets)rw   rx   shardsshards       r$   _find_shardr     s    |]UY]]]
 
 	
   ""F{KK%+%%
6%+.7EFF%,VV%+&&  :en233u|CCLLL D
WWW59WWW
X
XXr&   c                 v   t          | d          r|                     |          S t          | t                    rt	          | |          j        S |j        b|j        t          j        dgt          | 
                                          z            k    r| S t          d|j         d|j         d          | S )N__get_tensor_shard__r   FQN: '1' is not a ShardedTensor, can't find by offset: 'rz   )hasattrr   rb   r   r   rw   r{   r   r   ra   sizer|   r}   )rw   rx   s     r$   r   r   0  s    v-.. 2**5111&-(( 165))00|<5:qcC,>,>&>????M`UY``QVQ]```
 
 	
 Mr&   
state_dictc                    |j         | vrt          d|j          d          | |j                  }t          |t          j                  rt          ||          S |j         t          d|j          d|j         d          |S )NzCould not find FQN: 'rz   r   r   )r}   r|   rb   r   Tensorr   r{   )r   rx   rN   s      r$   r   r   @  s    y
""====>>>
UY
C#u|$$ 
 e,,,		!`UY``QVQ]```
 
 	
 Jr&   abc                 6    d t          | |          D             S )Nc                     g | ]
\  }}||z   S r    r    r!   i_ai_bs      r$   
<listcomp>z%_element_wise_add.<locals>.<listcomp>O       000(#sC#I000r&   zipr   r   s     r$   _element_wise_addr   N      00c!Qii0000r&   c                 6    d t          | |          D             S )Nc                     g | ]
\  }}||z
  S r    r    r   s      r$   r   z%_element_wise_sub.<locals>.<listcomp>S  r   r&   r   r   s     r$   _element_wise_subr   R  r   r&   c                        e Zd Zdej        dedef fdZej        fdededefdZ	defd	Z
defd
ZdefdZd ZddZ xZS )_ReaderViewbase_streamr{   ra   c                     t                                                       || _        || _        || _        |                     d           d S )Nr   )superrB   r{   ra   r   seek)rA   r   r{   ra   	__class__s       r$   rB   z_ReaderView.__init__W  sB    &		!r&   _ReaderView__offset_ReaderView__whencer   c                     |t           j        k    r| j        |z   }n.|t           j        k    rt           j        }| j        | j        z   |z
  }| j                            ||          S rD   )osSEEK_SETr{   SEEK_ENDra   r   r   )rA   r   r   s      r$   r   z_ReaderView.seek^  s]    r{""{X-HH$${Hdh.(:H$$Xx888r&   c                 D    | j                                         | j        z
  S rD   )r   tellr{   rE   s    r$   r   z_ReaderView.tellf  s    $$&&44r&   c                 4    | j                                         S rD   )r   readablerE   s    r$   r   z_ReaderView.readablei      ((***r&   c                 4    | j                                         S rD   )r   seekablerE   s    r$   r   z_ReaderView.seekablel  r   r&   c                 6    | j                             |          S rD   )r   readinto)rA   r   s     r$   r   z_ReaderView.readintoo  s    ((+++r&   c                 6    | j                             |          S rD   )r   read)rA   r   s     r$   r   z_ReaderView.readr  s    $$T***r&   )r   )rp   rq   rr   ioIOBaser'   rB   r   r   r   r   ru   r   r   r   r   __classcell__)r   s   @r$   r   r   V  s        BI s        35+ 9 9S 9C 9# 9 9 9 95c 5 5 5 5+$ + + + ++$ + + + +, , ,+ + + + + + + +r&   r   filer{   lengthc                 $    t          | ||          S rD   )r   )r   r{   r   s      r$   _create_file_viewr   v  s    tVV,,,r&   device_type	device_idc                      | dk    rdS |  d| S )zDevice info normalization.cpu:r    )r   r   s     r$   _normalize_device_infor   {  s&    eu''I'''r&   Fc               #     K   t           rt          j                    rt          j                    dk    rt	          j                    } |                                  	 d V  |                                  t          |           }|	                    d          
                    d           d S # |                                  t          |           }|	                    d          
                    d           w xY wd V  d S )Nr   time
   )ENABLE_PROFILEr/   is_availabler>   cProfileProfileenabledisabler   
sort_statsprint_stats)profilerstatss     r$   _profiler     s        
t022 
dmoo6J6J#%%	5EEE(OOEV$$0044444 (OOEV$$004444s   B- -AC:c                 L     t                     dt          f fd            }|S )Nr   c                     t          |           dk    rt          j        dj         d           t	          j                  }d |j                                        D             }d|v rd|vsJ | |f            | d         |d<   n2d|v rd|vsJ | |f            | d         |d<   nt          d|            | d	         fi |S  | i |S )
N   zThe argument order of zG has been changed. Please check the document to avoid future breakages.c                 <    g | ]}|j         |j        k    |j        S r    )kindKEYWORD_ONLYname)r!   ps     r$   r   z5_api_bc_check.<locals>.inner_func.<locals>.<listcomp>  s/       16Q^;S;S;S;S;Sr&   storage_writerr   storage_readerzUnexpected kwonlyargs = r   )	ra   warningswarnrp   inspect	signature
parametersvaluesRuntimeError)argskwargssig
kwonlyargsfuncs       r$   
inner_funcz!_api_bc_check.<locals>.inner_func  s>   t99>>MG G G G   #D))C  # 5 5 7 7  J  :--'v555f~555+/7'((!Z//'v555f~555+/7'(("#Jj#J#JKKK4Q**6***4((((r&   )r   r   )r   r   s   ` r$   _api_bc_checkr     s@    
4[[)s ) ) ) ) ) [), r&   rD   )<r   r   r   r4   r   r   
contextlibr   	functoolsr   pstatsr   typingr   r   r   r	   r
   r   r   r   r   r   torch.distributeddistributedr/   'torch.distributed._shard.sharded_tensorr   -torch.distributed._shard.sharded_tensor.shardr   apir   r   r   r   r   r   r   __all__r   r   r'   r)   rt   r8   r:   r   r   r   r   r   r   r   r   r   rv   r   r   r   r   r    r&   r$   <module>r      s     				     				  % % % % % %             V V V V V V V V V V V V V V V V V V V V V V              A A A A A A ? ? ? ? ? ?            5 4 4 4 4 4 4 4  8
9GCLLGCLL%,,-.	# 
 !    FJE ES#XE'/0A'BE	#YE E E Ec% c% c% c% c% c% c% c%LY Ym Y Y Y Y Y*el = U\      } QT    1# 18C= 1T#Y 1 1 1 11# 18C= 1T#Y 1 1 1 1+ + + + +") + + +@-BI -s -C -BI - - - -
( ( ( ( ( ( (         r&   