
    קgd                        U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dlZd dlm Z  d dl!m"Z"m#Z# d d	l$m%Z% d d
l&m'Z'm(Z(m)Z)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3 d dl4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z; d dl<m=Z= g dZ>dZ?e@eAd<   e G d d                      ZBe G d d                      ZCdZDde@fdZE G d de          ZF G d deF          ZG G d deF          ZHd e2deIfd!ZJd"eId#ee2         deee2                  fd$ZKd%ejL        d&eejM        ej         f         d'e2d(e@de9f
d)ZNd*ed+ejO        d,ejO        d-e1d.eId/ePd0eIddfd1ZQ G d2 d3e          ZR G d4 d5eR          ZS G d6 d7e8          ZT G d8 d9e7          ZU G d: d;eTe5          ZVdS )<    N)ABCabstractmethod)contextmanager)	dataclass)Path)AnyCallablecastDict	GeneratorIOIterableIteratorListOptionalTupleUnion)Tensor)_get_available_device_type_get_device_module)narrow_tensor_by_index)MetadataMetadataIndexSTATE_DICT_TYPEStorageMeta)LoadItemTypeLoadPlanLoadPlannerReadItemSavePlanSavePlanner	WriteItemWriteItemType)BlockingAsyncStager)StorageReaderStorageWriterWriteResult)_create_file_view)Future)FileSystemWriterFileSystemReader
FileSystemFileSystemBase	.metadata_metadata_fnc                   2    e Zd ZU dZeed<   eed<   eed<   dS )_StorageInfoz#This is the per entry storage info.relative_pathoffsetlengthN)__name__
__module____qualname____doc__str__annotations__int     c/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/distributed/checkpoint/filesystem.pyr1   r1   B   s4         --KKKKKKKKr=   r1   c                       e Zd ZU eed<   dS )_StoragePrefixprefixN)r5   r6   r7   r9   r:   r<   r=   r>   r@   r@   K   s         KKKKKr=   r@   z.distcpreturnc                  B    t          t          j                              S N)r9   uuiduuid4r<   r=   r>   _generate_uuidrG   S   s    tz||r=   c                       e Zd Zedededdfd            Zedd            Zedee	e
j        ef                  fd            ZdS )	_TensorLoadersizeobjrB   Nc                     d S rD   r<   selfrJ   rK   s      r>   addz_TensorLoader.addX       r=   c                     d S rD   r<   rN   s    r>   start_loadingz_TensorLoader.start_loading\   rP   r=   c                     d S rD   r<   rR   s    r>   valuesz_TensorLoader.values`   rP   r=   rB   N)r5   r6   r7   r   r;   objectrO   rS   r   r   torchr   rU   r<   r=   r>   rI   rI   W   s         & T    ^    ^ u|V';!<=    ^  r=   rI   c                   l    e Zd ZdeddfdZdededdfdZd
dZde	e
ej        ef                  fd	ZdS )_SerialCpuLoaderresolve_funrB   Nc                 "    || _         g | _        d S rD   )r[   items)rN   r[   s     r>   __init__z_SerialCpuLoader.__init__f   s    &/1


r=   rJ   rK   c                 >    | j                             ||f           d S rD   )r]   appendrM   s      r>   rO   z_SerialCpuLoader.addj   s"    
4+&&&&&r=   c                     d S rD   r<   rR   s    r>   rS   z_SerialCpuLoader.start_loadingm       r=   c              #   F  K   | j         D ]\  }}|                     |                                          }|                                }|                                                                |                                k    r|                                }||fV  d S rD   )r]   r[   detachcpustoragerJ   numelclonerN   _rK   tensors       r>   rU   z_SerialCpuLoader.valuesp   s      j 	 	FAs%%c**1133FZZ\\F~~$$&&&,,..88    	 	r=   rV   )r5   r6   r7   r	   r^   r;   rW   rO   rS   r   r   rX   r   rU   r<   r=   r>   rZ   rZ   e   s        2H 2 2 2 2 2' '& 'T ' ' ' '   	u|V';!<= 	 	 	 	 	 	r=   rZ   c            	          e Zd Z	 	 ddedeej                 deddfdZe	de
fd            Zdeeej        ef                  fd	Zdd
Zdeeej        ef                  fdZdededdfdZddZdeeej        ef                  fdZdS )_OverlappingCpuLoaderN@B r[   streaminflight_threshholdrB   c                    || _         g | _        || _        d| _        t	          j                    | _        d| _        d| _        |r|j	        nt                      | _	        t          | j	                  | _        t          t          j        j        |p| j                                                  | _        | j        | j                                        k    r3| j                            | j                                                   d S d S )Nr   F)r[   r]   rp   in_flight_datacollectionsdequecurrent_itemsidxstarteddevice_typer   r   device_moduler
   rX   cudaStreamcurrent_streamro   wait_stream)rN   r[   ro   rp   s       r>   r^   z_OverlappingCpuLoader.__init__}   s     '/1
#6 0;0A0C0C"(JF.H.J.J 	 00@AAJvL);)J)J)L)L
 
 ;$,;;====K##D$6$E$E$G$GHHHHH >=r=   c                 <    | j         t          | j                  k    S rD   )rv   lenr]   rR   s    r>   _donez_OverlappingCpuLoader._done   s    x3tz??**r=   c                 ~   g }| j         | j        k    r| j                                         | j         | j        k    r| j                                        }| xj         |d                                         |d                                         z  z  c_         |                    |           | j         | j        k    |S Nr   )	rr   rp   ro   synchronizeru   popleftrg   element_sizer`   )rN   drainedvals      r>   _drainz_OverlappingCpuLoader._drain   s    $":::K##%%%!T%===$,,..C3q6<<>>CF4G4G4I4I#IINN3 !T%=== r=   c                 Z   | j                             | j                  5  | j        sj| j        | j        k     rY| j        | j                 \  }}| xj        dz  c_        |                     |                                          }|j	        j
        | j        k    r|                    dd          }nu|j	        t          j	        d          k    rX|                                                                |                                |j        z  k    r|                                }| j                            ||f           | xj        |                                |                                z  z  c_        | j        s| j        | j        k     Yd d d            d S # 1 swxY w Y   d S )N   re   T)devicenon_blocking)ry   ro   r   rr   rp   r]   rv   r[   rd   r   typerx   torX   untyped_storagerJ   rg   itemsizerh   ru   r`   r   ri   s       r>   _refillz_OverlappingCpuLoader._refill   s   &&t{33 	N 	Nj NT%84;S%S%SDH-3A))#..5577=%)999#YYe$YGGFF]el5&9&999..005577!<<>>FO;< < "("))   ##v||~~8K8K8M8M'MM##) j NT%84;S%S%S	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	N 	Ns   E3F  F$'F$c                     | j         sJ t          | j                  dk    r| j                                         | j        S r   )r   r   ru   ro   r   rR   s    r>   _finishz_OverlappingCpuLoader._finish   sC    zt!""Q&&K##%%%!!r=   rJ   rK   c                 j    | j         rt          d          | j                            ||f           d S )Nz&cannot add items after loading started)rw   RuntimeErrorr]   r`   rM   s      r>   rO   z_OverlappingCpuLoader.add   s<    < 	IGHHH
4+&&&&&r=   c                     | j         rd S d| _         | j                            t          j        d                     |                                  d S )NTr   key)rw   r]   sortoperator
itemgetterr   rR   s    r>   rS   z#_OverlappingCpuLoader.start_loading   sJ    < 	F
H/22333r=   c              #      K   |                                   | j        s7|                                 }|                                  |E d {V  | j        7|                                 E d {V  d S rD   )rS   r   r   r   r   )rN   r   s     r>   rU   z_OverlappingCpuLoader.values   s      * 	kkmmGLLNNN * 	
 <<>>!!!!!!!!!r=   )Nrn   rV   )r5   r6   r7   r	   r   rX   r{   r;   r^   propertyboolr   r   r   r   rW   r   r   r   r   rO   rS   r   rU   r<   r=   r>   rm   rm   |   sk        *.#,	I II &I !	I
 
I I I I. +t + + + X+U5<#789    N N N N0"%f(<"=> " " " "' '& 'T ' ' ' '
   "u|V';!<= " " " " " "r=   rm   itemc                     d}| j         J | j         j        D ]}||z  }| j         j        j        }|t          j                            |          z  S Nr   )tensor_datarJ   
propertiesdtyperX   _utils_element_size)r   rJ   sr   s       r>   
_item_sizer      s_    D'''"  	'-E%,,,U3333r=   binsr]   c                 0   | dk    r|gS d |D             }d |D             }d t          |           D             }d t          |           D             }|                    t          d           t          |          D ]#\  }}||| z                               |           $|D ]p}t          t          |          t          j        d                    d	         }||                             |           ||xx         t          |          z  cc<   q|S )
Nr   c                 <    g | ]}|j         t          j        k    |S r<   r   r#   BYTE_IO.0wis     r>   
<listcomp>z+_split_by_size_and_type.<locals>.<listcomp>   s'    FFFbRW0E%E%Er%E%E%Er=   c                 <    g | ]}|j         t          j        k    |S r<   r   r   s     r>   r   z+_split_by_size_and_type.<locals>.<listcomp>   s'    GGGrbg1F&F&F&F&F&Fr=   c                     g | ]}g S r<   r<   r   rj   s     r>   r   z+_split_by_size_and_type.<locals>.<listcomp>   s    %>%>%>Qb%>%>%>r=   c                     g | ]}d S )r   r<   r   s     r>   r   z+_split_by_size_and_type.<locals>.<listcomp>   s    +++!A+++r=   T)r   reverser   r   )ranger   r   	enumerater`   minr   r   )	r   r]   bytes_wtensor_wbucketsbucket_sizesir   rv   s	            r>   _split_by_size_and_typer      s6   qyywFFEFFFGGGUGGGH%>%>%++%>%>%>G++uT{{+++LMMj$M///7## % %2D  $$$$ , ,)L))x/B1/E/EFFFqIBSZ^^+Nr=   ro   data
write_itemstorage_keyc           	      2   |                                  }|j        t          j        k    rDt	          |t
          j                  sJ |                     |                                           nnt	          |t          j
                  sJ |j        t          j        d          k    sJ t          j        |t          t          t                   |                      |                                  |z
  }t!          |j        |t%          |||                    S )Nre   )indexsize_in_bytesstorage_data)tellr   r#   r   
isinstanceioBytesIOwrite	getbufferrX   r   r   saver
   r   bytesr'   r   r1   )ro   r   r   r   r3   r4   s         r>   _write_itemr      s     [[]]F-///$
+++++T^^%%&&&&$-----{el5111111
4bi00111[[]]V#F!+vv>>   r=   create_stream
file_queueresult_queueplannerrp   	use_fsyncthread_countc           
      j   	 	 |                                 \  }}}	t          j                                        }
t	          t          |
d           }|dk    rQt          j                                        s|r1|                                r|dk    rt          |j        |          }nt          |j                  }d |	D             }|D ]%}|
                    t          |          |           &|                                 d |	D             }g } | |d          5 }|D ]<}|                    |          }|                    t          ||||                     =|                                D ]3\  }}|j        sJ |                    t          ||||                     4|rK	 t#          j        |                                           n## t(          $ r t#          j                     Y nw xY wd d d            n# 1 swxY w Y   |                    |           # t.          j        $ r Y d S w xY w)NTr   r   )rp   c                 <    g | ]}|j         t          j        k    |S r<   r   r   s     r>   r   z+_write_files_from_queue.<locals>.<listcomp>6  s'    UUUrBG}?T4T4T4T4T4Tr=   c                 <    g | ]}|j         t          j        k    |S r<   r   r   s     r>   r   z+_write_files_from_queue.<locals>.<listcomp>;  s'    TTTb27m>S3S3Sr3S3S3Sr=   wb)
get_nowaitrX   _C_get_privateuse1_backend_namegetattrrz   is_availablerm   resolve_datarZ   rO   r   rS   r`   r   rU   is_cpuosfsyncfilenoAttributeErrorsyncputqueueEmpty)r   r   r   r   rp   r   r   	file_namer   write_itemscustom_backend_namecustom_device_modloaderr   r   r   write_resultsro   r   rk   s                       r>   _write_files_from_queuer     s   85	,2<2G2G2I2I/I{K #(("H"H"J"J '/BD I I !!J++-- " * " /@.L.L.N.N "
 (!++.((;  
 *(  VU[UUUH& ? ?


:j11:>>>>  """TTKTTTGMy$// "6")  J"//
;;D!((#FD*kJJ    +1--//  &FJ!=(((!((#FFJLL     ""1111) " " "					"!" " " " " " " " " " " " " " "$ ]+++k5	,l ;   s[   DH B
G='&GG=G.+G=-G..G=1H =HH HH H21H2c                   t   e Zd Zeedeeej        f         dede	e
j        ddf         fd                        Zedeeej        f         dedeeej        f         fd            Zedeeej        f         deeej        f         ddfd	            Zedeeej        f         deeej        f         fd
            Zedeeej        f         ddfd            Zeedeeej        f         defd                        Zedeeej        f         defd            Zedeeej        f         ddfd            ZdS )r-   pathmoderB   Nc                     d S rD   r<   )rN   r   r   s      r>   r   zFileSystemBase.create_streamV  s	    
 	r=   suffixc                     d S rD   r<   rN   r   r   s      r>   concat_pathzFileSystemBase.concat_path]  	     	r=   new_pathc                     d S rD   r<   rN   r   r   s      r>   renamezFileSystemBase.renamec  r   r=   c                     d S rD   r<   rN   r   s     r>   	init_pathzFileSystemBase.init_pathi      r=   c                     d S rD   r<   r   s     r>   mkdirzFileSystemBase.mkdirm  r   r=   checkpoint_idc                     d S rD   r<   clsr  s     r>   validate_checkpoint_idz%FileSystemBase.validate_checkpoint_idq  s	     	r=   c                     d S rD   r<   r   s     r>   existszFileSystemBase.existsv  r   r=   c                     d S rD   r<   r   s     r>   rm_filezFileSystemBase.rm_filez  r   r=   )r5   r6   r7   r   r   r   r9   r   PathLiker   r   IOBaser   r   r   r   r  classmethodr   r  r	  r  r<   r=   r>   r-   r-   U  sF       #r{*+36	29dD(	)   ^ ^
 #r{*+58	sBK	    ^
 #r{*+7<S"+=M7N	   ^
 eC$45 %R[@P:Q    ^ %R[ 01 d    ^ 5bk9I3J t    ^ [ 5bk!12 t    ^ E#r{"23     ^  r=   r-   c            
          e Zd Zedeeej        f         dedee	j
        ddf         fd            Zdeeej        f         dedeeej        f         fdZdeeej        f         deeej        f         fdZdeeej        f         d	eeej        f         ddfd
Zdeeej        f         ddfdZedeeej        f         defd            Zdeeej        f         defdZdeeej        f         ddfdZdS )r,   r   r   rB   Nc              #      K   t          t          |                              |          5 }t          t          j        |          V  d d d            d S # 1 swxY w Y   d S rD   )r
   r   openr   r  )rN   r   r   ro   s       r>   r   zFileSystem.create_stream  s       $""4(( 	*Fry&)))))	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	*s   AAAr   c                 2    t          t          |          |z  S rD   )r
   r   r   s      r>   r   zFileSystem.concat_path  s     D$&((r=   c                 N    t          |t                    st          |          }|S rD   )r   r   r   s     r>   r   zFileSystem.init_path  s$    $%% 	::Dr=   r   c                 |    t          t          |                              t          t          |                     d S rD   )r
   r   r   r   s      r>   r   zFileSystem.rename  s2     	T4T8 4 455555r=   c                 Z    t          t          |                              dd           d S )NT)parentsexist_ok)r
   r   r  r   s     r>   r  zFileSystem.mkdir  s+    T4td;;;;;r=   r  c                    t          |t                    rdS dt          |          v rdS t          |          j        D ]E}|                                r/t          j        t          |          t
          j                  r dS FdS )NTz://F)r   r   r9   r  r	  r   accessW_OK)r  r  ps      r>   r  z!FileSystem.validate_checkpoint_id  s    mT** 	4C&&&&5m$$, 	 	Axxzz biA88 ttur=   c                 P    t          t          |                                          S rD   )r
   r   r	  r   s     r>   r	  zFileSystem.exists  s    D$&&(((r=   c                 T    t          t          |                                           d S rD   )r
   r   unlinkr   s     r>   r  zFileSystem.rm_file  s$    T4!!!!!r=   )r5   r6   r7   r   r   r9   r   r  r   r   r  r   r   r   r   r  r  r   r  r	  r  r<   r=   r>   r,   r,     s       *#r{*+*36*	29dD(	)* * * ^*)#r{*+)58)	sBK	 ) ) ) )
eC$45 %R[@P:Q    
6#r{*+67<S"+=M7N6	6 6 6 6
<%R[ 01 <d < < < < 5bk9I3J t    [)5bk!12 )t ) ) ) )"E#r{"23 " " " " " " "r=   r,   c                       e Zd ZdZ	 	 	 	 	 d!deeej        f         dedede	d	e	d
ede
de
ddf fdZd"deeej        df         ddfdZdeddfdZdedefdZdee         dee         fdZdededeee                  fdZdedeee                  ddfdZdee         fdZedeeej        f         fd            Zedeeej        f         fd            Zedeeej        f         defd             Z xZ S )#_FileSystemWritera  
    Basic implementation of StorageWriter using file IO.

    This implementation makes the following assumptions and simplifications:

    * The checkpoint path is an empty or non-existing directory.
    * File creation is atomic

    The checkpoint consist of one file per write request plus
    a `.metadata` file with the serialized metadata.

    Tr   逖 r   single_file_per_rank
sync_filesr   per_thread_copy_ahead	overwriteargskwargsrB   Nc                    t                                                       t                      | _        | j                            |          | _        || _        || _        || _        || _	        t                      | _        || _        dS )a  
        Initialize the writer pointing to `path`.

        Args:
            path: directory where the checkpoint will be written to.
            single_file_per_rank: Produce one file per rank instead of one file per tensor/blob. Default to True.
            sync_files : force files to be synced to permanent storage. Default to True.
            thread_count: Number of IO threads to use to write. Default to 1.
            per_thread_copy_ahead: How many bytes to copy from the GPU ahead of saving then. Default 10Mb.
            overwrite: Whether to allow overwriting existing checkpoints. Defaults to True.

        N. B. If sync_files is disabled, there's no guarantee that the checkpoint will be consistent in the case of a failure.
        N)superr^   r,   fsr   r   r#  r$  r   r%  rG   save_idr&  )
rN   r   r#  r$  r   r%  r&  r'  r(  	__class__s
            r>   r^   z_FileSystemWriter.__init__  st    0 	,,G%%d++	$8!$(%:"%''"r=   r  c                 n    |r| j                             |          | _        t                      | _        d S rD   )r+  r   r   rG   r,  rN   r  s     r>   resetz_FileSystemWriter.reset  s2     	9))-88DI%''r=   is_coordinatorc                     d S rD   r<   )rN   r1  s     r>   set_up_storage_writerz'_FileSystemWriter.set_up_storage_writer  rb   r=   planc                    | j                             | j                   | j                             | j                  rE| j        r&t          j        d| j         d| j        d           nt          d| j        d          |S )Nz#Detected an existing checkpoint in z#, overwriting since self.overwrite=z. Past version 2.5 of PyTorch, `overwrite` will default to False. Set this variable to True to maintain this functionality or False to raise when an existing checkpoint is found.z-Checkpoint already exists and self.overwrite=.)	r+  r  r   r	  metadata_pathr&  warningswarnr   rN   r4  s     r>   prepare_local_planz$_FileSystemWriter.prepare_local_plan  s    di   7>>$,-- 	X~ Xk$:L k kbfbp k k k    ##VDN#V#V#VWWWr=   plansc                 8    d t          |          D             }|S )Nc           
      b    g | ],\  }}t          j        |t          d | d                    -S )__rj   r   )dataclassesreplacer@   )r   r   r4  s      r>   r   z9_FileSystemWriter.prepare_global_plan.<locals>.<listcomp>  sL     
 
 
4 >)q)))3L3LMMM
 
 
r=   )r   )rN   r<  	new_planss      r>   prepare_global_planz%_FileSystemWriter.prepare_global_plan  s0    
 
$U++
 
 
	 r=   r   c                    |j         dfd}t          j                    }| j        r`t	          | j        |j                  D ]D} |            }| j                            | j	        |          }|
                    |||f           EnM|j        D ]E} |            }| j                            | j	        |          }|
                    |||gf           Ft          j                    }	g }
t          d| j                  D ]f}t          j        t          | j        j        ||	|| j        | j        | j        f          }|                                 |
                    |           gt          | j        j        ||	|| j        | j        | j                   |
D ]}|                                 g }	 	 ||	                                z  }# t          j        $ r( t/                      }|                    |           |cY S w xY w)Nr   c                  6    j           t           } dz  | S r   )rA   DEFAULT_SUFFIX)r   
file_countstorage_plans    r>   gen_filez._FileSystemWriter.write_data.<locals>.gen_file  s*    '.L
LNLLI!OJr=   r   )targetr'  )r   r   r   r   rp   r   r   )r   r   Queuer#  r   r   r]   r+  r   r   r   r   	threadingThreadr   r   r%  r$  startr`   joinr   r   r)   
set_result)rN   r4  r   rJ  r   bucketr   r   r   r   threadsrj   tresfutrH  rI  s                  @@r>   
write_dataz_FileSystemWriter.write_data  sV   
 (,'8
	 	 	 	 	 	 #(+--
$ 		:1$2CTZPP : :$HJJ	w**49i@@i89999:
 
 : :$HJJ	w**49i@@i$89999$)KMMq$+,, 	 	A .G) .O%  A GGIIINN1'/!% $ :o*	
 	
 	
 	
  	 	AFFHHHH	1|..0001{ 	 	 	-3XXCNN3JJJ	s   5G 4HHmetadataresultsc                    i }|D ]!}|                     d |D                        "||_        |                                 |_        t          t          | j                            | j        t           d                    }| j        	                    |d          5 }t          j        ||           | j        rK	 t          j        |                                           n## t           $ r t          j                     Y nw xY wd d d            n# 1 swxY w Y   | j                            | j                  r| j                            | j                   | j                            || j                   d S )Nc                 (    i | ]}|j         |j        S r<   )r   r   )r   wrs     r>   
<dictcomp>z,_FileSystemWriter.finish.<locals>.<dictcomp>A  s    KKKRrxKKKr=   z.tmpr   )updater   storage_metar
   r   r+  r   r   r/   r   pickledumpr$  r   r   r   r   r   r	  r7  r  r   )rN   rX  rY  
storage_mdwr_listtmp_pathmetadata_files          r>   finishz_FileSystemWriter.finish>  s   
 	M 	MGKK7KKKLLLL * $ 1 1 3 3dg11$)=R=R=RSSTTW""8T22 	mK-000 H]11334444%   GIIIII	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 7>>$,-- 	0GOOD.///x!344444s6   D;&C"!D"D?DDDDDc                 8    t          | j        | j                  S )N)r  r,  )r   r  r,  rR   s    r>   r_  z_FileSystemWriter.storage_metaU  s    );T\RRRRr=   c                 r    t          t          | j                            | j        t
                              S rD   )r
   r   r+  r   r   r/   rR   s    r>   r7  z_FileSystemWriter.metadata_pathX  s&    D$'--diFFGGGr=   c                     | j         S )zT
        return the checkpoint_id that will be used to save the checkpoint.
        r   rR   s    r>   r  z_FileSystemWriter.checkpoint_id\      
 yr=   c                 6    t                               |          S rD   r,   r  r  s     r>   r  z(_FileSystemWriter.validate_checkpoint_idc      00???r=   )TTr   r"  TrD   )!r5   r6   r7   r8   r   r9   r   r  r   r;   r   r^   r0  r3  r    r;  r   rD  r!   r)   r'   rW  r   rf  r   r   r_  r   r7  r  r  r  __classcell__r-  s   @r>   r   r     s          &*%/ #  #C$% # # # 	 #
  #  # #  #  #  # 
 #  #  #  #  #  #D( (5bk4)?#@ (D ( ( ( (
D T    x H    h DN    AA A 
[!	"	A A A AF5x 5$tK7H2I 5d 5 5 5 5.Sh{3 S S S S HuS"+%56 H H H XH uS"+%56    X @5bk9I3J @t @ @ @ [@ @ @ @ @r=   r   c                   r    e Zd Zdeeej        f         ddf fdZdede	j
        fdZddeeej        df         ddfdZd	ed
eded         fdZdefdZdededdfdZd	edefdZdee         dee         fdZedeeej        f         fd            Zedeeej        f         defd            Z xZS )r+   r   rB   Nc                     t                                                       t                      | _        | j                            |          | _        i | _        t                      | _        d S rD   )	r*  r^   r,   r+  r   r   r   rG   load_id)rN   r   r-  s     r>   r^   zFileSystemReader.__init__i  sT    ,,G%%d++	?A%''r=   sinfoc                 8    t          ||j        |j                  S rD   )r(   r3   r4   )rN   filert  s      r>   _slice_filezFileSystemReader._slice_filep  s     u|U\BBBr=   r  c                 |    i | _         |r| j                            |          | _        t	                      | _        d S rD   )r   r+  r   r   rG   rs  r/  s     r>   r0  zFileSystemReader.resets  s:     	9))-88DI%''r=   r4  r   c                 6   i }|j         D ]D}| j        |j                 }|j        }|                    |g                               |           E|                                 D ]\  }}| j                            | j        |          }	| j        	                    |	d          5 }
|D ]}| j        |j                 }| 
                    |
|          }|j        t          j        k    rXt          j        |                    |j                            }|                    d           |                    ||           t)          t*          t-          j        t)          t0          t2                   |          dd                    }t5          ||j        |j                  }|                    |                                          }|                                |                                k    s<J d|j         d|                                 d|                                             |                     |           |!                    ||           	 d d d            n# 1 swxY w Y   tE                      }|#                    d            |S )	Nrbr   re   T)map_locationweights_onlyzreq z mismatch sizes z vs )$r]   r   storage_indexr2   
setdefaultr`   r+  r   r   r   rw  r   r   r   r   r   readr4   seek
load_bytesr
   r   rX   loadr   r   r   storage_offsetslengthsresolve_tensorrd   rJ   copy_commit_tensorr)   rQ  )rN   r4  r   per_file	read_itemitem_mdr   r2   reqsr   ro   req
file_slice
read_bytesrk   target_tensorrV  s                    r>   	read_datazFileSystemReader.read_datay  s   .0 	< 	<I'	(?@G(Db))00;;;;#+>>#3#3 	B 	BM4w**49mDDH&&x66 B& B BC"/0ABG!%!1!1&'!B!BJx<#777%'Z
0O0O%P%P
"*****3
;;;;!%"!J $RY
 ; ;-2-1  " " "8"C$7" " )0(>(>s(C(C(J(J(L(L *..00FKKMMAAAn#"3nn]EWEWEYEYnn_e_j_j_l_lnn BAA%++F333--c=AAAA3BB B B B B B B B B B B B B B B: hht
s   $F8I))I-	0I-	c                 >   | j                             | j        d          }| j                             |d          5 }t	          j        |          }d d d            n# 1 swxY w Y   t          |dd           t                      |_        | j	        |j        _	        |S )Nr.   rz  r_  )
r+  r   r   r   r`  r  r   r   r_  rs  )rN   r   re  rX  s       r>   read_metadatazFileSystemReader.read_metadata  s    w""49k::W""4.. 	2-{=11H	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 	2 8^T22:$/MMH!(,%s   AA!$A!rX  r1  c                 0    |j         | _         | j         J d S rD   r@  )rN   rX  r1  s      r>   set_up_storage_readerz&FileSystemReader.set_up_storage_reader  s"    $1 ,,,,,r=   c                     |S rD   r<   r:  s     r>   r;  z#FileSystemReader.prepare_local_plan  s    r=   r<  c                     |S rD   r<   )rN   r<  s     r>   rD  z$FileSystemReader.prepare_global_plan  s    r=   c                     | j         S )zT
        return the checkpoint_id that will be used to load the checkpoint.
        rj  rR   s    r>   r  zFileSystemReader.checkpoint_id  rk  r=   c                 6    t                               |          S rD   rm  r  s     r>   r  z'FileSystemReader.validate_checkpoint_id  rn  r=   rD   )r5   r6   r7   r   r9   r   r  r^   r1   r   r  rw  r0  r   r   r)   r  r   r  r   r  r;  r   rD  r   r  r  r  ro  rp  s   @r>   r+   r+   h  s       (U3#34 ( ( ( ( ( ( (C| C	 C C C C( (5bk4)?#@ (D ( ( ( ()h ) ) ) ) ) )X	x 	 	 	 	-h - -QU - - - -x H    h DN     uS"+%56    X @5bk9I3J @t @ @ @ [@ @ @ @ @r=   r+   c                        e Zd ZdZ	 	 	 	 	 	 ddeeej        f         deded	e	d
e	dededdf fdZ
dedef fdZ xZS )r*   r!  Tr   r"  Fr   r#  r$  r   r%  cache_staged_state_dictr&  rB   Nc           	      X    t                                          |||||||           dS )aM  
        Initialize the writer pointing to `path`.

        Args:
            path: directory where the checkpoint will be written to.
            single_file_per_rank: Produce one file per rank instead of one file per tensor/blob. Default to True.
            sync_files : force files to be synced to permanent storage. Default to True.
            thread_count: Number of IO threads to use to write. Default to 1.
            per_thread_copy_ahead: How many bytes to copy from the GPU ahead of saving then. Default 10Mb.
            cache_staged_state_dict: Whether to cache the staged state_dict. This option decreases staging latency
                at the cost of increases memory usage. Additionally, if this parameter is set to True, it's the expectation
                that the stager is maintained and re-used for multiple dcp.async_save calls. Default to False.
            overwrite: Whether to allow overwriting existing checkpoints. Defaults to True.

        N. B. If sync_files is disabled, there's no guarantee that the checkpoint will be consistent in the case of a failure.
        )r   r#  r$  r   r%  r  r&  N)r*  r^   )	rN   r   r#  r$  r   r%  r  r&  r-  s	           r>   r^   zFileSystemWriter.__init__  sE    4 	!5!%"7$; 	 	
 	
 	
 	
 	
r=   
state_dictc                 T    d| _         t                                          |          S )zOverride of AsyncStager.stager   )r%  r*  stage)rN   r  r-  s     r>   r  zFileSystemWriter.stage  s#     &'"ww}}Z(((r=   )TTr   r"  FT)r5   r6   r7   r8   r   r9   r   r  r   r;   r^   r   r  ro  rp  s   @r>   r*   r*     s           &*%/(-"
 "
C$%"
 #"
 	"

 "
  #"
 "&"
 "
 
"
 "
 "
 "
 "
 "
H) )O ) ) ) ) ) ) ) ) ) )r=   r*   )Wrs   rA  r   r   r   r`  r   rM  rE   r8  abcr   r   
contextlibr   r   pathlibr   typingr   r	   r
   r   r   r   r   r   r   r   r   r   rX   r   torch._utilsr   r   torch.distributed._shard._utilsr   %torch.distributed.checkpoint.metadatar   r   r   r   $torch.distributed.checkpoint.plannerr   r   r   r   r    r!   r"   r#   $torch.distributed.checkpoint.stagingr$   $torch.distributed.checkpoint.storager%   r&   r'   "torch.distributed.checkpoint.utilsr(   torch.futuresr)   __all__r/   r9   r:   r1   r@   rG  rG   rI   rZ   rm   r;   r   r   r  r   r   rL  r   r   r-   r,   r   r+   r*   r<   r=   r>   <module>r     s            				  				         # # # # # # # # % % % % % % ! ! ! ! ! !                                         G G G G G G G G B B B B B B           	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 E D D D D D         
 A @ @ @ @ @             S
R
Rc                            C       }   .W" W" W" W" W"M W" W" W"t4Y 43 4 4 4 4# d9o $tIBW    0I

EL(
)  	
    0AAA +A 	A
 A A A 
A A A AH' ' ' ' 'S ' ' 'T," ," ," ," ," ," ," ,"^w@ w@ w@ w@ w@ w@ w@ w@t[@ [@ [@ [@ [@} [@ [@ [@|7) 7) 7) 7) 7)(*= 7) 7) 7) 7) 7)r=   