import importlib.util
import json
import os
import warnings
from dataclasses import dataclass, field

import torch

from ..training_args import TrainingArguments
from ..utils import cached_property, is_sagemaker_dp_enabled, logging


logger = logging.get_logger(__name__)


def is_sagemaker_model_parallel_available():
    # Get the SageMaker-specific model-parallel parameters.
    smp_options = os.getenv("SM_HP_MP_PARAMETERS", "{}")
    try:
        # Parse them and check that the "partitions" field is included; it is
        # required for model parallelism.
        smp_options = json.loads(smp_options)
        if "partitions" not in smp_options:
            return False
    except json.JSONDecodeError:
        return False

    # Get the SageMaker-specific framework parameters.
    mpi_options = os.getenv("SM_FRAMEWORK_PARAMS", "{}")
    try:
        # Parse them and check that MPI is enabled.
        mpi_options = json.loads(mpi_options)
        if not mpi_options.get("sagemaker_mpi_enabled", False):
            return False
    except json.JSONDecodeError:
        return False

    # Finally, check that the `smdistributed` module is present.
    return importlib.util.find_spec("smdistributed") is not None
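
# Illustrative sketch only: example shapes of the environment variables the
# check above parses. The values here are assumptions for demonstration; on a
# real SageMaker model-parallel job the launcher sets these variables itself,
# so you would not export them manually.
#
#   os.environ["SM_HP_MP_PARAMETERS"] = '{"partitions": 2, "microbatches": 4}'
#   os.environ["SM_FRAMEWORK_PARAMS"] = '{"sagemaker_mpi_enabled": true}'
#
# With both variables set as above and `smdistributed` installed,
# `is_sagemaker_model_parallel_available()` returns True.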
 fd
            Ze
d             Ze
d             Z xZS )SageMakerTrainingArguments helpzTUsed by the SageMaker launcher to send mp-specific args. Ignored in SageMakerTrainer)defaultmetadatamp_parametersc                 |    t                                                       t          j        dt                     d S )Nz~`SageMakerTrainingArguments` is deprecated and will be removed in v5 of Transformers. You can use `TrainingArguments` instead.)super__post_init__warningswarnFutureWarningself	__class__s    r   r&   z(SageMakerTrainingArguments.__post_init__E   s?    +	
 	
 	
 	
 	
    returntorch.devicec                    t                               d           t          j                                        rCt          j                                        r%| j        dk    rt                               d           | j        rt          j	        d          }d| _
        nt                      r1t          j                    }t          j	        d|          }d| _
        nHt                      rrdd l}t          j                            d| j        	           t#          t%          j        d
                    | _        t          j	        d| j                  }d| _
        n| j        dk    rXt          j	        t          j                                        rdnd          }t          j                                        | _
        net          j                                        s&t          j                            d| j        	           t          j	        d| j                  }d| _
        |j        dk    rt          j                            |           |S )NzPyTorch: setting up devicesztorch.distributed process group is initialized, but local_rank == -1. In order to use Torch DDP, launch your script with `python -m torch.distributed.launchcpur   cuda   smddp)backendtimeoutSMDATAPARALLEL_LOCAL_RANKzcuda:0nccl)loggerinfotorchdistributedis_availableis_initialized
local_rankwarningno_cudadevice_n_gpur   smpr   ,smdistributed.dataparallel.torch.torch_smddpinit_process_groupddp_timeout_deltaintr   r   r3   device_counttype
set_device)r+   rC   r@   r   s       r   _setup_devicesz)SageMakerTrainingArguments._setup_devicesM   s   1222))++ 	0A0P0P0R0R 	W[WfjlWlWlNNi   < 	\%((FDKK244 	))J\&*55FDKK$&& 	????00$J`0aaa!"),G"H"HIIDO\&$/::FDKK_"" \ej.E.E.G.G"R((USSF  *1133DKK $3355 e!44VTMc4ddd\&$/::FDK;&  J!!&)))r-   c                 l    t                      rt          j                    S t                      j        S N)r   rE   dp_sizer%   
world_sizer*   s    r   rQ   z%SageMakerTrainingArguments.world_size{   s*    022 	!;== ww!!r-   c                      t                       S rO   )r   r+   s    r   place_model_on_devicez0SageMakerTrainingArguments.place_model_on_device   s    8::::r-   c                     dS )NF rS   s    r   !_no_sync_in_gradient_accumulationz<SageMakerTrainingArguments._no_sync_in_gradient_accumulation   s    ur-   )r.   r/   )__name__
__module____qualname__r   r#   str__annotations__r&   r   rM   propertyrQ   rT   rW   __classcell__)r,   s   @r   r   r   >   s         pq  M3   

 
 
 
 
 + + + _+Z " " " " X" ; ; X;   X    r-   r   )importlib.utilr   r   r   r'   dataclassesr   r   r<   training_argsr   utilsr   r   r	   
get_loggerrX   r:   r   !smdistributed.modelparallel.torchmodelparallelrE   initr   rV   r-   r   <module>rg      s+        				  ( ( ( ( ( ( ( (  - - - - - - E E E E E E E E E E 
	H	%	%
A A A0 )(** 333333333CHJJJ I I I I I!2 I I I I Ir-   