
    g/                         d dl Z ddlmZmZmZmZmZmZmZm	Z	m
Z
mZmZmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZmZmZmZmZmZmZmZ d Z dS )	    N   )ComputeEnvironmentDistributedTypeis_deepspeed_availableis_fp8_availableis_mlu_availableis_mps_availableis_msamp_availableis_musa_availableis_npu_availableis_transformer_engine_availableis_transformers_availableis_xpu_available)DEEPSPEED_MULTINODE_LAUNCHERSFSDP_AUTO_WRAP_POLICYFSDP_BACKWARD_PREFETCHFSDP_SHARDING_STRATEGYFSDP_STATE_DICT_TYPETORCH_DYNAMO_MODES   )ClusterConfig)DYNAMO_BACKENDS
_ask_field_ask_options_convert_distributed_mode_convert_dynamo_backend_convert_fp8_backend_convert_mixed_precision_convert_yes_no_to_boolc                    9 t          dg dt                    } d}d}d}d }d }d }d}d}d}	| t          j        t          j        t          j        t          j        t          j        t          j        fv rt          dt          d	          }|dk    rt          d
t          t          |                    t                    }t          d          }t          dt                    }t          dt          dd          }|st          dd	          }t          dt          dd          }	| t          j        k    rt          dt          dd          }
n| t          j        k    rd}
nd}
i }i }|
rt          dt          dd          |d<   | t          j        k    ryt          dt          dd          }|r_t          dt          d	          }t           j                            |                                          |d<   t          dd	          |d<   |
sdt)                      rV| t          j        t          j        t          j        t          j        t          j        fvrt          dt          dd          |d<   i }t          dt          dd          }|rd}t          dd  t,          D             t.          d!	          ||d"z   <   t          d#t          dd          }|r[t          d$t0          d% d	          ||d&z   <   t          d't          dd          ||d(z   <   t          d)t          dd          ||d*z   <   |
 ot3                      }i }| t          j        t          j        t          j        t          j        t          j        t          j        fv r|st          d+t          dd          }|r$t          j        } t7                      s
J d,            | t          j        k    ryt          d-t          dd          }|rt          d.t          d/	          |d0<   nt          d1g d2t          d!	          |d3<   g d49|d3         d!k    rzt          d599fd6          |d7<   t          d899fd9          |d:<   |d:         d;k    rt          d<t          d=	          |d><   |d7         d;k    rt          d?t          d=	          |d@<   t          dAt          d	          |dB<   t          dCt          dd          }|rt          dDt8          dE	          |dF<   |d3         dGk    rt          dHt          dd          |dI<   t          dJt          dd          |dK<   |dK         rt;                      st=          dL          t          dMt          dd          }|rt          dNt                    |dO<   |dk    rdP}t          |t>          dQ           |dR<   |dR         t>          d         k    r|t          dSt                    |dT<   t          dUt          dd          }|rt          dVt                    |dW<   t          dXt          dd          }|rt          dYt                    |dZ<   i }| t          j        t          j        t          j        t          j        t          j        fv rt          d[t          dd          }|rt          j         } | t          j         k    rd\}t          |tB          d]           |d^<   t          d_t          dd          |d`<   da}t          |tD          db           |dc<   |dc         tD          d         k    r3t          ddt          dd          } | st          det                    |df<   n1|dc         tD          d         k    rt          dgt          dh	          |di<   dj}!t          |!tF          dk           |dl<   dm}"t          |"tH          dn d!	          |do<   t          dpt          dd          |dq<   t          drt          dd          |ds<   t          dtt          dd          |du<   |du         rd|dv<   nt          dwt          dd          |dv<   t          dxt          dd          |dy<   i }#| t          j        fv r't          dzt          dd          }$|$rt          j%        } | t          j%        k    rd{}t          d|t          dd}          |#|d~z   <   |#|d~z            dk    rt          dt          dd          |#|dz   <   t          dt          dd}          |#|dz   <   |#|dz            dk    rt          dt          dd}          |#|dz   <   t          dt          dd          |#|dz   <   t          dt          dd          |#|dz   <   t          dt8          dE	          |#|dFz   <   d }%d }&d}'g }(d })d }*d }+d},d}-| t          j        t          j        t          j        t          j        t          j        t          j        t          j        fv rst          |           &                    d          d         '                    dd          }.|.dk    r|.dz  }.n|.dk    rd}.n|.dz  }.t          d|. dt          dd}          }n@| t          j         t          j        t          j%        fv rt          dt          dd}          }nd}| t          j        k    r|dk    r|dk    rtQ          d|  d          | t          j        t          j        t          j        t          j        t          j        t          j        fv rN|
sL|sJtS                      rd}.n$tU                      rd}.ntW                      rd}.nd}.t          d|. dd	          }d}/| t          j        t          j        fv r|
s|st          dt          dd          }/d }0| t          j        k    rhd}1t          dd	          }2t          dt          dd          }-|-r8t          dd d          })t          dd d          }+t          ddd          },t          dt          dd          }3|3rt          dt          dd          }4|4r2t          dd d          }&t           j        ,                    |&          }&nTt[          d           g }%d}5|5r?|%.                    t          dd d                     t          dt          dd          }5|5?t          dd	          &                    d          }*t          dd	          &                    d          }(nd}2| t          j        k    r|rd }1nt          dg dt^                    }1|1dk    rta                      stQ          d          i }0t          dddgtb                    |0d"<   |0d"         dk    r'te                      stQ          d          t          dt          d	          |0d<   t          dt          d	          |0d<   t          dt          d	          |0d<   t          dddgdń d	          |0d<   t          dt          dȬ	          |0d<   t          dddgd̈́ d	          |0d<   t          dt          d	          |0d<   |0d         rMt          dt          d	          }6t          dt          d	          }7t          dt          d	          }8|6|7|8f|0d<   nB|0d"         dk    r6tg                      stQ          dզ          t          dddgdل d	          |0d<   |r|1dk    r|
st[          dۦ           | t          j        k    r|1dk    rt          dd	          }'ti          di dtj          j6        d| d|d|d|1d|'d|d|d|d|d|2d|0d|d|d|#d|d|d|
d|d|d|%d|&d|(d|)d|*d|+d|,d|-d|d|	d|/S )Nz$Which type of machine are you using?)zNo distributed trainingz	multi-CPUz	multi-XPUz	multi-GPUz	multi-NPUz	multi-MLUz
multi-MUSATPUr   r   staticTFzYHow many different machines will you use (use more than 1 for multi-node training)? [1]: )defaultz!What is the rank of this machine?zGWhat is the IP address of the machine that will host the main process? zDWhat is the port you will use to communicate with the main process? zAre all the machines on the same local network? Answer `no` if nodes are on the cloud and/or on different network hosts [YES/no]: zPlease enter yes or no.)r#   error_messagez?What rendezvous backend will you use? ('static', 'c10d', ...): zShould distributed operations be checked while running for errors? This can avoid timeout issues but will be slower. [yes/NO]: zxDo you want to run your training on CPU only (even if a GPU / Apple Silicon / Ascend NPU device is available)? [yes/NO]:zXDo you want to use Intel PyTorch Extension (IPEX) to speed up training on CPU? [yes/NO]:ipexz3Do you want accelerate to launch mpirun? [yes/NO]: zGPlease enter the path to the hostfile to use with mpirun [~/hostfile]: z
~/hostfilempirun_hostfilez/Enter the number of oneCCL worker threads [1]: 
mpirun_cclzDDo you want to use XPU plugin to speed up training on XPU? [yes/NO]:use_xpuz?Do you wish to optimize your script with torch dynamo?[yes/NO]:dynamo_z+Which dynamo backend would you like to use?c                 6    g | ]}|                                 S  )lower).0xs     ^/var/www/html/ai-engine/env/lib/python3.11/site-packages/accelerate/commands/config/cluster.py
<listcomp>z%get_cluster_input.<locals>.<listcomp>   s     0001QWWYY000       backendzGDo you want to customize the defaults sent to torch.compile? [yes/NO]: zWhich mode do you want to use?c                 6    t           t          |                    S N)r   intr.   s    r/   <lambda>z#get_cluster_input.<locals>.<lambda>   s    ,SVV4 r1   modez\Do you want the fullgraph mode or it is ok to break model into several subgraphs? [yes/NO]: use_fullgraphz7Do you want to enable dynamic shape tracing? [yes/NO]: use_dynamicz(Do you want to use DeepSpeed? [yes/NO]: zRDeepSpeed is not installed => run `pip3 install deepspeed` or build it from sourcezDDo you want to specify a json file to a DeepSpeed config? [yes/NO]: z9Please enter the path to the json DeepSpeed config file: nonedeepspeed_config_filez8What should be your DeepSpeed's ZeRO optimization stage?)r   r   r2   r   
zero_stage)r<   cpunvmez"Where to offload optimizer states?c                 .    t          |                    S r5   r6   r.   deepspeed_devicess    r/   r8   z#get_cluster_input.<locals>.<lambda>  s    [lmpqrmsms[t r1   offload_optimizer_devicezWhere to offload parameters?c                 .    t          |                    S r5   rB   rC   s    r/   r8   z#get_cluster_input.<locals>.<lambda>  s    UfgjklgmgmUn r1   offload_param_devicer@   z Nvme Path to offload parameters?z/nvmeoffload_param_nvme_pathz&Nvme Path to offload optimizer states?offload_optimizer_nvme_pathzIHow many gradient accumulation steps you're passing in your script? [1]: gradient_accumulation_stepsz0Do you want to use gradient clipping? [yes/NO]: z,What is the gradient clipping value? [1.0]: g      ?gradient_clippingr   zLDo you want to save 16-bit model weights when using ZeRO Stage-3? [yes/NO]: zero3_save_16bit_modelzoDo you want to enable `deepspeed.zero.Init` when using ZeRO Stage-3 for constructing massive models? [yes/NO]: zero3_init_flagzpWhen `zero3_init_flag` is set, it requires Transformers to be installed. Please run `pip3 install transformers`.zCDo you want to enable Mixture-of-Experts training (MoE)? [yes/NO]: zSpecify the comma-separated list of transformers MoE layer class names (case-sensitive), e.g :  `MixtralSparseMoeBlock`, `Qwen2MoeSparseMoeBlock`, `JetMoEAttention,JetMoEBlock` ... : deepspeed_moe_layer_cls_namesz*Which Type of launcher do you want to use?c                 6    t           t          |                    S r5   )r   r6   r7   s    r/   r8   z#get_cluster_input.<locals>.<lambda>U  s    ;CFFC r1   deepspeed_multinode_launcheraE  DeepSpeed configures multi-node compute resources with hostfile. Each row is of the format `hostname slots=[num_gpus]`, e.g., `localhost slots=2`; for more information please refer official [documentation](https://www.deepspeed.ai/getting-started/#resource-configuration-multi-node). Please specify the location of hostfile: deepspeed_hostfilez:Do you want to specify exclusion filter string? [yes/NO]: z#DeepSpeed exclusion filter string: deepspeed_exclusion_filterz:Do you want to specify inclusion filter string? [yes/NO]: z#DeepSpeed inclusion filter string: deepspeed_inclusion_filterz7Do you want to use FullyShardedDataParallel? [yes/NO]: z&What should be your sharding strategy?c                 6    t           t          |                    S r5   )r   r6   r7   s    r/   r8   z#get_cluster_input.<locals>.<lambda>      0Q8 r1   fsdp_sharding_strategyzBDo you want to offload parameters and gradients to CPU? [yes/NO]: fsdp_offload_paramsz%What should be your auto wrap policy?c                 6    t           t          |                    S r5   )r   r6   r7   s    r/   r8   z#get_cluster_input.<locals>.<lambda>  s    /A7 r1   fsdp_auto_wrap_policyul   Do you want to use the model's `_no_split_modules` to wrap. Only applicable for 🤗 Transformers [yes/NO]: zSpecify the comma-separated list of transformer layer class names (case-sensitive) to wrap ,e.g, :`BertLayer`, `GPTJBlock`, `T5Block`, `BertLayer,BertEmbeddings,BertSelfOutput` ...? : "fsdp_transformer_layer_cls_to_wrapzaWhat should be your FSDP's minimum number of parameters for Default Auto Wrapping Policy? [1e8]: i fsdp_min_num_paramsz4What should be your FSDP's backward prefetch policy?c                 6    t           t          |                    S r5   )r   r6   r7   s    r/   r8   z#get_cluster_input.<locals>.<lambda>  rU   r1   fsdp_backward_prefetchz+What should be your FSDP's state dict type?c                 6    t           t          |                    S r5   )r   r6   r7   s    r/   r8   z#get_cluster_input.<locals>.<lambda>  s    .s1vv6 r1   fsdp_state_dict_typez@Do you want to enable FSDP's forward prefetch policy? [yes/NO]: fsdp_forward_prefetchzBDo you want to enable FSDP's `use_orig_params` feature? [YES/no]: fsdp_use_orig_paramsuo   Do you want to enable CPU RAM efficient model loading? Only applicable for 🤗 Transformers models. [YES/no]: fsdp_cpu_ram_efficient_loadingfsdp_sync_module_stateszsDo you want each individually wrapped FSDP unit to broadcast module parameters from rank 0 at the start? [YES/no]: z?Do you want to enable FSDP activation checkpointing? [yes/NO]: fsdp_activation_checkpointingz+Do you want to use Megatron-LM ? [yes/NO]: megatron_lm_z0What is the Tensor Parallelism degree/size? [1]:zPlease enter an integer.	tp_degreez6Do you want to enable Sequence Parallelism? [YES/no]: sequence_parallelismz2What is the Pipeline Parallelism degree/size? [1]:	pp_degreez)What is the number of micro-batches? [1]:num_micro_batcheszDDo you want to enable selective activation recomputation? [YES/no]: recompute_activationszzDo you want to use distributed optimizer which shards optimizer state and gradients across data parallel ranks? [YES/no]: use_distributed_optimizerzSWhat is the gradient clipping value based on global L2 Norm (0 to disable)? [1.0]: no.MULTI_ r!   z coresCPU	processesz(s)z	How many z. should be used for distributed training? [1]:z=How many GPU(s) should be used for distributed training? [1]:zSpecified distributed type zy but only using 1 GPU on a single machine. Please select `No distributed training` for the type of machine you are using.zNPU(s)zMLU(s)zMUSA(s)zGPU(s)zWhat zV (by id) should be used for training on this machine as a comma-seperated list? [all]:allzcWould you like to enable numa efficiency? (Currently only supported on NVIDIA hardware). [yes/NO]: ziWhat is the name of the function in your script that should be launched in all parallel scripts? [main]: mainz'Are you using a TPU cluster? [yes/NO]: z&What is the name of your TPU cluster? z*Please enter the name of your TPU cluster.z&What is the zone of your TPU cluster? z*Please enter the zone of your TPU cluster.zFTo run a python script in a TPU pod, should `sudo` be used? [yes/NO]: zCDo you have code you wish to run on startup in each pod? [yes/NO]: z1Is this code located in a bash script? [yes/NO]: z&What is the path to your bash script? z*Please enter the path to your bash script.zLPlease enter each command seperately you wish to run on startup in each pod.z(Please enter a single command to be ran zTPlease enter the commands you wish to run on startup in each pod as a single string.z.Do you wish to add another command? [yes/NO]: zqIf not using an instance group, what are the names of the Compute VM instances to be used, seperated by a comma: ,zQWhat environment variables do you wish to set in each pod, seperated by a comma: z#Do you wish to use mixed precision?)rl   fp16bf16fp8rw   zJFP8 (either Transformer Engine or MSAMP) is not installed on this machine.z%Which FP8 backend do you want to use?temsampTEzITransformersEngine was selected, but it is not installed on this machine.zuDo you want to use FP8 autocast during eval mode? Generally better metrics are found when this is disabled [yes/NO]: use_autocast_during_evalz6What margin should be used for gradient scaling? [0]: marginzVWhat interval should be used for for how often the scaling factor is recomputed? [1]: intervalz#Which weight format should be used?HYBRIDE4M3c                     | dk    rdndS )Nr   r~   r   r+   r7   s    r/   r8   z#get_cluster_input.<locals>.<lambda>  s    a1ff((& r1   
fp8_formatzWWhat length of history should be used for the amax scaling factor computation? [1024]: i   amax_history_lengthzGWhich algorithm should be used for the amax scaling factor computation?maxmost_recentc                     | dk    rdndS )Nr   r   r   r+   r7   s    r/   r8   z#get_cluster_input.<locals>.<lambda>  s    166%%} r1   amax_compute_algorithmz]Do you want to to execute `fprop`, `dgrad`, and `wgrad` GEMMS in higher precision? [yes/NO]: override_linear_precisionz:Should `fprop` be executed in higher precision? [yes/NO]: z:Should `dgrad` be executed in higher precision? [yes/NO]: z:Should `wgrad` be executed in higher precision? [yes/NO]: MSAMPz<MSAMP was selected, but it is not installed on this machine.z(Which optimization level should be used?O1O2c                     | dk    rdndS )Nr   r   r   r+   r7   s    r/   r8   z#get_cluster_input.<locals>.<lambda>  s    !q&&$$d r1   optimization_levelzTorch dynamo used without mixed precision requires TF32 to be efficient. Accelerate will enable it by default when launching your scripts.rv   zWShould `torch.float` be cast as `bfloat16` and `torch.double` remain `float32` on TPUs?compute_environmentdistributed_typenum_processesgpu_idsmixed_precisiondowncast_bf16machine_ranknum_machinesmain_process_ipmain_process_portmain_training_function
fp8_configdeepspeed_configfsdp_configmegatron_lm_configipex_configmpirun_configuse_cpurdzv_backendsame_networkcommandscommand_filetpu_envtpu_nametpu_vmtpu_zonetpu_use_sudotpu_use_clusterdynamo_configdebugenable_cpu_affinityr+   )7r   r   r   	MULTI_GPU	MULTI_MLU
MULTI_MUSA	MULTI_NPU	MULTI_XPU	MULTI_CPUr   r6   listranger   NOstrospath
expanduserstripr   XLAr   r   r   r	   	DEEPSPEEDr   floatr   	Exceptionr   FSDPr   r   r   r   MEGATRON_LMsplitreplace
ValueErrorr   r   r   abspathprintappendr   r   r   r   r
   r   r   LOCAL_MACHINE):r   r   r   r   r   r   r   r   r   r   r   r   r   
use_mpirunr&   r   
use_dynamoprefixuse_custom_optionsuse_mpsr   use_deepspeeduse_deepspeed_configuse_gradient_clippinguse_moelauncher_queryis_exclusion_filteris_inclusion_filterr   use_fsdpsharding_strategy_queryfsdp_wrap_queryuse_no_split_modulesfsdp_backward_prefetch_queryfsdp_state_dict_type_queryr   use_megatron_lmtpu_commandstpu_command_filetpu_downcast_bf16r   r   r   r   r   r   machine_typer   r   r   r   run_commandsuse_command_fileanother_commandfpropdgradwgradrD   s:                                                            @r/   get_cluster_inputr   6   s   #.		
 		
 		
 	"  LLMGOLLE!!"!!!   "g
 
 

 !'3U<(()) L
 )Y O !+V! ! & U'7	  L   )U_g       N#3	
 
 
 ?--- G#3	
 
 
 
_6	6	6KM w(f#3	
 
 
F 888#E'7	  J  w",](# # #
 4673E3EoF[F[F]F]3^3^/0.89jtu.v.v.vl+

 %%%&

 
 ",R#3	"
 "
 "
I MI/	  J  !,8900000#	-
 -
 -
fy() (U#3	
 
 
  	-90"44	. . .M&6/* 7An'7	7 7 7M&?23 5?I'7	5 5 5M&=01 k0.00G%%%%&
	
 	
 	
 #6#3	
 
 
  	d.8&((d dcd d d 888#-V'7	$ $ $  $ 9<FO"= = = !899 2>N LL	2 2 2 . %<$;$;!#L1Q66CO<>OQtQtQtQtD D$%?@ @L68IKnKnKnKn@ @$%;< ((>?6IIFP>$+G G G()BC
 ((BCvMMJTD$+K K K()FG
 CM_C C C !>?
 )3F+!";	) ) )% ) <FF #= = =$%89
 $L1Q66AKf/ %&?	B B B$%=> 3= B'7	3 3 3./   12 022 #B   !U'7	  G  DNoE E !@A a!MCO"1CCD D !?@ $$BCGdefGggg=GD
 > >$%9: +5T/ %&?	+ + +' + ISAJ J()EF
 +5T/ %&?	+ + +' + ISAJ J()EF
 K!!!"!   E#3	
 
 
  	4.3333&N#4@'&885 5K01
 2<T'7	2 2 2K-. FO3?%774 4K/0
 237LQ7OOO'1 C+!";	( ( ($ , HRqI IK DE
 459Nq9QQQ5?w%6 6 612
 ,b(4@,&885 5K01
 *W&2>*$66	3 3 3K./ 4>R'7	4 4 4K/0 3=T'7	3 3 3K./ =G B'7	= = =K89 ;< 9=5669C J+ ";	: : :56 <FQ'7	< < <K78 O5666$9#3	
 
 
  	;.::::#F7AB8	8 8 8v34 "&;"67!;;FPL+ ";	G G G"6,B#BC 8BD8	8 8 8v34 "&;"67!;;CM?"<	D D D"6,?#?@ DNV'7	D D Dv(??@ HRd'7H H Hv(CCD @Je@ @ @v(;;< LGHFHLO!!!!"!   +,,22377:BB8RPP5  H$LLU""&LLE!L"TTTT4	
 
 
 
o2O4MOjk	k	k"K4	
 
 
 O555LA<M<MTaefTfTf v*:  v  v  v
 
 	

 	%%&%%
	
 	
 	
 	
  	$#LL 	$#LL   	$$LL#LxLxxx
 
 
  O.0IJJJSZJcjJ(q#3	
 
 
 J?...!+w"
 "
 "
 %5#3	
 
 
  =	!8J  H
 "8J  H
 &X7  L
 &U'7	  L  #-G+!";	$ $ $  $ '1@ $&R( ( ($
 (*w7G'H'H$$hiii#%L&*O) $++& J(, /E     +5L3$)*C	+ + + *    D   eCjj  !c   eCjj  "(888=Q8"OO*5---( O
 %''')) s$%qrrr
(4;7O() )
9%
 i(D00:<< v()tuuu=G P/ %> > >J9:
 ,6P !, , ,Jx(
 .8p !. . .Jz*
 0<=!6*@@ !	0 0 0J|, 9Cq $9 9 9J45
 <Ha.DD !	< < <J78 ?Iw/ %? ? ?J:;
 ""=> X *X3$)! ! !
 !+X3$)! ! !
 !+X3$)! ! !
 DI%QVBW
#>?	*g55-// i()ghhh7CBt:: !	8 8 8J34  
o--g- Y	
 	
 	
 ?...?f3L3L&eos
 
 
       .<< ))  $m  	 
 (  ('  "\  "\  (  ,+   65  :  *)   K  .-    K! " $m# $ % & "\' ( "\) * + , &%- . / 0 1 2 v3 4 5 6 "\7 8 (9 : $m; < e= > 0/?   r1   )!r   utilsr   r   r   r   r   r	   r
   r   r   r   r   r   utils.constantsr   r   r   r   r   r   config_argsr   config_utilsr   r   r   r   r   r   r   r   r   r+   r1   r/   <module>r      s  " 
			                                           ' & & & & &	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	F F F F Fr1   