
    קg              
       f   U d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlmZ d dlmZ ddgZ e            s:d dlZ G d	 d
          Zd Zeej        d         _        eej        d         _        dS d dlmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%  e j&        e'          Z(er(	 d dl)m*Z* n # e+$ r e(,                    d           Y nw xY w G d dej-                  Z. e.            Z/e.e0d<   dde1fdZ2 G d d          Zddde1dee3df         de
ee1df                  defdZdS )    N)reduce)chain)DictListOptionalTupleTYPE_CHECKINGUnion)is_available)not_noneinit_device_mesh
DeviceMeshc                       e Zd ZdS )_DeviceMeshStubN)__name__
__module____qualname__     Y/var/www/html/ai-engine/env/lib/python3.11/site-packages/torch/distributed/device_mesh.pyr   r      s        r   r   c                      d S Nr   r   r   r   _init_device_mesh_stubr      s    r   ztorch.distributed.device_mesh)_find_pg_by_ranks_and_tag_get_default_group_get_group_tagget_backendget_process_group_ranksget_rankget_world_sizeinit_process_groupis_initialized	new_groupProcessGroup)	ArrayLikezCDeviceMesh requires numpy >= 1.21 to be installed for type checkingc            	       x   e Zd ZddZddZdddeedf         d	eeedf                  ddfd
Z		 dddde
e         ddfdZddZddde
e         fdZededefd            Zededefd            ZdddedefdZ	 ddedede
ej                 ddfdZdeeedf                  fdZdddeded         fdZdS )_MeshEnvreturnNc                 L    g | _         i | _        i | _        i | _        i | _        d S r   )
mesh_stackchild_to_root_mappingmesh_dim_group_optionsroot_to_flatten_mappingflatten_name_to_root_dimsselfs    r   __init__z_MeshEnv.__init__A   s=    02DOGID&  ' UWD(  ***r   r   c                 j    t          | j                  dk    rt          d          | j        d         S )Nr   z#No device mesh is currently active!)lenr*   RuntimeErrorr/   s    r   get_current_meshz_MeshEnv.get_current_meshM   s3    4?##q(("#HIII?2&&r   device_meshsubmesh_dim_names.submesh_dimsc                 p   fd|D             }j         }g }g }d}t          ||          D ]\  }	}
t          |	          dk    r|                    |	d         |z
  |	d         |z
            }|                    |	d         |z
             |t          |	          dz
  z  }|                    | j                 |
         j        d                    |                    |	d         |z
             |                    j        |	d                             t          t          |j	                            }|D ]}|
                    |             |j        g ||R  j        dg|R  }                                }|D ] }t          j        ||d          }||v r|}!||_        | j        |<   |S )Nc                     g | ]H}t          |          d k    rt          fd|          nj                            |d                   IS )   c                 n    j                             |           j                             |          z  S r   )meshsize)xyr7   s     r   <lambda>z5_MeshEnv.create_sub_mesh.<locals>.<listcomp>.<lambda>^   s/    !1!6!6q!9!9K<L<Q<QRS<T<T!T r   r   )r4   r   r>   r?   ).0mesh_dimr7   s     r   
<listcomp>z,_MeshEnv.create_sub_mesh.<locals>.<listcomp>\   sv         x==1$$	 TTTT  
 !%**8A;77  r   r   r<   r3   )	start_dimend_dimFmesh_dim_names_init_backend)r>   zipr4   flattenappendr-   _dim_group_infoslistrangendimremovepermutereshaper   r   device_typer+   )r0   r7   r8   r9   slice_dim_sizemesh_tensorslice_dim_idxslice_dim_group_infonum_dims_flattenmesh_dim_indicesmesh_dim_namemesh_dims_remained_idxidxpg_ranks_by_dimcur_rankmesh_ndsubmeshres_submeshs    `                r   create_sub_meshz_MeshEnv.create_sub_meshR   sy       !-  N &*KM#%   !36|EV3W3W  / - '((1,,"-"5"5"21"58H"H 0 47G G #6 # #K "(()9!)<?O)OPPP$,<(=(=(AA$(//4[A)*1.    "(()9!)<?O)OPPP(//#45Ea5HI   
 &*%0@*A*A%B%B"$ 3 3&--c2222
1k1 '*7  b+)+ + +O #++--H* * *$+#4"'	   w&&")K+?K(6AD&{3r   r\   c                 <   t                               |          fdt          |j                  D             }|s!d                    fd|D                       }| j                            i            t          g t          t          j                            | j                 	                                R  }||v rt          | d dd| d          | j        v r"|| j                 v r| j                 |         S t          j        |j                                                  }t          t!          j        j                            }|D ]}|                    |            j        j        g ||R                      d|          }                                }	|D ] }
t-          j        |
|f	          }|	|
v r|}!| j        |<   || j                            i           |<   t3          |          | j                 |<   |S )
Nc                 ^    g | ])}t          j                                      |          *S r   )r   rI   index)rC   flattened_mesh_dim_name	root_meshs     r   rE   z0_MeshEnv.create_flatten_mesh.<locals>.<listcomp>   sC     $ $ $+ 122889PQQ$ $ $r   _c                 D    g | ]}t          j                  |         S r   )r   rI   )rC   dimri   s     r   rE   z0_MeshEnv.create_flatten_mesh.<locals>.<listcomp>   s9        !!9::3?  r   z# already exists for submesh of the . z5The mesh_dim_names of submesh and flattened mesh are z-. Please specify another valid mesh_dim_name.r3   rI   )_mesh_resourcesget_root_meshr   rI   joinr.   
setdefaultr   rO   keysr5   r-   mathprodr>   r?   rP   rQ   rR   rS   rT   r   r   rU   r+   tuple)r0   r7   r\   flatten_dims_in_rootinvalid_dim_namesflattened_mesh_dim_sizeremained_dims_in_rootflatten_dim_in_rootr_   r`   ra   flattened_meshres_flattened_meshri   s                @r   create_flatten_meshz_MeshEnv.create_flatten_mesh   s    (55kBBI$ $ $ $/78R/S/S$ $ $ 
 !  #   #7  ! ! *55iDDD % !hy78899!/	:??AA! ! !  111"$VVVVVCL] C C C   T999!T%A)%LLL3I>}MM&*i0@0E0E0G0G&H&H#$(y~/B)C)C$D$D!'; B B#%,,-@AAAA4in4 &)=  gb122  !))++H* 8 8!+)$1#3" " "
 w&&)7&=FD&'9:TfD(33IrBB=QGLMaGbGbD*95mD%%r   c                 D    | j                             |d           }|s|n|S r   )r+   get)r0   r7   ri   s      r   rp   z_MeshEnv.get_root_mesh   s+     266{DIII&/>;;Y>r   c                     |                      |          }|j        }|r=|r;t          |          dk    s
J d            |d         }|                     ||          S dS )z
            Returns the index of the mesh dim in the root mesh.
            The device_mesh passed in needs to be sliced out from the root mesh
            or submesh of the root mesh.
            r<   z"The submesh can only be a 1D mesh.r   N)rp   rI   r4   get_mesh_dim_by_name)r0   r7   ri   child_mesh_dim_nameschild_mesh_dim_names        r   get_root_mesh_dimz_MeshEnv.get_root_mesh_dim   s~     **;77I#.#=  Q1 Q,--2227 322&:1&=#00<OPPP4r   rU   c                 D    t          |                                           S r   )_get_device_handledevice_countrU   s    r   num_devices_per_hostz_MeshEnv.num_devices_per_host   s    %k22??AAAr   c                 T    t                      t                              |           z  S r   )r    r'   r   r   s    r   	num_hostsz_MeshEnv.num_hosts   s#     "##x'D'D['Q'QQQr   c                     |j         t          |j                   dk    rt          d          ||j         vrt          d| dd|j                    t          |j                             |                    S )Nr   zNo `mesh_dim_names` found.zMesh dimension 'z' does not exist.z.Available mesh dimensions are: mesh_dim_names=)rI   r4   KeyErrorr   rg   )r0   r7   r\   s      r   r   z_MeshEnv.get_mesh_dim_by_name  s     *2{122a770   K$>>>G}GGGa[E_aa   K6<<]KKLLLr   rl   backend
pg_optionsc                     ||f| j         |<   d S r   )r,   )r0   rl   r   r   s       r   _set_mesh_dim_group_optionsz$_MeshEnv._set_mesh_dim_group_options  s     18/DD',,,r   c                 (  	 ||                      |          k    rt          d          | j                            |i            | j        |         }g |j        |	t          	fd|D                       st          d| d	 d          d}g }|D ]}}||v r&||         }|d         }|                    |           n0|j                            |          }|                    |f           ||k    rt          d| dd| d	d
          |}~|S )z
            Validate whether the mesh_dim_names is valid for slicing the given device_mesh.
            If valid, return dim indexes of the slice mesh in the device mesh.
            z'Cannot create a submesh from a submesh.c              3       K   | ]}|v V  	d S r   r   )rC   r\   valid_mesh_dim_namess     r   	<genexpr>z0_MeshEnv._get_slice_mesh_dims.<locals>.<genexpr>1  s=        ! !55     r   zInvalid mesh_dim_names z% specified. Valid mesh_dim_names are .r3   z specified. z!Found mesh dim indices to slice: rm   z.Mesh dim indices should be in ascending order.)	rp   r5   r.   rr   rI   allr   rM   rg   )
r0   r7   rI   r.   curr_idxslice_mesh_dimsr\   mesh_indicesnext_idxr   s
            @r   _get_slice_mesh_dimsz_MeshEnv._get_slice_mesh_dims  s    d00===="#LMMM *55k2FFF(,(F{(S%$+$*$ 
     %3      Hn H H0DH H H   H O!/ $ $ $===#<]#KL  ,B/H#**<8888*9??NNH#**H;777x''"N.NNNOOOOOH  
 $""r   c                    |                      ||          }|j                            d|                              d|j                            |                    }|                                }g }|D ]I}t          |j        ||fd          }||v r|j        |         gng |_        |	                    |           J|S )z`
            Return all the submeshes of a given mesh dimension of the device mesh.
            r3   FrH   )
r   r>   swapdimsrT   r?   r   r   rU   rN   rM   )	r0   r7   r\   rD   r_   r`   res_submeshesmesh_1drb   s	            r   _get_all_submeshesz_MeshEnv._get_all_submeshesR  s     00mLLH).77HEEMMK$))(33 O #++--HM* . .$+$1#3"'	    7** !1(;<< (
 $$W----  r   r(   Nr(   r   r   )r7   r   r(   r   )r   r   r   r1   r6   r   strr   intrd   r   r~   rp   r   staticmethodr   r   r   r$   Optionsr   r   r   r   r   r   r'   r'   @   s^       
	 
	 
	 
		' 	' 	' 	'
N	%N	  %S#XN	 uS#X/	N	
 N	 N	 N	 N	b MQ@	& @	&+@	&<DSM@	&@	& @	& @	& @	&D	? 	? 	? 	?	 	(3- 	 	 	 	  
	Bc 	Bc 	B 	B 	B 
	B 
	R3 	R3 	R 	R 	R 
	R
	M+	M<?	M	M 	M 	M 	M* :>		E 	E	E 	E !!56		E
 	E 	E 	E 	E2	#%S/"2	# 2	# 2	# 2	#h	!+	!<?	!,	! 	! 	! 	! 	! 	!r   r'   ro   cudarU   c                 .    t          t          | d          S )a:  
        Get the module corresponding to the device_type which is cuda or cuda-like device.
        For example, when the device_type is cuda, the module `torch.cuda` is returned.
        Return None when there is no corresponding module for device_type, otherwise
        return the corresponding module.
        N)getattrtorchr   s    r   r   r   q  s     uk4000r   c                      e Zd ZU dZeed<   ej        ed<   ee	edf                  ed<   ddddede
ej        d	f         dee	edf                  d
eddf
dZd Zd Zd$dZd%dZdefdZd ZdedefdZde
ee	edf         f         dd fdZd&dee
eef                  defdZdee         fdZe	 d&ddde
eee         f         dedee
ej        d	f                  dee	edf                  dd f
d            Zd&dee         defdZedefd            Zede	edf         fd            Z defdZ!d&dee
eef                  defd Z"deee                  fd!Z#d&d"ee         dd fd#Z$dS )'r   a  
        DeviceMesh represents a mesh of devices, where layout of devices could be
        represented as a n-d dimension array, and each value of the n-d dimensional
        array is the global id of the default process group ranks.

        DeviceMesh could be used to describe the layout of devices across the cluster,
        and serves as a proxy for communication among the device lists within the cluster.

        DeviceMesh can be used as a context manager.

        .. note::
            DeviceMesh follows SPMD programming model, which means the same PyTorch Python program
            is running on all processes/ranks in the cluster. Therefore, users need to make sure the
            `mesh` array (which describes the layout of devices) should be identical across all ranks.
            Inconsistent `mesh` will lead to silent hang.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
            mesh (ndarray): A multi-dimensional array or an integer tensor describing the layout
                of devices, where the IDs are global IDs of the default process group.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        The following program runs on each process/rank in an SPMD manner. In this example, we have 2
        hosts with 4 GPUs each.
        A reduction over the first dimension of mesh will reduce across
        columns (0, 4), .. and (3, 7), a reduction over the second dimension
        of mesh reduces across rows (0, 1, 2, 3) and (4, 5, 6, 7).

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import DeviceMesh
            >>>
            >>> # Initialize device mesh as (2, 4) to represent the topology
            >>> # of cross-host(dim 0), and within-host (dim 1).
            >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
        rU   r>   .rI   NTrH   r%   rJ   r(   c                   || _         t          |t          j                  r"|j        j        dk    rt          d|           t          |t          j                  r2|                                                    t          j	                  n t          j
        |dt          j	                  | _        |rt          |          nd | _        t          | j                                                                                  | _        d | _        |dk    r|r(|                                  |                                  t)                      r*t+                      dk    rt-          j                    | _        | j        t1                      k                                    }|                    d          dv sJ |                    d          dk    r|d                                         nd | _        d S d S )	Ncpuz!`mesh` must be a CPU tensor, got dtypedevicer   xlathreadedr   )r   r<   )rU   
isinstancer   Tensorr   type
ValueErrordetachtor   tensorr>   rv   rI   rL   tolist_flatten_mesh_list
_thread_id_get_or_create_default_group_init_process_groupsr"   r   	threading	get_identr   nonzeror?   _coordinate_on_dim)r0   rU   r>   rI   rJ   rank_coordss         r   r1   zDeviceMesh.__init__  s     +D$-- M$+2Be2K2K !KT!K!KLLL dEL11G  uy 111\$uEIFFF I
 <J"S%"7"7"7tD ',DI,=,=,?,?,F,F,H,H&I&ID#"DO e## ! 055777--///!## <(C(C&/&9&;&;DO  $yHJJ6??AA"''**f4444/:/?/?/B/BQ/F/FKN))+++D ''' $#r   c           	         t                      }|st                       t                      }| j                                        |k    r-t          d| d| j                                         d          t          | j                  }|sg|re|                                }||k    r'||z  dk    rt          d| d| d| j         d          |	                    t                      |z             t                      S )	Nz2Mesh should not be bigger than default world size z, but found z ranks!r   z8DeviceMesh only support homogeneous hardware, but found z ranks and  z	 devices!)r"   r!   r    r>   numelr5   r   rU   r   
set_devicer   r   )r0   default_initialized
world_sizedevice_handler   s        r   r   z'DeviceMesh._get_or_create_default_group  sJ   "0"2"2& %"$$$'))Jy  :--"{{{aeajapaparar{{{   /t/?@@M& L= L (5'A'A'C'C$!555"%99Q>>&e%e e2Fe eIMIYe e e   ((6J)JKKK%'''r   c           	      X   g }| j         j        dk    r| j                                         t                      k    rt	                      }t          t          t                                          }t          j        	                                r$t          |          dk    rt          d|          n|}|                    t          |          ||j        f           nAt          | j         j                  D ]&}| j                             d|                              d| j                             |                    }|D ]}|                                }|t&          j        v rt&          j        |         \  }	}
nd\  }	}
t          ||	|
          }|                                 |v rrt-          |          |k    r(t/          d|                                  d	| d
          |                    t          t1          |                    ||j        f           ڐ(|| _        d S )Nr<   gloozcpu:gloo,cuda:nccl)r   ranksr3   )NN)r   r   r   zFEach device mesh dimension should get only one process group, but got z in !)r>   rQ   r   r    r   rO   rP   r   r   r   r   r#   rM   r   
group_namer   rT   r?   r   ro   r,   r   r4   r5   r   rN   )r0   dim_group_infosdefault_groupr   	dim_grouprl   r_   dim_meshsubgroup_ranksr   r   s              r   r   zDeviceMesh._init_process_groups  sk    ACOy~""ty'8'8N<L<L'L'L !3 4 4U>#3#34455 z..00'#M22f<< &:%HHHH '	   &&&y11!,    !00 + +C '+i&8&8S&A&A&I&IDINN3//' 'O
 %4 # #)1):): /"HHH !0 Fs K ' *
 3=/GZ
 %."0$+'1% % %	  ==??n<<"?33c99&2%<mqmzmzm|m| %< %<*8%< %< %<'" '" !" ,22$28I3F3F$G$G$2$-$8!"  ;#H %4D!!!r   c                 D    t           j                            |            | S r   )ro   r*   rM   r/   s    r   	__enter__zDeviceMesh.__enter__8  s    &--d333Kr   c                 B    t           j                                         d S r   )ro   r*   pop)r0   exc_type	exc_valueexc_tracebacks       r   __exit__zDeviceMesh.__exit__>  s    &**,,,,,r   c                     | j         s%d| j         d| j                                         dn,d| j         d| j                                         d| j          d}|S )NzDeviceMesh('z', )z, mesh_dim_names=)rI   rU   r>   r   )r0   device_mesh_reprs     r   __repr__zDeviceMesh.__repr__B  sz     *uIt/IIDI4D4D4F4FIIIItD$4tt9I9I9K9Ktt^b^qttt 
 $#r   c                     t          | dd           | _        | j        s7t          | j        | j        j        | j        | j        | j        f          | _        | j        S )N_hash)	r   r   hashr   r>   shaperU   rI   r   r/   s    r   __hash__zDeviceMesh.__hash__J  s]     w55DJ: 	!/	(+ 
 :r   otherc                 (   t          |t                    sdS t          |           t          |          k    rdS | j        |j        k    oI| j        j        |j        j        k    o/| j        |j        k    o| j        |j        k    o| j        |j        k    S )NFT)	r   r   idr   r>   r   rU   rI   r   )r0   r   s     r   __eq__zDeviceMesh.__eq__Y  s    eZ00 u$xx2e99$$t +u/GG <	5:+;;<(E,==< +u/CC< 5+;;r   c                     | j         st          d          t          |t                    r|fn|}|| j         k    r| S t                              | |          }t                              | ||          }|S )aU
  
            Slice the current DeviceMesh based on the mesh_dim_names given to create a submesh.
            The submesh created consists of the dimensions and the communicators indicated by
            ``mesh_dim_names``

            Args:
                mesh_dim_names (Union[str, Tuple[str]]): the name or the tuple of names of the
                mesh dimension of the DeviceMesh to create the submesh for.
            Returns:
                A :class:`DeviceMesh` object

            The following program runs on each process/rank in an SPMD manner in a world size of 8.
            In the first example:
                Calling mesh_2d["tp"] on rank 0, 1, 2, 3 returns a 1D submesh of DeviceMesh:([0, 1, 2, 3]).
                Calling mesh_2d["tp"] on rank 4, 5, 6, 7 returns a 1D submesh of  DeviceMesh:([4, 5, 6, 7]).
                Calling mesh_2d["dp"] on rank 0, 4 returns a 1D submesh of  DeviceMesh:([0, 4]).
                Calling mesh_2d["dp"] on rank 1, 5 returns a 1D submesh of  DeviceMesh:([1, 5]).
                Calling mesh_2d["dp"] on rank 2, 6 returns a 1D submesh of  DeviceMesh:([2, 6]).
                Calling mesh_2d["dp"] on rank 3, 7 returns a 1D submesh of  DeviceMesh:([3, 7]).

            In the second example:
                Calling mesh_3d["dp", "cp"] on rank 0, 1, 4, 5 returns a 2D submesh of DeviceMesh:([[0, 1], [4, 5]]).
                Calling mesh_3d["dp", "cp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 3], [6, 7]]).
                Calling mesh_3d["cp", "dp"] on rank 0, 1, 4, 5 returns a 2D submesh of DeviceMesh:([[0, 4], [1, 5]]).
                Calling mesh_3d["cp", "dp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 6], [3, 7]]).

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize a 2D device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh_2d = init_device_mesh(device_type="cuda", (2,4), mesh_dim_names=("dp", "tp"))
                >>> tp_mesh = mesh_2d["tp"]
                >>> dp_mesh = mesh_2d["dp"]
                >>>
                >>> # Initialize a 3D mesh.
                >>> mesh_3d = init_device_mesh(device_type="cuda", (2,2,2), mesh_dim_names=("dp", "pp", "cp"))
                >>> # The order of the mesh_dim_names provided deteremines the order of dimensions in the submesh.
                >>> dp_cp_mesh = mesh_3d["dp", "cp"]
                >>> cp_dp_mesh = mesh_3d["cp", "dp"]
            z1Cannot slice a DeviceMesh without mesh_dim_names!)rI   r5   r   r   ro   r   rd   )r0   rI   r   rb   s       r   __getitem__zDeviceMesh.__getitem__g  s    Z & X"#VWWW &0%D%DX!!.  !444"1"F"F.# # *99./  r   rD   c                    t          | d          st          d          | j        j        dk    r!|t          d| j        j         ddd          | j        j        dk    r+|)t	          t          | j        d	         dd
                    S t                              |           }t          j	        
                    |d          }|rG||                                v r1||         j        d	         dd
         }t	          t          |           S t          |t                    rt                              | |          n|}t	          t          | j        |         dd
                    S )a  
            Returns the single ProcessGroup specified by mesh_dim, or, if mesh_dim is not specified and the
            DeviceMesh is 1-dimensional, returns the only ProcessGroup in the mesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                A :class:`ProcessGroup` object.
            rN   z*DeviceMesh process groups not initialized!r<   NFound the DeviceMesh have  dimensionsJOptional kwarg `mesh_dim` needs to be specified when device_mesh.ndim > 1.zmIf you want to get the list of all the ProcessGroups in the DeviceMesh,please use `get_all_groups()` instead.r      )hasattrr5   r>   rQ   r   r   rN   ro   rp   r-   r   rs   r   r   r   )r0   rD   ri   r-   r   s        r   	get_groupzDeviceMesh.get_group  s    4!344 Q"#OPPPy~!!h&6"LLLL`=   y~""x'7-t/DQ/G/KL   (55d;;I&5&M&Q&Q4' '# ' 87N7S7S7U7U+U+U"9("C"TUV"WXZYZXZ"[ 9? KLLL "(C00"O88xHHH! 
  -t/DX/NrPQr/RS  r   c                 N      fdt           j        j                  D             S )z
            Returns a list of ProcessGroups for all mesh dimensions.

            Returns:
                A list of :class:`ProcessGroup` object.
            c                 :    g | ]}                     |          S r   )r   )rC   ir0   s     r   rE   z-DeviceMesh.get_all_groups.<locals>.<listcomp>  s%    EEE!DNN1%%EEEr   )rP   r>   rQ   r/   s   `r   get_all_groupszDeviceMesh.get_all_groups  s,     FEEEuTY^/D/DEEEEr   rn   groupc                   t          | t                    rt          |           }t          |t          j                  r|                                |k    s|(||k    r"t          dt          |           d|           t          j        |dt          j	                  }t          |||d          }t          |           || j        fg|_        |S t          |           }t          |          dk    rt          d	          |t          d
          t          |t          j                  r3|                                                    t          j	        d          n t          j        |dt          j	                  }|j        t          |          k    r5t          d|                                 dt          |           d          t          |||d          }d |D             |_        |S )aM  
            Constructs a :class:`DeviceMesh` with ``device_type`` from an
            existing :class:`ProcessGroup`.

            The constructed device mesh has number of dimensions equal to the
            number of groups passed. If more than one group is passed, then the
            ``mesh`` argument is required.
            NzInvalid mesh z for ProcessGroup with ranks r   r   FrH   r   z.Expects at least one ProcessGroup to be passedz0Must pass mesh if passing multiple ProcessGroups)r   r   zEExpects mesh with ndim equal to number of ProcessGroups but got mesh z and z ProcessGroupsc                 V    g | ]&}t          |          t          |          |j        f'S r   )r   r   r   )rC   r   s     r   rE   z)DeviceMesh.from_group.<locals>.<listcomp>  sH     , , , 	 #5))+E22$, , ,r   )r   r$   r   r   r   r   r   r   r   r   r   r   r   rN   rO   r4   r   r   rQ   )r   rU   r>   rI   group_ranksr7   groupss          r   
from_groupzDeviceMesh.from_group  s&     %.. #5e<<tU\227;{{}}7S7S&4;+>+>$]D		]]P[]]   |KUYOOO(#1"'	   $E**K9IJ0, #"%[[F6{{a !QRRR| !STTT dEL11G  uy ???\$uEIFFF 
 yCKK'' L KKMML L03FL L L   %T.PU  K, , $, , ,K( r   c                 l    || j                                         n| j                             |          S r   )r>   r   r?   )r0   rD   s     r   r?   zDeviceMesh.size  s,    (0(849??$$$dinnX>V>VVr   c                     | j         j        S r   )r>   rQ   r/   s    r   rQ   zDeviceMesh.ndim  s    9>!r   c                 4    t          | j        j                  S r   )rv   r>   r   r/   s    r   r   zDeviceMesh.shape#  s    )))r   c                     t                      S )z:
            Returns the current global rank.
            )r   r/   s    r   r   zDeviceMesh.get_rank'  s     ::r   c                    | j         dk    r |t          d| j        j          dd          |d}t          |                     |                    }t          |t                    s
J d            t          t          |                    S )a{  
            Returns the local rank of the given mesh_dim of the DeviceMesh.

            Args:
                mesh_dim (str/int, optional): it can be the name of the mesh dimension or the index
                of the mesh dimension. Default is None.

            Returns:
                An integer denotes the local rank.

            The following program runs on each process/rank in an SPMD manner. In this example, we have 2
            hosts with 4 GPUs each.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 0, 1, 2, 3 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=0) on rank 4, 5, 6, 7 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 0, 4 would return 0.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 1, 5 would return 1.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 2, 6 would return 2.
            Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 3, 7 would return 3.

            Example::
                >>> # xdoctest: +SKIP("no rank")
                >>> from torch.distributed.device_mesh import DeviceMesh
                >>>
                >>> # Initialize device mesh as (2, 4) to represent the topology
                >>> # of cross-host(dim 0), and within-host (dim 1).
                >>> mesh = DeviceMesh(device_type="cuda", mesh=[[0, 1, 2, 3],[4, 5, 6, 7]])
            r<   Nr   r   r   r   z1We expect ProcessGroup before calling `get_rank`!)rQ   r5   r>   r   r   r   r$   r   )r0   rD   mesh_dim_groups      r   get_local_rankzDeviceMesh.get_local_rank-  s    8 y1}}!1"LLLL`   !%dnnX&>&>??N  C CBC C C H^44555r   c                 "    | j         r| j         ndS )z
            Return the relative indices of this rank relative to all
            dimensions of the mesh. If this rank is not part of the mesh, return None.
            N)r   r/   s    r   get_coordinatezDeviceMesh.get_coordinateW  s    
 /3.EO4**4Or   r\   c                 d    | j         st          d          t                              | |          S )a\  
            Returns a 1D DeviceMesh by flattening the current DeviceMesh.

            If no mesh_dim_name is provided, the default is a string concatentaing the mesh_dim_names of the
            given submesh with each mesh_dim_name separated by "_". For example, if we have a 3D mesh
            DeviceMesh([[[0, 1], [2, 3]], [[4, 5], [6, 7]]], mesh_dim_names=("dp", "cp", "tp")), calling
            mesh_3d["dp", "cp"]._flatten() will create a 1D submesh DeviceMesh([0, 1, 2, 3], mesh_dim_names=("dp_cp",))
            on rank 0, 1, 2, 3 and a 1D submesh DeviceMesh([4, 5, 6, 7], mesh_dim_names=("dp_cp",)) on rank 4, 5, 6, 7.

            After the flattened dimension is created, to access the flattened dimesnion in mesh_3d, one can use the
            existing slicing method to obtain the flattened mesh through calling mesh_3d["dp_cp"].
            z3Cannot flatten a DeviceMesh without mesh_dim_names!)rI   r5   ro   r~   )r0   r\   s     r   _flattenzDeviceMesh._flatten^  s<     & "I   #66t]KKKr   r   r   r   )%r   r   r   __doc__r   __annotations__r   r   r   r   r
   boolr1   r   r   r   r   r   r   objectr   r   r   r$   r   r   r   r   r   r?   propertyrQ   r   r   r  r  r
  r   r   r   r   r   z  s        %	 %	N l sCx1111 9="&(	 (	 (	(	 k12(	
 %U38_5(	  (	 (	 (	 (	 (	T	( 	( 	(:I	4 I	4 I	4V	 	 	 		- 	- 	- 	-	$c 	$ 	$ 	$ 	$	 	 		 	4 	 	 	 	=	"'U38_(<"==	=	 =	 =	 =	~,	 ,	huS#X&? ,	< ,	 ,	 ,	 ,	\	FD$6 	F 	F 	F 	F 
 @D<	
 9=<	 <	 <	tL'99:<	<	 5{!:;<<	
 %U38_5<	 <	 <	 <	 
<	|	W 	W# 	W# 	W 	W 	W 	W 
	"# 	" 	" 	" 
	" 
	*5c? 	* 	* 	* 
	*	c 	 	 	 	(	6 (	68E#s(O+D (	6PS (	6 (	6 (	6 (	6T	PHT#Y$7 	P 	P 	P 	P	L 	L(3- 	L< 	L 	L 	L 	L 	L 	Lr   rn   
mesh_shape.rI   r(   c          	         |t          t          |                    t          |          k    rt          dd|           t          |          t          |          k    r1t          ddt          |           dt          |           d          | r(|                                 st          d|  d	d
          t	          j        d          5  t	          j        t          j        |          t          j	                  
                    |          }ddd           n# 1 swxY w Y   t          | ||          }|S )a  
        Initializes a `DeviceMesh` based on `device_type`, `mesh_shape`, and `mesh_dim_names` parameters.

        This creates a DeviceMesh with an n-dimensional array layout, where `n` is the length of `mesh_shape`.
        If `mesh_dim_names` is provided, each dimension is labeled as `mesh_dim_names[i]`.

        .. note::
            `init_device_mesh` follows SPMD programming model, meaning the same PyTorch Python program
            runs on all processes/ranks in the cluster. Ensure `mesh_shape` (the dimensions of the nD array
            describing device layout) is identical across all ranks. Inconsistent `mesh_shape` may lead to hanging.

        .. note::
            If no process group is found, init_device_mesh will initialize distributed process group/groups
            required for distributed communications behind the scene.

        Args:
            device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like".
                Passing in a device type with a GPU index, such as "cuda:0", is not allowed.
            mesh_shape (Tuple[int]): A tuple defining the dimensions of the multi-dimensional array
                describing the layout of devices.
            mesh_dim_names (Tuple[str], optional): A tuple of mesh dimension names to assign to each dimension
                of the multi-dimensional array describing the layout of devices. Its length must match the length
                of `mesh_shape`. Each string in `mesh_dim_names` must be unique.

        Returns:
            DeviceMesh: A :class:`DeviceMesh` object representing the device layout.

        Example::
            >>> # xdoctest: +SKIP("no rank")
            >>> from torch.distributed.device_mesh import init_device_mesh
            >>>
            >>> mesh_1d = init_device_mesh("cuda", mesh_shape=(8,))
            >>> mesh_2d = init_device_mesh("cuda", mesh_shape=(2, 8), mesh_dim_names=("dp", "tp"))

        Nz"Each mesh_dim_name must be unique.z/Found repeated mesh_dim_name in mesh_dim_names z6mesh_shape and mesh_dim_names should have same length!zFound len(mesh_dim_names): z and len(mesh_shape):r   z4Device type with GPU index is not supported but got rm   zUIf you maintained a 'torch.device' object, it's recommended to pass in 'device.type'.r   r   )rU   r>   rI   )r4   setr5   isalphar   r   arangert   ru   r   viewr   )rU   r  rI   r>   r7   s        r   r   r   r  s   R %3~&&''3~+>+>>>"8VnVV  
 :#n"5"555"Ln#n2E2Enn\_`j\k\knnn    	{2244 	V{VVVg   \%   	Y 	Y<	* 5 5UYGGGLLZXXD	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y #)
 
 
 s   AD$$D(+D()r   )4loggingrt   r   	functoolsr   	itertoolsr   typingr   r   r   r   r	   r
   r   torch.distributedr   torch.utils._typing_utilsr   __all__sysr   r   modulesr   r   "torch.distributed.distributed_c10dr   r   r   r   r   r   r    r!   r"   r#   r$   	getLoggerr   loggernumpy.typingr%   ImportErrorwarninglocalr'   ro   r  r   r   r   r   r   r   <module>r&     sE                     D D D D D D D D D D D D D D D D  * * * * * * . . . . . . |
, |~~ gJJJ          ?NCK/0; 0 K'                          Wx((F  	....... 	 	 	NNU    	
m! m! m! m! m!9? m! m! m!^	 !)

OX***1 1 1 1 1 1vL vL vL vL vL vL vL vLx 59	G G GG#s(OG !sCx1	G
 
G G G G G Gs   /B6 6CC