
    Ng8                    T    d Z ddlmZ ddlZddlmZmZ  G d dej                  ZdS )zDomain-model for file-types.    )annotationsN)Iterablecastc            
      (   e Zd ZU dZded<   	 ded<   	 ded<   	 ded<   	 ded	<   	 ded
<   	 ddZddZedd            Zedd            Z	e
dd            Ze
dd            Ze
dd            Ze
dd             Ze
dd!            Ze
dd"            Ze
dd#            Zd$d%d&gd%d'gd( eee         g           fZd)d)d*gd)d+gd,g d-fZd.d.d/gd.d0gd1 eee         g           fZd/d/d/gd/d2gd3 eee         g           fZd4d5 eee         g           d6d7d8gd9 eee         g           fZd:d:d;gd:d<gd=d>gfZd?d%d&gd%d@gdA eee         g           fZdBdB eee         g           d6dCdDgdE eee         g           fZdFd%d&gd%dGdHgdI eee         g           fZdJdJ eee         g           d6dKgdL eee         g           fZdMdMdNgdMdOgdPdQgfZdRdRdSgdRdTgdU eee         g           fZ dVdVd/d;gdVdWgdX eee         g           fZ!dYdYd;gdYdZgd[ eee         g           fZ"d\d\g d]d\d^gd_ eee         g           fZ#d`d%d&gd%dagdb eee         g           fZ$dcdcddgdcdegdf eee         g           fZ%ddddddgdddggdh eee         g           fZ&didid;gdidjgdk eee         g           fZ'dldld;gdldmgdndogfZ(dpd%d&gd%dqgdr eee         g           fZ)dsdsd*gdsdtdugdv eee         g           fZ*dwdx eee         g           d6g dydzg d{fZ+d|d6 eee         g           d6d}gd~g dfZ,ddd*dgddgd eee         g           fZ-ddd*dgddgd eee         g           fZ.dd eee         g           d6dgddgfZ/dd6 eee         g           d6dgd eee         g           fZ0dd6 eee         g           d6 eee         g           d eee         g           fZ1dd6 eee         g           d6 eee         g           d eee         g           fZ2d6S )FileTypezThe collection of file-types recognized by `unstructured`.

    Note not all of these can be partitioned, e.g. WAV and ZIP have no partitioner.
    
str | None_partitioner_shortnametuple[str, ...] _importable_package_dependencies_extra_name_extensionsstr_canonical_mime_type_alias_mime_typesvaluepartitioner_shortnameimportable_package_dependenciesIterable[str]
extra_name
extensionscanonical_mime_typealias_mime_typesc                    t                               |           }||_        ||_        t	          |          |_        ||_        t	          |          |_        ||_        t	          |          |_	        |S )N)
object__new___value_r	   tupler   r   r   r   r   )	clsr   r   r   r   r   r   r   selfs	            Y/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/file_utils/model.pyr   zFileType.__new__!   sk     ~~c""&;#056U0V0V-% ,,$7!!&'7!8!8    otherreturnboolc                "    | j         |j         k     S )zMakes `FileType` members comparable with relational operators, at least with `<`.

        This makes them sortable, in particular it supports sorting for pandas groupby functions.
        )name)r   r"   s     r    __lt__zFileType.__lt__5   s    
 y5:%%r!   	extensionFileType | Nonec                d    |dv rdS | j                                         D ]}||j        v r|c S dS )aP  Select a FileType member based on an extension.

        `extension` must include the leading period, like `".pdf"`. Extension is suitable as a
        secondary file-type identification method but is unreliable for primary identification.

        Returns `None` when `extension` is not registered for any supported file-type.
        )N .N)__members__valuesr   )r   r(   ms      r    from_extensionzFileType.from_extension<   sS     ''4 '')) 	 	AAM)) *tr!   	mime_typec                v    |dS | j                                         D ]}||j        k    s	||j        v r|c S dS )zSelect a FileType member based on a MIME-type.

        Returns `None` when `mime_type` is `None` or does not map to the canonical MIME-type of a
        `FileType` member or one of its alias MIME-types.
        N)r-   r.   r   r   )r   r1   r/   s      r    from_mime_typezFileType.from_mime_typeN   s]     4 '')) 	 	AA222i1CV6V6V 7Wtr!   c                    | j         S )a  The `pip` "extra" that must be installed to provide this file-type's dependencies.

        Like "image" for PNG, as in `pip install "unstructured[image]"`.

        `None` when partitioning this file-type requires only the base `unstructured` install.
        )r   r   s    r    r   zFileType.extra_name^   s     r!   c                    | j         S )a  Packages that must be importable for this file-type's partitioner to work.

        In general, these are the packages provided by the `pip install` "extra" for this file-type,
        like `pip install "unstructured[docx]"` loads the `python-docx` package.

        Note that these names are the ones used in an `import` statement, which is not necessarily
        the same as the _distribution_ package name used by `pip`. For example, the DOCX
        distribution package name is `"python-docx"` whereas the _importable_ package name is
        `"docx"`. This latter name as it appears like `import docx` is what is provided by this
        property.

        The return value is an empty tuple for file-types that do not require optional dependencies.

        Note this property does not complain when accessed on a non-partitionable file-type, it
        simply returns an empty tuple because file-types that are not partitionable require no
        optional dependencies.
        )r   r5   s    r    r   z(FileType.importable_package_dependenciesh   s    & 44r!   c                *    t          | j                  S )a~  True when there is a partitioner for this file-type.

        Note this does not check whether the dependencies for this file-type are installed so
        attempting to partition a file of this type may still fail. This is meant for
        distinguishing file-types like WAV, ZIP, EMPTY, and UNK which are legitimate file-types
        but have no associated partitioner.
        )r$   r	   r5   s    r    is_partitionablezFileType.is_partitionable}   s     D/000r!   c                    | j         S )aL  The canonical MIME-type for this file-type, suitable for use in metadata.

        This value is used in `.metadata.filetype` for elements partitioned from files of this
        type. In general it is the "offical", "recommended", or "defacto-standard" MIME-type for
        files of this type, in that order, as available.
        )r   r5   s    r    r1   zFileType.mime_type   s     ((r!   c                N    | j         x}t          d| j         d          d| S )zName of partitioner function for this file-type. Like "partition_docx".

        Raises when this property is accessed on a file-type that is not partitionable. Use
        `.is_partitionable` to avoid exceptions when partitionability is unknown.
        Nz;`.partitioner_function_name` is undefined because FileType.b is not partitionable. Use `.is_partitionable` to determine whether a `FileType` is partitionable.
partition_r	   
ValueErrorr&   r   	shortnames     r    partitioner_function_namez"FileType.partitioner_function_name   sL     44I=&di & & &  
 (I'''r!   c                N    | j         x}t          d| j         d          d| S )zFully-qualified name of module providing partitioner for this file-type.

        e.g. "unstructured.partition.docx" for FileType.DOCX.
        Nz:`.partitioner_module_qname` is undefined because FileType.r;   zunstructured.partition.r=   r?   s     r    partitioner_module_qnamez!FileType.partitioner_module_qname   sL     44I=&TY & & &  
 5444r!   c                    | j         S )az  Familiar name of partitioner, like "image" for file-types that use `partition_image()`.

        One use is to determine whether a file-type is one of the five image types, all of which
        are processed by `partition_image()`.

        `None` for file-types that are not partitionable, although `.is_partitionable` is the
        preferred way of discovering that.
        )r	   r5   s    r    r   zFileType.partitioner_shortname   s     **r!   bmpimageunstructured_inferencez.bmpz	image/bmpcsvpandasz.csvztext/csv)zapplication/csvzapplication/x-csvztext/comma-separated-valuesztext/x-comma-separated-valuesz
text/x-csvdocdocxz.doczapplication/mswordz.docxzGapplication/vnd.openxmlformats-officedocument.wordprocessingml.documentemlemailNz.emlz.p7szmessage/rfc822epubpypandocz.epubzapplication/epubzapplication/epub+zipheicz.heicz
image/heichtmlz.htmlz.htmz	text/htmljpgz.jpegz.jpgz
image/jpegjsonz.jsonzapplication/jsonmdmarkdownz.mdztext/markdownztext/x-markdownmsgoxmsgz.msgzapplication/vnd.ms-outlookodtz.odtz'application/vnd.oasis.opendocument.textorgz.orgztext/orgpdf)	pdf2imagepdfminerPILz.pdfzapplication/pdfpngz.pngz	image/pngpptpptxz.pptzapplication/vnd.ms-powerpointz.pptxzIapplication/vnd.openxmlformats-officedocument.presentationml.presentationrstz.rstz
text/x-rstrtfz.rtfztext/rtfzapplication/rtftiffz.tiffz
image/tifftsvz.tabz.tsvztext/tsvtxttext)z.txtz.textz.cz.ccz.cppz.csz.cxxz.goz.javaz.jsz.logz.phpz.pyz.rbz.swiftz.tsz.yamlz.ymlz
text/plain)z	text/yamlzapplication/x-yamlzapplication/yamlztext/x-yamlwavz.wavz	audio/wav)zaudio/vnd.wavzaudio/vnd.wavez
audio/wavezaudio/x-pn-wavzaudio/x-wavxlsxlsxopenpyxlz.xlszapplication/vnd.ms-excelz.xlsxzAapplication/vnd.openxmlformats-officedocument.spreadsheetml.sheetxmlz.xmlzapplication/xmlztext/xmlzipz.zipzapplication/zipunkzapplication/octet-streamemptyzinode/x-empty)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r"   r   r#   r$   )r(   r   r#   r)   )r1   r   r#   r)   )r#   r   )r#   r
   )r#   r$   )r#   r   )3__name__
__module____qualname____doc____annotations__r   r'   classmethodr0   r3   propertyr   r   r8   r1   rA   rC   r   r   listr   BMPCSVDOCDOCXEMLEPUBHEICHTMLJPGJSONMDMSGODTORGPDFPNGPPTPPTXRSTRTFTIFFTSVTXTWAVXLSXLSXXMLZIPUNKEMPTY r!   r    r   r   	   s         
 '&&&c5555^T    bH&&&&<   (& & & &    ["    [       X  5 5 5 X5( 1 1 1 X1 ) ) ) X) ( ( ( X(" 5 5 5 X5  	+ 	+ 	+ X	+ 		!"	T#YC 		
		
 	
 	
C %&56(4H$$tTWyZ\J]J]
^C			QT#YD 	T#Y	T#YC 					 D 		!"		T#YD 	T#Y	&T#YD 		!"	&T#YC 	T#Y		T#YD zlD5'?EVDW	XB			$T#YC 			1T#YC %*uvh
DDcTVDWDW
XC(((	T#YC 		!"	T#YC 			'T#YC 				ST#YD %*uvhdd4PS9VXFYFY
ZC%*uvh
EVDW
XC	!"		T#YD %(UVV,<j$$tTWyZ\J]J]
^CT#Y	
 	
 	
* 		
 	
 	
7#CJ 	T#Y		
 	
 	
C  		:	"T#YC 		:		KT#YD %d3i,,dVH>OR\Q]
^C$T#Y++TF8=NPTPTUYZ]U^`bPcPc
dC 	T#YT#Y"T#YC 	T#YT#YT#YEEEr!   r   )	rr   
__future__r   enumtypingr   r   Enumr   r   r!   r    <module>r      s~    " " " " " " " "  ! ! ! ! ! ! ! !m m m m mty m m m m mr!   