
    Ng,                    (   d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZmZ 	 d#dddddd$dZ  G d d          Z! G d d           Z" G d! d"          Z#dS )%    )annotationsN)IOAnyIteratorOptional)Message)
Attachment)ElementElementMetadata)FileType)logger)UnsupportedFileFormatError)get_last_modified_date)partition_html)partition_text)is_temp_file_pathlazypropertyT)filemetadata_filenamemetadata_last_modifiedprocess_attachmentsfilenameOptional[str]r   Optional[IO[bytes]]r   r   r   boolkwargsr   returnlist[Element]c               z    t          || ||||          }t          t                              |                    S )a  Partitions a MSFT Outlook .msg file

    Parameters
    ----------
    filename
        A string defining the target filename path.
    file
        A file-like object using "rb" mode --> open(filename, "rb").
    metadata_filename
        The filename to use for the metadata.
    metadata_last_modified
        The last modified date for the document.
    process_attachments
        If True, partition_email will process email attachments in addition to
        processing the content of the email itself.
    )r   	file_pathmetadata_file_pathr   partition_attachmentsr   )MsgPartitionerOptionslist_MsgPartitioneriter_message_elements)r   r   r   r   r   r   optss          V/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/partition/msg.pypartition_msgr)      sH    2 !,51  D 55d;;<<<    c                      e Zd ZdZddZedd            Zedd            Zedd            Zedd            Z	edd            Z
edd            Zed d            Zedd            Zed!d            ZdS )"r#   zVEncapsulates partitioning option validation, computation, and application of defaults.r   IO[bytes] | Noner    
str | Noner!   r   r"   r   r   dict[str, Any]c               Z    || _         || _        || _        || _        || _        || _        d S N)_file
_file_path_metadata_file_path_metadata_last_modified_partition_attachments_kwargs)selfr   r    r!   r   r"   r   s          r(   __init__zMsgPartitionerOptions.__init__=   s5     
##5 '=$&;#r*   r   r   c                ,   | j         }|j        x}rd |                    d          D             nd}d |j        D             pd}|j                            d          x}rd |                    d          D             nd}|j                            d          x}rd |                    d          D             nd}|j                            d	          x}	rt          j        d
d|	          }	t          |||	|||j	        pd          }
d|
_
        |
S )a]  ElementMetadata suitable for use on an element formed from message content.

        These are only the metadata fields specific to email messages. The remaining metadata
        fields produced by the delegate partitioner are used as produced.

        None of these metadata fields change based on the element, so we just compute it once.
        c                6    g | ]}|                                 S  strip).0ss     r(   
<listcomp>z<MsgPartitionerOptions.extra_msg_metadata.<locals>.<listcomp>Y   s     :::1QWWYY:::r*   ,Nc                    g | ]	}|j         
S r;   )email_address)r>   rs     r(   r@   z<MsgPartitionerOptions.extra_msg_metadata.<locals>.<listcomp>Z   s    ;;;q1?;;;r*   Bccc                6    g | ]}|                                 S r;   r<   r>   cs     r(   r@   z<MsgPartitionerOptions.extra_msg_metadata.<locals>.<listcomp>\   s     ///1QWWYY///r*   Ccc                6    g | ]}|                                 S r;   r<   rG   s     r(   r@   z<MsgPartitionerOptions.extra_msg_metadata.<locals>.<listcomp>_   s     ...1QWWYY...r*   z
Message-Idz^<|>$ )bcc_recipientcc_recipientemail_message_id	sent_fromsent_tosubjectmsg)rR   sendersplit
recipientsmessage_headersgetresubr   rQ   detection_origin)r7   rR   rS   rO   rP   bccrL   ccrM   rN   element_metadatas              r(   extra_msg_metadataz(MsgPartitionerOptions.extra_msg_metadataN   sU    hIL?Sv^::S(9(9::::Z^	;;CN;;;Ct;>;N;R;RSX;Y;Y4YCd//		#////`d 	 :=9L9P9PQU9V9V3V2a......]a 	  #266|DDD 	F!vh4DEE*'%-K'4
 
 
 -2)r*   c                F    d| j         j                            dd          v S )zTrue when message is encrypted.	encryptedzContent-TyperK   )rR   rV   rW   r7   s    r(   is_encryptedz"MsgPartitionerOptions.is_encryptedp   s$     dh6::>2NNNNr*   c                    | j         p| j        S )zBest available path for MSG file.

        The value is the caller supplied `metadata_filename` if present, falling back to the
        source file-path if that was provided, otherwise `None`.
        )r3   r2   ra   s    r(   r!   z(MsgPartitionerOptions.metadata_file_path|   s     ':4?:r*   c                j    | j         j        x}r|                                nd}| j        p|p| j        S )zLCaller override for `.metadata.last_modified` to be applied to all elements.N)rR   	sent_date	isoformatr4   _last_modified)r7   re   
email_dates      r(   r   z,MsgPartitionerOptions.metadata_last_modified   sA     =AH<N/NyYY((***UY
+PzPT=PPr*   r   c                4    t          j        | j                  S )z8The `oxmsg.Message` object loaded from file or filename.)r   load	_msg_filera   s    r(   rR   zMsgPartitionerOptions.msg   s     |DN+++r*   c                    | j         S )z9True when message attachments should also be partitioned.)r5   ra   s    r(   r"   z+MsgPartitionerOptions.partition_attachments   s     **r*   c                    | j         S )zThe "extra" keyword arguments received by `partition_msg()`.

        These are passed along to delegate partitioners which extract keyword args like
        `chunking_strategy` etc. in their decorators to control metadata behaviors, etc.
        )r6   ra   s    r(   partitioning_kwargsz)MsgPartitionerOptions.partitioning_kwargs   s     |r*   c                d    | j         rt          | j                   rdS t          | j                   S )zNThe best last-modified date available from source-file, None if not available.N)r2   r   r   ra   s    r(   rg   z$MsgPartitionerOptions._last_modified   s5      	"3DO"D"D 	4%do666r*   str | IO[bytes]c                L    | j         x}r|S | j        x}r|S t          d          )zRThe source for the bytes of the message, either a file-path or a file-like object.z6one of `file` or `filename` arguments must be provided)r2   r1   
ValueError)r7   r    r   s      r(   rk   zMsgPartitionerOptions._msg_file   s<     '9 	:4 	KQRRRr*   N)r   r,   r    r-   r!   r-   r   r-   r"   r   r   r.   )r   r   )r   r   r   r-   )r   r   )r   r.   )r   rp   )__name__
__module____qualname____doc__r8   r   r^   rb   r!   r   rR   r"   rn   rg   rk   r;   r*   r(   r#   r#   :   s]       ``   "       \ B 	O 	O 	O \	O ; ; ; \; Q Q Q \Q
 , , , \, + + + \+    \ 7 7 7 \7 S S S \S S Sr*   r#   c                  Z    e Zd ZdZddZedd            ZddZedd
            Z	ddZ
dS )r%   z-Partitions Outlook email message (MSG) files.r'   r#   c                    || _         d S r0   )_opts)r7   r'   s     r(   r8   z_MsgPartitioner.__init__   s    


r*   r   Iterator[Element]c              #     K   |j         rt          j        d           dS  | |                                          E d{V  dS )?Partition MS Outlook email messages (.msg files) into elements.z@Encrypted email detected. Partitioner will return an empty list.N)rb   r   warning_iter_message_elements)clsr'   s     r(   r&   z%_MsgPartitioner.iter_message_elements   s[        	N]^^^F3t993355555555555r*   c              #     K   |                                  E d{V  | j        j        sdS | j        D ](}t                              || j                  E d{V  )dS )r}   N)_iter_message_body_elementsrz   r"   _attachments_AttachmentPartitioneriter_elements)r7   
attachments     r(   r   z&_MsgPartitioner._iter_message_elements   s      33555555555z/ 	F+ 	T 	TJ-;;J
SSSSSSSSSS	T 	Tr*   tuple[Attachment, ...]c                >    t          | j        j        j                  S )z;The `oxmsg.attachment.Attachment` objects for this message.)tuplerz   rR   attachmentsra   s    r(   r   z_MsgPartitioner._attachments   s     TZ^/000r*   c              #    K   | j         j        }|j        x}r:t          d|| j         j        t
          j        | j         j        d| j         j        }nH|j	        r?t          d|j	        | j         j        t
          j        | j         j        d| j         j        }ng }| j         j        }|D ] }|j                            |           |V  !dS )z5Partition the message body (but not the attachments).)textr   metadata_file_typer   Nr;   )rz   rR   	html_bodyr   r!   r   MSGr   rn   bodyr   r^   metadataupdate)r7   rR   r   elementsemail_specific_metadataes         r(   r   z+_MsgPartitioner._iter_message_body_elements   s      jn%9 	)% "&*"?#+<'+z'H	 
 *0 HH X 		)% X"&*"?#+<'+z'H	 
 *0 HH ')H #'*"? 	 	AJ5666GGGG	 	r*   N)r'   r#   )r'   r#   r   r{   r   r{   )r   r   )rt   ru   rv   rw   r8   classmethodr&   r   r   r   r   r;   r*   r(   r%   r%      s        77    6 6 6 [6T T T T 1 1 1 \1     r*   r%   c                      e Zd ZdZddZedd	            Zdd
Zedd            Z	edd            Z
edd            ZdS )r   z'Partitions an attachment to a MSG file.r   r	   r'   r#   c                "    || _         || _        d S r0   )_attachmentrz   )r7   r   r'   s      r(   r8   z_AttachmentPartitioner.__init__   s    %


r*   r   r{   c                >     | ||                                           S )zUPartition an `oxmsg.attachment.Attachment` from an Outlook email message (.msg file).)_iter_elements)r   r   r'   s      r(   r   z$_AttachmentPartitioner.iter_elements   s"    
 s:t$$33555r*   c              #    K   ddl m} t          j                    5 }t          j                            || j                  }t          |d          5 }|	                    | j
                   ddd           n# 1 swxY w Y   	  ||f| j        | j        d| j        j        }n# t          $ r Y ddd           dS w xY w|D ]}| j        j        |j        _        |V  	 ddd           dS # 1 swxY w Y   dS )zEPartition the file in an `oxmsg.attachment.Attachment` into elements.r   )	partitionwbN)r   r   )unstructured.partition.autor   tempfileTemporaryDirectoryospathjoin_attachment_file_nameopenwrite_file_bytes_attachment_last_modifiedrz   rn   r   r!   r   attached_to_filename)r7   r   tmp_dir_pathdetached_file_pathfr   r   s          r(   r   z%_AttachmentPartitioner._iter_elements   s     999999(** 	l!#lD<V!W!W($// *1()))* * * * * * * * * * * * * * *$9&&*&@+/+I  j4	  .   	 	 	 	 	 	 	 	   26*2O
/#	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	sY   6C/A9-C/9A=	=C/ A=	C/ B&%C/&
B?0C/>B??"C//C36C3strc                    | j         j        pdS )zThe original name of the attached file, no path.

        This value is 'unknown' if it is not present in the MSG file (not expected).
        unknown)r   	file_namera   s    r(   r   z,_AttachmentPartitioner._attachment_file_name  s     )6Y6r*   r-   c                ^    | j         j        x}r|                                S | j        j        S )a  ISO8601 string timestamp of attachment last-modified date.

        This value generally available on the attachment and will be the most reliable last-modifed
        time. There are fallbacks for when it is not present, ultimately `None` if we have no way
        of telling.
        )r   last_modifiedrf   rz   r   )r7   r   s     r(   r   z0_AttachmentPartitioner._attachment_last_modified  s4     !,::= 	- **,,,z00r*   bytesc                    | j         j        pdS )zThe bytes of the attached file.r*   )r   
file_bytesra   s    r(   r   z"_AttachmentPartitioner._file_bytes)  s     *1c1r*   N)r   r	   r'   r#   )r   r	   r'   r#   r   r{   r   )r   r   rs   )r   r   )rt   ru   rv   rw   r8   r   r   r   r   r   r   r   r;   r*   r(   r   r      s        11    6 6 6 [6   2 7 7 7 \7 	1 	1 	1 \	1 2 2 2 \2 2 2r*   r   r0   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   )$
__future__r   r   rX   r   typingr   r   r   r   oxmsgr   oxmsg.attachmentr	   unstructured.documents.elementsr
   r   unstructured.file_utils.modelr   unstructured.loggerr   unstructured.partition.commonr   &unstructured.partition.common.metadatar   unstructured.partition.htmlr   unstructured.partition.textr   unstructured.utilsr   r   r)   r#   r%   r   r;   r*   r(   <module>r      s   " " " " " " 				 				  . . . . . . . . . . . .       ' ' ' ' ' ' D D D D D D D D 2 2 2 2 2 2 & & & & & & D D D D D D I I I I I I 6 6 6 6 6 6 6 6 6 6 6 6 > > > > > > > > #"= !%'+,0 $"= "= "= "= "= "=JuS uS uS uS uS uS uS uSp9 9 9 9 9 9 9 9x>2 >2 >2 >2 >2 >2 >2 >2 >2 >2r*   