
    Ngl                    h   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZ d dlZd dlmZmZ d dlm Z  erd dl!m"Z"m#Z# ee$e$e$e$f         Z%d	e&d
<   ee$e$f         Z'd	e&d<   ee'df         Z(d	e&d<   dZ) ed          Z* ed          Z+dcdZ,dddZ- G d dee*                   Z.ded$Z/dfd%Z0	 dgdhd+Z1did-Z2djd1Z3dkd2Z4dld5Z5d6 Z6dmd<Z7dnd@Z8dodBZ9dpdqdHZ:	 drdsdNZ;	 dtdudWZ<dvdYZ=	 	 dwdxd\Z>	 	 dwdyd`Z? G da db          Z@dS )z    )annotationsN)wraps)combinations)TYPE_CHECKINGAnyCallableGenericIterableIteratorListOptionalTupleTypeVarcast)	ParamSpec	TypeAlias)__version__)ElementTextr   BoxPoint.Points)z%Y-%m-%dz%Y-%m-%dT%H:%M:%Sz%Y-%m-%d+%H:%M:%Sz%Y-%m-%dT%H:%M:%S%z_T_PfuncCallable[_P, List[Element]]args_P.argskwargs	_P.kwargsreturndict[str, Any]c           
        t          j        |           }t          di t          t          |j        |                    |}|j                                        D ](}|j        |vr|j        |j        ur|j        ||j        <   )|S )zMMap both explicit and default arguments of decorated func call by param name. )	inspect	signaturedictzip
parametersvaluesnamedefaultempty)r   r   r   sig	call_argsargs         N/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/utils.pyget_call_args_applying_defaultsr2   0   s     
D
!
!C $ Q QtC,E,E'F'F Q& Q QI~$$&& . .89$$CI)E)E"%+Ich    	file_pathstrboolc                N    |                      t          j                              S )zTrue when file_path is in the Python-defined tempdir.

    The Python-defined temp directory is platform dependent (macOS != Linux != Windows)
    and can also be determined by an environment variable (TMPDIR, TEMP, or TMP).
    )
startswithtempfile
gettempdir)r4   s    r1   is_temp_file_pathr;   >   s!      3 5 5666r3   c                  ,    e Zd ZdZddZdddZddZdS )lazypropertya$  Decorator like @property, but evaluated only on first access.

    Like @property, this can only be used to decorate methods having only a `self` parameter, and
    is accessed like an attribute on an instance, i.e. trailing parentheses are not used. Unlike
    @property, the decorated method is only evaluated on first access; the resulting value is
    cached and that same value returned on second and later access without re-evaluation of the
    method.

    Like @property, this class produces a *data descriptor* object, which is stored in the __dict__
    of the *class* under the name of the decorated method ('fget' nominally). The cached value is
    stored in the __dict__ of the *instance* under that same name.

    Because it is a data descriptor (as opposed to a *non-data descriptor*), its `__get__()` method
    is executed on each access of the decorated attribute; the __dict__ item of the same name is
    "shadowed" by the descriptor.

    While this may represent a performance improvement over a property, its greater benefit may be
    its other characteristics. One common use is to construct collaborator objects, removing that
    "real work" from the constructor, while still only executing once. It also de-couples client
    code from any sequencing considerations; if it's accessed from more than one location, it's
    assured it will be ready whenever needed.

    Loosely based on: https://stackoverflow.com/a/6849299/1902513.

    A lazyproperty is read-only. There is no counterpart to the optional "setter" (or deleter)
    behavior of an @property. This is critically important to maintaining its immutability and
    idempotence guarantees. Attempting to assign to a lazyproperty raises AttributeError
    unconditionally.

    The parameter names in the methods below correspond to this usage example::

        class Obj(object)

            @lazyproperty
            def fget(self):
                return 'some result'

        obj = Obj()

    Not suitable for wrapping a function (as opposed to a method) because it is not callable.
    fgetCallable[..., _T]r!   Nonec                V    || _         |j        | _        t          j        | |           dS )aY  *fget* is the decorated method (a "getter" function).

        A lazyproperty is read-only, so there is only an *fget* function (a regular
        @property can also have an fset and fdel function). This name was chosen for
        consistency with Python's `property` class which uses this name for the
        corresponding parameter.
        N)_fget__name___name	functoolsupdate_wrapper)selfr>   s     r1   __init__zlazyproperty.__init__r   s-     
]
 t,,,,,r3   Nobjr   typer   c                    || S |j                             | j                  }|$|                     |          }||j         | j        <   t	          t
          |          S )a  Called on each access of 'fget' attribute on class or instance.

        *self* is this instance of a lazyproperty descriptor "wrapping" the property
        method it decorates (`fget`, nominally).

        *obj* is the "host" object instance when the attribute is accessed from an
        object instance, e.g. `obj = Obj(); obj.fget`. *obj* is None when accessed on
        the class, e.g. `Obj.fget`.

        *type* is the class hosting the decorated getter method (`fget`) on both class
        and instance attribute access.
        )__dict__getrD   rB   r   r   )rG   rI   rJ   values       r1   __get__zlazyproperty.__get__   sY     ;K   ,,= JJsOOE',CL$Br3   rN   c                     t          d          )a  Raises unconditionally, to preserve read-only behavior.

        This decorator is intended to implement immutable (and idempotent) object
        attributes. For that reason, assignment to this property must be explicitly
        prevented.

        If this __set__ method was not present, this descriptor would become a
        *non-data descriptor*. That would be nice because the cached value would be
        accessed directly once set (__dict__ attrs have precedence over non-data
        descriptors on instance attribute lookup). The problem is, there would be
        nothing to stop assignment to the cached value, which would overwrite the result
        of `fget()` and break both the immutability and idempotence guarantees of this
        decorator.

        The performance with this __set__() method in place was roughly 0.4 usec per
        access when measured on a 2.8GHz development machine; so quite snappy and
        probably not a rich target for optimization efforts.
        zcan't set attribute)AttributeError)rG   rI   rN   s      r1   __set__zlazyproperty.__set__   s    & 2333r3   )r>   r?   r!   r@   N)rI   r   rJ   r   r!   r   )rI   r   rN   r   r!   r@   )rC   
__module____qualname____doc__rH   rO   rR   r$   r3   r1   r=   r=   G   s`        ( (T- - - -    :4 4 4 4 4 4r3   r=   datalist[dict[str, Any]]filenamer@   c                    t          |d          5 }|                    d | D                        d d d            d S # 1 swxY w Y   d S )Nzw+c              3  D   K   | ]}t          j        |          d z   V  dS )
N)jsondumps).0datums     r1   	<genexpr>z save_as_jsonl.<locals>.<genexpr>   s1      JJEtz%0047JJJJJJr3   )open
writelines)rW   rY   output_files      r1   save_as_jsonlre      s    	h		 KJJTJJJJJJK K K K K K K K K K K K K K K K K Ks    >AAc                l    t          |           5 }d |D             cd d d            S # 1 swxY w Y   d S )Nc                6    g | ]}t          j        |          S r$   )r]   loads)r_   lines     r1   
<listcomp>z#read_from_jsonl.<locals>.<listcomp>   s"    888T
4  888r3   )rb   )rY   
input_files     r1   read_from_jsonlrl      s    	h 9:88Z8889 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9s   )--dependenciesstr | list[str]extrasOptional[str].Callable[[Callable[_P, _T]], Callable[_P, _T]]c                H     t           t                    r g d fd}|S )Nr   Callable[_P, _T]r!   c                     fdt                     d fd            }t                     d fd            }t          j                   r|S |S )	Nc                    g } D ]&}t          |          s|                     |           't          |           dk    rHt          dd                    |            drd dndd                    |            d	z             d S )
Nr   z$Following dependencies are missing: z, . z5Please install them using `pip install "unstructured[z]"`.z'Please install them using `pip install  z`.)dependency_existsappendlenImportErrorjoin)missing_depsdeprm   ro   s     r1   	run_checkz;requires_dependencies.<locals>.decorator.<locals>.run_check   s    &(L# - -(-- - '',,,<  1$$!V499\;R;RVVV "b`RX````asxxP\G]G]aaa	   %$r3   r   r   r   r    c                 (                   | i |S rS   r$   r   r   r   r   s     r1   wrapperz9requires_dependencies.<locals>.decorator.<locals>.wrapper   s#    IKKK4((((r3   c                 8   K                  | i | d {V S rS   r$   r   s     r1   wrapper_asyncz?requires_dependencies.<locals>.decorator.<locals>.wrapper_async   s9      IKKKt.v.........r3   )r   r   r   r    )r   asyncioiscoroutinefunction)r   r   r   r   rm   ro   s   `  @r1   	decoratorz(requires_dependencies.<locals>.decorator   s    	 	 	 	 	 	 
t	) 	) 	) 	) 	) 	) 
	) 
t	/ 	/ 	/ 	/ 	/ 	/ 
	/ &t,, 	!  r3   )r   rs   r!   rs   )
isinstancer5   )rm   ro   r   s   `` r1   requires_dependenciesr      sJ     ,$$ &$~      < r3   
dependencyc                    	 t          j        |            n.# t          $ r!}| t          |          v rY d }~dS Y d }~nd }~ww xY wdS )NFT)	importlibimport_moduler{   repr)r   es     r1   rx   rx      sp    
++++   a  55555 !     4s    
A=AitIterable[_T]Tuple[_T, Iterator[_T]]c                    t          |           }	 t          |          }n# t          $ r t          d          w xY w||fS )NzYExpected at least 1 element in iterable from which to retrieve first, got empty iterable.)iternextStopIteration
ValueError)r   iteratorouts      r1   _first_and_remaining_iteratorr      s]    BxxH
8nn 
 
 

 
 	


 =s   ! ;c                *    t          |           \  }}|S )zRReturns the first item from an iterable. Raises an error if the iterable is empty.)r   )r   r   _s      r1   firstr      s    *2..FCJr3   Iterable[Any]r   c                z    t          |           \  }}t          d |D                       rt          d          |S )zqReturns the only element from a singleton iterable.

    Raises an error if the iterable is not a singleton.
    c              3     K   | ]}d V  dS )TNr$   r_   r   s     r1   ra   zonly.<locals>.<genexpr>  s"      
"
"A4
"
"
"
"
"
"r3   zRExpected only 1 element in passed argument, instead there are at least 2 elements.)r   anyr   )r   r   r   s      r1   onlyr     sP    
 2"55MC

"
"
"
"
""" 
`
 
 	
 Jr3   c                    	 t          j        d           d} n# t          $ r d} Y nw xY wd                    t	          j                                        d          d d                   }	 t          j        d          dk    rt          j        d          dk    rd	t          v rht          j        d
t          z   dz   t	          j                    z   dz   |z   dz   t	          j                    z   dz   t          |           z   dz              d S t          j        d
t          z   dz   t	          j                    z   dz   |z   dz   t	          j                    z   dz   t          |           z   dz              d S d S d S # t          $ r Y d S w xY w)Nz
nvidia-smiTF.   SCARF_NO_ANALYTICStrueDO_NOT_TRACKdevz:https://packages.unstructured.io/python-telemetry?version=z
&platform=z&pythonz&arch=z&gpu=z	&dev=truez
&dev=false)
subprocesscheck_output	Exceptionr|   platformpython_versionsplitosgetenvr   requestsrM   systemmachiner5   )gpu_presentr   s     r1   scarf_analyticsr     s   ---    XXh577==cBB2A2FGGN9)**f44>9R9RV\9\9\##P!""# o''(  	 
 %%  &(()  +&&	' "
"     P!""# o''(  	 
 %%  &(()  +&&	' #
#    ! 549\9\:    s&    ((-BE: A&E: :
FFs	list[str]nintlist[tuple[str, ...]]c                    g }t          t          |           |z
  dz             D ]V}g }t          |          D ] }|                    | ||z                       !|                    t          |                     W|S )zSGenerate n-grams from a list of strings where `n` (int) is the size of each n-gram.   )rangerz   ry   tuple)r   r   ngrams_listingramjs         r1   ngramsr   :  s     *,K3q66A:>"" ) )q 	# 	#ALL1q5""""5<<((((r3   first_stringsecond_string"tuple[float, set[tuple[str, ...]]]c                \   |sdt                      fS t          |                                 |          }t          |                                |          }|sdt                      fS t          |          t          |          z  }t          |          t          |          z  dz  }||fS )zTCalculate the percentage of common_ngrams between string A and B with reference to Ar   d   )setr   r   rz   )r   r   r   first_string_ngramssecond_string_ngramscommon_ngrams
percentages          r1   !calculate_shared_ngram_percentager   F  s      #%%x !3!3!5!5q99!-"5"5"7"7;; #%%x+,,s3G/H/HHMm$$s+>'?'??3FJ}$$r3   'tuple[float, set[tuple[str, ...]], str]c                   t                      }t          |                                           t          |                                          k     r%t          |                                           dz
  }n(t          |                                          dz
  }|| }} d}|s"t          | ||          \  }}|dk    rn|dz  }|"t	          |d          |t          |dz             fS )aG  From two strings, calculate the shared ngram percentage.

    Returns a tuple containing...
        - The largest n-gram percentage shared between the two strings.
        - A set containing the shared n-grams found during the calculation.
        - A string representation of the size of the largest shared n-grams found.
    r   r   r   )r   rz   r   r   roundr5   )r   r   shared_ngramsr   ngram_percentages        r1   "calculate_largest_ngram_percentager   Y  s     +.%%M
<  3}':':'<'<#=#===""$$%%)##%%&&*&3\m 	*K+
 +
'-
 66FA  	 !1%%}c!a%jj@@r3           parent_targetchild_targetaddfloatc                d   t          |           dk    rdS g d}|rbt          |           dk    rOt          |           }|dxx         |z  cc<   |dxx         |z  cc<   |dxx         |z  cc<   |dxx         |z  cc<   t          |          dk    rJ|d         |d         k    r8|d         |d         k    r&|d         |d         k    r|d         |d         k    rdS t          |          dk    oC|d         |d         cxk    o|d         k    nc o!|d         |d         cxk    o|d         k    nc S )	zTrue if the child_target bounding box is nested in the parent_target.

    Box format: [x_bottom_left, y_bottom_left, x_top_right, y_top_right].
    The parameter 'add' is the pixel error tolerance for extra pixels outside the parent region
       Fr   r   r   r   r   r   r      T)rz   list)r   r   r   parent_targetss       r1   is_parent_boxr   x  s    =Qu!\\N
 !s=!!Q&&m,,qS qS qS qS  	LQ!_q 111l1oXYIZ6Z6Z!_q 111l1oXYIZ6Z6Zt|! q\!_AAAAq0AAAAA 	F1aEEEEN14EEEEEr3   totalbox1box2intersection_ratio_method!tuple[float, float, float, float]c                T   | d         \  }}| d         \  }}|d         \  }}|d         \  }	}
||z
  ||z
  z  }|	|z
  |
|z
  z  }t          ||          }t          ||          }t          ||	          }t          ||
          }t          d||z
            t          d||z
            z  }t          ||          }t          ||          }||z   }|dk    r|dk    rdS ||z  dz  }n0|dk    r|dk    rdS ||z  dz  }n||z   dk    rdS |||z   |z
  z  dz  }t          |d          |||fS )a7  Calculate the percentage of overlapped region.

    Calculate the percentage with reference to
    the biggest element-region (intersection_ratio_method="parent"),
    the smallest element-region (intersection_ratio_method="partial"), or
    the disjunctive union region (intersection_ratio_method="total")
    r   r   parentr   r   partial)maxminr   )r   r   r   x1y1x2y2x3y3x4y4	area_box1	area_box2x_intersection1y_intersection1x_intersection2y_intersection2intersection_areamax_areamin_area
total_areaoverlap_percentages                         r1   calculate_overlap_percentager     s    !WFB!WFB!WFB!WFBbR"W%IbR"W%I"bkkO"bkkO"bkkO"bkkOA@AAC	/)E E  9i((H9i((HY&J H,,q==:/(:cA	"i	/	/q==:/(:cA 	!a'':/9y3HK\3\]add#Q''8ZGGr3         $@box_pair$list[Points] | tuple[Points, Points]
label_pairlist[str] | tuple[str, str]	text_pairix_pairsm_overlap_thresholdc                   d\  }}}}| \  }	}
|\  }}|\  }}|\  }}t          |	|
d          \  }}}}||k     r| d| d| d| dg}d}n|s| d| d| d| dg}d| }n|s| d| d| d| dg}d| }n||v s||v r| d| d| d| dg}d}nt          ||          \  }}}t          |d	          }|s| d| d| d| dg}d
}nl| d| d| d| dg}t          |                                          t          |                                          k     r|n|}d|z   d| dz   }d| d| }|||||||fS )a  Classifies the overlapping case for an element_pair input.

    There are 5 cases of overlapping:
        'Small partial overlap'
        'Partial overlap with empty content'
        'Partial overlap with duplicate text (sharing 100% of the text)'
        'Partial overlap without sharing text'
        'Partial overlap sharing {calculate_largest_ngram_percentage(...)}% of the text'

    Returns:
    overlapping_elements: List[str] - List of element types with their `ix` value.
        Ex: ['Title(ix=0)']
    overlapping_case: str - See list of cases above
    overlap_percentage: float
    largest_ngram_percentage: float
    max_area: float
    min_area: float
    total_area: float
    )NNNNr   r   (ix=)zSmall partial overlapz&partial overlap with empty content in z#partial overlap with duplicate textr   z$partial overlap without sharing textzof the text from(z-gram)zpartial overlap sharing z% )r   r   r   rz   r   )r  r  r  r  r  overlapping_elementsoverlapping_caser   largest_ngram_percentager   r   type1type2text1text2ix_element1ix_element2r   r   r   r   	largest_nref_types                          r1   identify_overlapping_caser    s   4\X*,>@X JD$LE5LE5&K;W"+< < <88Z
 000((+(((((+((( 
 3  (	e,,k,,,,,k,,,$   POO !	e,,k,,,,,k,,,$   POOe^^u~~,,k,,,,,k,,,$   E 6Xu6 62$a (--Eq'I'I$+ e00+00000+000($ $J   00+00000+000($ %($6$6U[[]]9K9K$K$K55QV-8;Py;P;P;PP#d>V#d#dZb#d#d   r3   coordinatesc                :    | d         \  }}| d         \  }}||||fS )zAccepts a set of Points and returns the lower-left and upper-right coordinates.

    Expects four coordinates representing the corners of a rectangle, listed in this order:
    bottom-left, top-left, top-right, bottom-right.
    r   r   r$   )r  x_bottom_left_1y_bottom_left_1x_top_right_1y_top_right_1s        r1   _convert_coordinates_to_boxr   *  s/     (31~$O_#.q> M=O]MIIr3      nested_error_tolerance_pxc           	     J   | \  }}|\  }}d                     d |D                       }	d                     d |D                       }
|dd                                         }|dd                                         }t          |          }t          |          }|\  }}}}|\  }}}}||k     o||k    }||k     o||k    }d\  }}}}}}d\  }}}|r|rt          ||d	          \  }} } } t          ||d
	          \  }}}}t	          |||          r#| d|	 d| d|
 dg}d| d| }d}| d|	 d}nSt	          |||          r#| d|
 d| d|	 dg}d| d| }d}| d|
 d}nt          | |||	|
f|          \  }}}}}}}||||pd|pd|pd|pd|pd|pdf	S )a  Identify if overlapping or nesting elements exist and, if so, the type of overlapping case.

    Returns:
    overlapping_elements: List[str] - List of element types & their `ix` value. Ex: ['Title(ix=0)']
    overlapping_case: str - See list of cases above
    overlap_percentage: float
    overlap_percentage_total: float
    largest_ngram_percentage: float
    max_area: float
    min_area: float
    total_area: float
     c                :    g | ]}|                                 |S r$   	isnumericr_   chs     r1   rj   z8identify_overlapping_or_nesting_case.<locals>.<listcomp>K  %    @@@"@2@@@r3   c                :    g | ]}|                                 |S r$   r&  r(  s     r1   rj   z8identify_overlapping_or_nesting_case.<locals>.<listcomp>L  r*  r3   r   N)NNNNNN)NNNr   r
  r   )r   r  r  znested z in r   )r  r   )r|   stripr   r   r   r  )!r  r  r  r"  r  r   r   r  r  r  r  box1_cornersbox2_cornersr  r  r  r  x_bottom_left_2y_bottom_left_2x_top_right_2y_top_right_2horizontal_overlapvertical_overlapr  parent_elementr  r   overlap_percentage_totalr  r   r   r   r   s!                                    r1   $identify_overlapping_or_nesting_caser7  6  s   & JD$LE5''@@@@@AAK''@@@@@AAK!""IOOE!""IOOE.t44L.t44LEQBO_m]EQBO_m](=8\]_=\&6Z=?;Z	   &6"Hh
 -. -,H&--
 -
 -
) !Q
 >Z&.>
 >
 >
:Hh
 |9RSSS !	,,k,,,,,k,,,$   <;;E;;!$ %99;999NN<;TUUU 	,,k,,,,,k,,,$   <;;E;;!$ %99;999NN *k*%9  $ "( 	a %A %AAAa
 
r3   elementslist['Text']!tuple[bool, list[dict[str, Any]]]c                   | d         j         j        pd}d t          |          D             }d t          |          D             }d t          |          D             }t          |           D ]\  }}|j         j        pd}	|	dz
  }
|j         j        rRt          t          |j         j                                        d                   }||
                             |           ||
                             | d|j	                    ||
                             |j
                   d	}g }t          t          |||          d
          D ]\  }	\  }}}t          t          |d                    }t          t          |d                    }t          t          |d                    }t          |||          D ]\  }}}t          |||||          \	  }}}}}}}}}|r\|                    |||| d|| dt          |d           dt          |d           dt          |d           ddd           d}||fS )zLCatch overlapping and nested bounding boxes cases across a list of elements.r   c                    g | ]}g S r$   r$   r   s     r1   rj   z7catch_overlapping_and_nested_bboxes.<locals>.<listcomp>  s    *H*H*H!2*H*H*Hr3   c                    g | ]}g S r$   r$   r   s     r1   rj   z7catch_overlapping_and_nested_bboxes.<locals>.<listcomp>  s    #A#A#A1B#A#A#Ar3   c                    g | ]}g S r$   r$   r   s     r1   rj   z7catch_overlapping_and_nested_bboxes.<locals>.<listcomp>  s    $B$B$BAR$B$B$Br3   r   pointsrv   F)startr   %u   pxˆ2)r  r6  r   r   r   )r  r5  r  r   metadataT)rC  page_numberr   	enumerater  r   r   to_dictry   categorytextr(   r   r   r7  r   ) r8  r"  r  	num_pagespages_of_bboxestext_labelstext_contentixelementrD  n_page_to_ixboxdocument_with_overlapping_flagoverlapping_casespage_bboxespage_labels	page_textpage_bboxes_combinationspage_labels_combinationstext_content_combinationsr  r  r  r  r5  r  r   r6  r  r   r   r   s                                    r1   #catch_overlapping_and_nested_bboxesrY    s    %16QI*H*HuY7G7G*H*H*HO#A#Ai0@0@#A#A#AK$B$By1A1A$B$B$BL ** 8 8G&27a"Q' 	6vw/;CCEEhOPPCL)00555L!((B)D)D'2B)D)DEEE\"))',7777%*".0>GO[,77? ? ? /6 /6::k;	 $([!(D(D#E#E #'[!(D(D#E#E $(i)C)C$D$D!/2$$%0
 0
 '	6 '	6+Hj) 5)$ 
$ "((   6!((0D*8,<1C.F.F.F8P;S8V8V8V+01+=+=(D(D(D+01+=+=(D(D(D-2:q-A-A*H*H*H% %    26.O'	6R *+<<<r3   c                  *    e Zd ZddZd ZddZd	 Zd
S )FileHandlerr4   r5   c                D    || _         t          j                    | _        d S rS   )r4   	threadingLocklock)rG   r4   s     r1   rH   zFileHandler.__init__  s    "N$$			r3   c                    | j         5  t          | j                  5 }|                                }d d d            n# 1 swxY w Y   |cd d d            S # 1 swxY w Y   d S rS   )r_  rb   r4   read)rG   filerW   s      r1   	read_filezFileHandler.read_file  s    Y 	 	dn%% #yy{{# # # # # # # # # # # # # # #	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s1   A>AA	AA	AAArW   r!   r@   c                    | j         5  t          | j        d          5 }|                    |           d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nw)r_  rb   r4   write)rG   rW   rb  s      r1   
write_filezFileHandler.write_file  s    Y 	! 	!dnc** !d

4   ! ! ! ! ! ! ! ! ! ! ! ! ! ! !	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	! 	!s3   AA A A	AA	AAAc                    | j         5  t          j                            | j                  rt          j        | j                   d d d            d S # 1 swxY w Y   d S rS   )r_  r   pathexistsr4   remove)rG   s    r1   cleanup_filezFileHandler.cleanup_file  s    Y 	* 	*w~~dn-- *	$.)))	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	* 	*s   >AAAN)r4   r5   )rW   r5   r!   r@   )rC   rT   rU   rH   rc  rg  rl  r$   r3   r1   r[  r[    sZ        % % % %  ! ! ! !
* * * * *r3   r[  )r   r   r   r   r   r    r!   r"   )r4   r5   r!   r6   )rW   rX   rY   r5   r!   r@   )rY   r5   r!   rX   rS   )rm   rn   ro   rp   r!   rq   )r   r5   )r   r   r!   r   )r   r   r!   r   )r   r   r!   r   )r   r   r   r   r!   r   )r   r5   r   r5   r   r   r!   r   )r   r5   r   r5   r!   r   )r   )r   r   r   r   r   r   r!   r6   )r   )r   r   r   r   r   r5   r!   r   )r  )
r  r  r  r  r  r  r  r  r  r   )r  r   )r!  r  )
r  r  r  r  r  r  r"  r   r  r   )r8  r9  r"  r   r  r   r!   r:  )A
__future__r   r   rE   r   r%   r]   r   r   r   r9   r]  r   	itertoolsr   typingr   r   r   r	   r
   r   r   r   r   r   r   r   typing_extensionsr   r   unstructured.__version__r   unstructured.documents.elementsr   r   r   r   __annotations__r   r   DATE_FORMATSr   r   r2   r;   r=   re   rl   r   rx   r   r   r   r   r   r   r   r   r   r  r   r7  rY  r[  r$   r3   r1   <module>ru     s2   " " " " " " "            				                 " " " " " "                           2 2 2 2 2 2 2 2 0 0 0 0 0 0 >======== ueUE12 2 2 2 2& & & & &%*% % % % %\WT]]Yt__   7 7 7 7j4 j4 j4 j4 j472; j4 j4 j4ZK K K K
9 9 9 9 !% % % % %P   	 	 	 	   
 
 
 
( ( (V	 	 	 	% % % %&A A A A>    > &-.H .H .H .H .Hl #'b b b b bJJ J J J  &'"&i i i i i\ &'"&I= I= I= I= I=X* * * * * * * * * *r3   