
    Ng                     B   U d Z ddlmZ ddlZddlZddlmZmZmZm	Z	m
Z
mZ ddlZddlmZmZ ddlmZmZmZ ddlmZmZmZmZmZmZmZ ddlmZ d	Zd
ed<   	 dZ ded<   	 eege!f         Z"ded<   	 dZ#ded<   	 e$e%e%f         Z&ded<    G d d          Z' G d d          Z( G d d          Z) G d d          Z* G d d          Z+ G d d          Z, G d  d!          Z- G d" d#          Z. G d$ d%          Z/ G d& d'          Z0 G d( d)          Z1d/d+Z2d0d.Z3dS )1z@Chunking objects not specific to a particular chunking strategy.    )annotationsN)AnyCallableDefaultDictIterableIteratorcast)Self	TypeAlias)HtmlCellHtmlRow	HtmlTable)CompositeElementConsolidationStrategyElementElementMetadataTable
TableChunkTitle)lazypropertyi  intCHUNK_MAX_CHARS_DEFAULTTboolCHUNK_MULTI_PAGE_DEFAULTr   BoundaryPredicatezTablePreChunk | TextPreChunkPreChunkTextAndHtmlc                  *   e Zd ZdZddZedd            Zedd	            Zedd            Z	edd            Z
edd            Zedd            Zedd            Zedd            Zed d            Zed!d            Zed"d            Zd#dZdS )$ChunkingOptionsaY  Specifies parameters of optional chunking behaviors.

    Parameters
    ----------
    max_characters
        Hard-maximum text-length of chunk. A chunk longer than this will be split mid-text and be
        emitted as two or more chunks.
    new_after_n_chars
        Preferred approximate chunk size. A chunk composed of elements totalling this size or
        greater is considered "full" and will not be enlarged by adding another element, even if it
        will fit within the remaining `max_characters` for that chunk. Defaults to `max_characters`
        when not specified, which effectively disables this behavior. Specifying 0 for this
        argument causes each element to appear in a chunk by itself (although an element with text
        longer than `max_characters` will be still be split into two or more chunks).
    combine_text_under_n_chars
        Provides a way to "recombine" small chunks formed by breaking on a semantic boundary. Only
        relevant for a chunking strategy that specifies higher-level semantic boundaries to be
        respected, like "section" or "page". Recursively combines two adjacent pre-chunks when the
        first pre-chunk is smaller than this threshold. "Recursively" here means the resulting
        pre-chunk can be combined with the next pre-chunk if it is still under the length threshold.
        Defaults to `max_characters` which combines chunks whenever space allows. Specifying 0 for
        this argument suppresses combining of small chunks. Note this value is "capped" at the
        `new_after_n_chars` value since a value higher than that would not change this parameter's
        effect.
    overlap
        Specifies the length of a string ("tail") to be drawn from each chunk and prefixed to the
        next chunk as a context-preserving mechanism. By default, this only applies to split-chunks
        where an oversized element is divided into multiple chunks by text-splitting.
    overlap_all
        Default: `False`. When `True`, apply overlap between "normal" chunks formed from whole
        elements and not subject to text-splitting. Use this with caution as it entails a certain
        level of "pollution" of otherwise clean semantic chunk boundaries.
    text_splitting_separators
        A sequence of strings like `("
", " ")` to be used as target separators during
        text-splitting. Text-splitting only applies to splitting an oversized element into two or
        more chunks. These separators are tried in the specified order until one is found in the
        string to be split. The default separator is `""` which matches between any two characters.
        This separator should not be specified in this sequence because it is always the separator
        of last-resort. Note that because the separator is removed during text-splitting, only
        whitespace character sequences are suitable.
    kwargsr   c                    || _         d S N)_kwargs)selfr    s     V/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/chunking/base.py__init__zChunkingOptions.__init__d   s        returnr
   c                >     | di |}|                                  |S )zUReturn instance or raises `ValueError` on invalid arguments like overlap > max_chars. )	_validate)clsr    r$   s      r%   newzChunkingOptions.newg   s*     s}}V}}r'   tuple[BoundaryPredicate, ...]c                    dS )zThe semantic-boundary detectors to be applied to break pre-chunks.

        Overridden by sub-typs to provide semantic-boundary isolation behaviors.
        r*   r*   r$   s    r%   boundary_predicatesz#ChunkingOptions.boundary_predicatesn   s	     rr'   r   c                B    | j                             d          }||ndS )a   Combine two consecutive text pre-chunks if first is smaller than this and both will fit.

        Default applied here is `0` which essentially disables chunk combining. Must be overridden
        by subclass where combining behavior is supported.
        combine_text_under_n_charsNr   r#   getr$   	arg_values     r%   r3   z*ChunkingOptions.combine_text_under_n_charsv   s)     L$$%ABB	%1yyq8r'   c                L    | j                             d          }||nt          S )zThe maximum size for a chunk.

        A pre-chunk will only exceed this size when it contains exactly one element which by itself
        exceeds this size. Such a pre-chunk is subject to mid-text splitting later in the chunking
        process.
        max_characters)r#   r5   r   r6   s     r%   hard_maxzChunkingOptions.hard_max   s*     L$$%566	%1yy7NNr'   r   c                \    | j                             d          }|dnt          |          S )zWhen True, add original elements from pre-chunk to `.metadata.orig_elements` of chunk.

        Default value is `True`.
        include_orig_elementsNT)r#   r5   r   r6   s     r%   r<   z%ChunkingOptions.include_orig_elements   s/     L$$%<==	 (ttd9oo=r'   c                L    | j                             d          }|r| j        ndS )zCharacters of overlap to add between chunks.

        This applies only to boundaries between chunks formed from whole elements and not to
        text-splitting boundaries that arise from splitting an oversized element.
        overlap_allr   )r#   r5   overlap)r$   overlap_all_args     r%   inter_chunk_overlapz#ChunkingOptions.inter_chunk_overlap   s*     ,**=99.5t||A5r'   c                >    | j                             d          }|pdS )zThe number of characters to overlap text when splitting chunks mid-text.

        The actual overlap will not exceed this number of characters but may be less as required to
        respect splitting-character boundaries.
        r?   r   r4   )r$   overlap_args     r%   r?   zChunkingOptions.overlap   s$     l&&y11ar'   c                `    | j         }| j                            d          }||S ||k    r|S |S )zA pre-chunk of this size or greater is considered full.

        Note that while a value of `0` is valid, it essentially disables chunking by putting
        each element into its own chunk.
        new_after_n_chars)r:   r#   r5   )r$   r:   new_after_n_chars_args      r%   soft_maxzChunkingOptions.soft_max   sI     = $ 0 01D E E !(O !8++O %$r'    Callable[[str], tuple[str, str]]c                     t          |           S )a  A text-splitting function suitable for splitting the text of an oversized pre-chunk.

        The function is pre-configured with the chosen chunking window size and any other applicable
        options specified by the caller as part of this chunking-options instance.
        )_TextSplitterr0   s    r%   splitzChunkingOptions.split   s     T"""r'   strc                    dS )a;  The string to insert between elements when concatenating their text for a chunk.

        Right now this is just "

" (a blank line in plain text), but having this here rather
        than as a module-level constant provides a way for us to easily make it user-configurable
        in future if we want to.
        z

r*   r0   s    r%   text_separatorzChunkingOptions.text_separator   s	     vr'   tuple[str, ...]c                \    | j                             d          }|dnt          |          S )zLSequence of text-splitting target strings to be used in order of preference.text_splitting_separatorsN)
 )r#   r5   tuple)r$   text_splitting_separators_args     r%   rQ   z)ChunkingOptions.text_splitting_separators   s;     )-(8(89T(U(U% -4 K455	
r'   Nonec                    | j         }|dk    rt          d|           | j                            d          }||dk     rt          d|           | j        |k    rt          d| j         d|           dS )z5Raise ValueError if requestion option-set is invalid.r   z+'max_characters' argument must be > 0, got rE   Nz/'new_after_n_chars' argument must be >= 0, got z;'overlap' argument must be less than `max_characters`, got z >= )r:   
ValueErrorr#   r5   r?   )r$   r9   rE   s      r%   r+   zChunkingOptions._validate   s    Q_~__``` !L,,-@AA(->-B-BYFWYY  
 <>));; ;*8; ;   *)r'   N)r    r   )r    r   r(   r
   r(   r.   r(   r   )r(   r   )r(   rH   r(   rL   )r(   rO   )r(   rV   )__name__
__module____qualname____doc__r&   classmethodr-   r   r1   r3   r:   r<   rA   r?   rG   rK   rN   rQ   r+   r*   r'   r%   r   r   9   s       ( (T       [    \ 9 9 9 \9 O O O \O > > > \> 6 6 6 \6       \  % % % \%( # # # \#    \ 
 
 
 \
     r'   r   c                  Z    e Zd ZdZddZedd	            Zdd
Zedd            Z	ddZ
dS )
PreChunkeraq  Gathers sequential elements into pre-chunks as length constraints allow.

    The pre-chunker's responsibilities are:

    - **Segregate semantic units.** Identify semantic unit boundaries and segregate elements on
      either side of those boundaries into different sections. In this case, the primary indicator
      of a semantic boundary is a `Title` element. A page-break (change in page-number) is also a
      semantic boundary when `multipage_sections` is `False`.

    - **Minimize chunk count for each semantic unit.** Group the elements within a semantic unit
      into sections as big as possible without exceeding the chunk window size.

    - **Minimize chunks that must be split mid-text.** Precompute the text length of each section
      and only produce a section that exceeds the chunk window size when there is a single element
      with text longer than that window.

    A Table element is placed into a section by itself. CheckBox elements are dropped.

    The "by-title" strategy specifies breaking on section boundaries; a `Title` element indicates
    a new "section", hence the "by-title" designation.
    elementsIterable[Element]optsr   c                "    || _         || _        d S r"   )	_elements_opts)r$   rc   re   s      r%   r&   zPreChunker.__init__  s    !


r'   r(   Iterator[PreChunk]c                >     | ||                                           S )zEGenerate pre-chunks from the element-stream provided on construction.)_iter_pre_chunks)r,   rc   re   s      r%   iter_pre_chunkszPreChunker.iter_pre_chunks  s"    
 s8T""33555r'   c              #  ,  K   t          | j                  }| j        D ][}|                     |          s|                    |          s|                                E d{V  |                    |           \|                                E d{V  dS )a  Generate pre-chunks from the element-stream provided on construction.

        A *pre-chunk* is the largest sub-sequence of elements that will both fit within the
        chunking window and respects the semantic boundary rules of the chunking strategy. When a
        single element exceeds the chunking window size it is placed in a pre-chunk by itself and
        is subject to mid-text splitting in the second phase of the chunking process.
        N)PreChunkBuilderrh   rg   _is_in_new_semantic_unitwill_fitflushadd_element)r$   pre_chunk_builderelements      r%   rk   zPreChunker._iter_pre_chunks  s       ,DJ77~ 	3 	3G,,W55 5=N=W=WX_=`=` 5,22444444444 ))'2222 %**,,,,,,,,,,,r'   r.   c                    | j         j        S )zBThe semantic-boundary detectors to be applied to break pre-chunks.)rh   r1   r0   s    r%   _boundary_predicateszPreChunker._boundary_predicates.  s     z--r'   rt   r   r   c                H    fd| j         D             }t          |          S )zITrue when `element` begins a new semantic unit such as a section or page.c                &    g | ]} |          S r*   r*   ).0predrt   s     r%   
<listcomp>z7PreChunker._is_in_new_semantic_unit.<locals>.<listcomp>8  s!    SSSttG}}SSSr'   )rv   any)r$   rt   semantic_boundariess    ` r%   ro   z#PreChunker._is_in_new_semantic_unit3  s2    
 TSSS9RSSS&'''r'   N)rc   rd   re   r   )rc   rd   re   r   r(   ri   r(   ri   rY   rt   r   r(   r   )r\   r]   r^   r_   r&   r`   rl   rk   r   rv   ro   r*   r'   r%   rb   rb      s         ,    6 6 6 [6- - - -, . . . \.( ( ( ( ( (r'   rb   c                  j    e Zd ZdZddZdd	ZddZddZedd            Z	ddZ
edd            ZdS )rn   a  An element accumulator suitable for incrementally forming a pre-chunk.

    Provides the trial method `.will_fit()` a pre-chunker can use to determine whether it should add
    the next element in the element stream.

    `.flush()` is used to build a PreChunk object from the accumulated elements. This method
    returns an iterator that generates zero-or-one `TextPreChunk` or `TablePreChunk` object and is
    used like so:

        yield from builder.flush()

    If no elements have been accumulated, no `PreChunk` instance is generated. Flushing the builder
    clears the elements it contains so it is ready to build the next pre-chunk.
    re   r   r(   rV   c                ~    || _         t          |j                  | _        g | _        d| _        g | _        d| _        d S )N r   )rh   lenrN   _separator_lenrg   _overlap_prefix_text_segments	_text_lenr$   re   s     r%   r&   zPreChunkBuilder.__init__L  s@    
!$"566(* %')+r'   rt   r   c                    | j                             |           |j        rC| j                            |j                   | xj        t          |j                  z  c_        dS dS )zAdd `element` to this section.N)rg   appendtextr   r   r   r$   rt   s     r%   rr   zPreChunkBuilder.add_elementX  sc    g&&&< 	0&&w|444NNc',///NNNN	0 	0r'   ri   c              #  >  K   | j         sdS t          | j         d         t                    r&t          | j         d         | j        | j                  n,t          t          | j                   | j        | j                  }|                     |j	                   |V  dS )a  Generate zero-or-one `PreChunk` object and clear the accumulator.

        Suitable for use to emit a PreChunk when the maximum size has been reached or a semantic
        boundary has been reached. Also to clear out a terminal pre-chunk at the end of an element
        stream.
        Nr   )
rg   
isinstancer   TablePreChunkr   rh   TextPreChunklist_reset_stateoverlap_tailr$   	pre_chunks     r%   rq   zPreChunkBuilder.flush_  s       ~ 	F $.+U33VM$.+T-A4:NNN d4>22D4H$*UU	 	 	)0111r'   r   c                   t          | j                  dk    rdS t          |t                    rdS t          | j        d         t                    rdS | j        | j        j        k    rdS | j        t          |j                  k      S )aU  True when `element` can be added to this prechunk without violating its limits.

        There are several limits:
        - A `Table` element will never fit with any other element. It will only fit in an empty
          pre-chunk.
        - No element will fit in a pre-chunk that already contains a `Table` element.
        - A text-element will not fit in a pre-chunk that already exceeds the soft-max
          (aka. new_after_n_chars).
        - A text-element will not fit when together with the elements already present it would
          exceed the hard-max (aka. max_characters).
        r   TF)	r   rg   r   r   _text_lengthrh   rG   _remaining_spacer   r   s     r%   rp   zPreChunkBuilder.will_fitt  s     t~!##4gu%% 	5dnQ'// 	5tz2225(3w|+<+<<<<r'   r   c                h    | j         t          | j                  z  }| j        j        | j        z
  |z
  S )zMMaximum text-length of an element that can be added without exceeding maxlen.)r   r   r   rh   r:   r   )r$   separators_lens     r%   r   z PreChunkBuilder._remaining_space  s3     ,s43F/G/GGz"T^3nDDr'   overlap_prefixrL   c                    || _         | j                                         |r|gng | _        t	          |          | _        dS )zMSet working-state values back to "empty", ready to accumulate next pre-chunk.N)r   rg   clearr   r   r   )r$   r   s     r%   r   zPreChunkBuilder._reset_state  sF    -2@H~..b^,,r'   c                `    t          | j                  }|r|dz
  nd}| j        || j        z  z   S )a  Length of the text in this pre-chunk.

        This value represents the chunk-size that would result if this pre-chunk was flushed in its
        current state. In particular, it does not include the length of a trailing separator (since
        that would only appear if an additional element was added).

        Not suitable for judging remaining space, use `.remaining_space` for that value.
           r   )r   r   r   r   )r$   nseparator_counts      r%   r   zPreChunkBuilder._text_length  s;     #$$#$+!a%%!~43F!FGGr'   Nre   r   r(   rV   )rt   r   r(   rV   r~   r   rZ   )r   rL   r(   rV   )r\   r]   r^   r_   r&   rr   rq   rp   propertyr   r   r   r*   r'   r%   rn   rn   <  s         
  
  
  
 0 0 0 0   *= = = =6 E E E XE- - - - H H H XH H Hr'   rn   c                      e Zd ZdZdd
ZddZedd            Zedd            Zedd            Z	ddZ
ddZed d            Zed!d            Zedd            Zedd            ZdS )"r   z/A pre-chunk composed of a single Table element.tabler   r   rL   re   r   r(   rV   c                0    || _         || _        || _        d S r"   )_tabler   rh   )r$   r   r   re   s       r%   r&   zTablePreChunk.__init__  s    -


r'   Iterator[Table | TableChunk]c              #    K   | j         sdS | j        j        }t          | j                  |k    rGt          | j                  |k    r/| j        }| j        pd|_        t          | j        |          V  dS | j        r| j        j        dk     r| 	                                E d{V  dS | 
                                E d{V  dS )zLSplit this pre-chunk into `Table` or `TableChunk` objects maxlen or smaller.Nr   metadata2   )_table_textrh   r:   r   _text_with_overlap_html	_metadatatext_as_htmlr   _iter_text_only_table_chunks _iter_text_and_html_table_chunks)r$   maxlenr   s      r%   iter_chunkszTablePreChunk.iter_chunks  s        	F $t&''611c$*oo6O6O~H$(J$6$H!T4xHHHHHHF z 	TZ025588:::::::::F 88:::::::::::r'   c                f    | j         j        }|r"| j        | d                                         ndS a6  The portion of this chunk's text to be repeated as a prefix in the next chunk.

        This value is the empty-string ("") when either the `.overlap` length option is `0` or
        `.overlap_all` is `False`. When there is a text value, it is stripped of both leading and
        trailing whitespace.
        Nr   )rh   rA   r   stripr$   r?   s     r%   r   zTablePreChunk.overlap_tail  s8     *0=DLt&xyy177999"Lr'   c                &    | j         x}sdS |j        S )zThe compactified HTML for this table when it has text-as-HTML.

        The empty string when table-structure has not been captured, perhaps because
        `infer_table_structure` was set `False` in the partitioning call.
        r   )_html_tablehtml)r$   
html_tables     r%   r   zTablePreChunk._html  s!     #..
 	2r'   HtmlTable | Nonec                    | j         j        j        x}dS |                                }|sdS t	          j        |          S )zThe `lxml` HTML element object for this table.

        `None` when the `Table` element has no `.metadata.text_as_html`.
        N)r   r   r   r   r   from_html_text)r$   r   s     r%   r   zTablePreChunk._html_table  sK     !K0==LF4#))++ 	4'555r'   Iterator[TableChunk]c              #     K   | j         x}t          d          d}t                              || j                  D ]1\  }}| j        }||_        |pd|_        d}t          ||          V  2dS )zSplit table into chunks where HTML corresponds exactly to text.

        `.metadata.text_as_html` for each chunk is a parsable `<table>` HTML fragment.
        Nz<this method is undefined for a table having no .text_as_htmlFTr   )	r   rX   _TableSplitteriter_subtablesrh   r   r   is_continuationr   )r$   r   r   r   r   r   s         r%   r   z.TablePreChunk._iter_text_and_html_table_chunks  s      
 **J3[\\\(77
DJOO 	; 	;JD$~H$(H!'6'>$H$"O$:::::::	; 	;r'   c              #     K   | j         }| j        j        }d}|r7 ||          \  }}| j        }|pd|_        d}t          ||          V  |5dS dS )z>Split oversized text-only table (no text-as-html) into chunks.FNTr   )r   rh   rK   r   r   r   )r$   text_remainderrK   r   
chunk_textr   s         r%   r   z*TablePreChunk._iter_text_only_table_chunks  s      0
  	A).~)>)>&J~H'6'>$H$"O*x@@@@@@  	A 	A 	A 	A 	Ar'   r   c                   t           t          j        | j        j                  }fd                                                                D             }|D ]}t          ||d           | j        j	        r| j
        |_        |S )a
  The base `.metadata` value for chunks formed from this pre-chunk.

        The term "base" here means that other metadata fields will be added, depending on the
        chunk. In particular, `.metadata.text_as_html` will be different for each text-split chunk
        and `.metadata.is_continuation` must be added for second-and-later text-split chunks.

        Note this is a fresh copy of the metadata on each call since it will need to be mutated
        differently for each chunk formed from this pre-chunk.
        c                ,    g | ]\  }}|j         u |S r*   )DROP)ry   
field_namestrategyCSs      r%   r{   z+TablePreChunk._metadata.<locals>.<listcomp>1  s4     
 
 
$
H27"" """r'   N)r   copydeepcopyr   r   field_consolidation_strategiesitemssetattrrh   r<   _orig_elementsorig_elements)r$   r   drop_field_namesr   r   s       @r%   r   zTablePreChunk._metadata!  s     #=!566
 
 
 
(*(I(I(K(K(Q(Q(S(S
 
 

 + 	0 	0JHj$////:+ 	9%)%8H"r'   list[Element]c                R    t          j        | j                  }d|j        _        |gS )a,  The `.metadata.orig_elements` value for chunks formed from this pre-chunk.

        Note this is not just the `Table` element, it must be adjusted to strip out any
        `.metadata.orig_elements` value it may have when it is itself a chunk and not a direct
        product of partitioning.
        N)r   r   r   r   r   )r$   
orig_tables     r%   r   zTablePreChunk._orig_elements=  s(     ]4;//
,0
)|r'   c                d    d                     | j        j                                                  S )zMThe text in this table, not including any overlap-prefix or extra whitespace.rS   )joinr   r   rK   r0   s    r%   r   zTablePreChunk._table_textL  s'     xx(..00111r'   c                d    | j         }| j        j                                        }|r|dz   |z   n|S )zCThe text for this chunk, including the overlap-prefix when present.rR   )r   r   r   r   )r$   r   
table_texts      r%   r   z TablePreChunk._text_with_overlapQ  s<     -[%++--
5CS~$z11Sr'   N)r   r   r   rL   re   r   r(   rV   )r(   r   r[   )r(   r   )r(   r   r(   r   r(   r   )r\   r]   r^   r_   r&   r   r   r   r   r   r   r   r   r   r   r   r   r*   r'   r%   r   r     sX       99   
; ; ; ;8 M M M \M 	 	 	 \	 6 6 6 \6; ; ; ;&A A A A     X6    \ 2 2 2 \2 T T T \T T Tr'   r   c                      e Zd ZdZd#d
Zd$dZd%dZd&dZd'dZe	d(d            Z
e	d)d            Ze	d*d            Ze	d*d            Zd+dZe	d,d            Ze	d-d             Ze	d(d!            Zd"S ).r   ao  A sequence of elements that belong to the same semantic unit within a document.

    The name "section" derives from the idea of a document-section, a heading followed by the
    paragraphs "under" that heading. That structure is not found in all documents and actual section
    content can vary, but that's the concept.

    This object is purposely immutable.
    rc   rd   r   rL   re   r   r(   rV   c                J    t          |          | _        || _        || _        d S r"   )r   rg   r   rh   )r$   rc   r   re   s       r%   r&   zTextPreChunk.__init__d  s$     h-


r'   otherr   r   c                p    t          |t                    sdS | j        |j        k    o| j        |j        k    S )NF)r   r   r   rg   )r$   r   s     r%   __eq__zTextPreChunk.__eq__k  s:    %.. 	5#u'<<bSXSbAbbr'   r   c                    t          | j                  | j        j        k    rdS t          |                     |          j                  }|| j        j        k    S )zRTrue when `pre_chunk` can be combined with this one without exceeding size limits.F)r   _textrh   r3   combiner:   )r$   r   combined_lens      r%   can_combinezTextPreChunk.can_combinep  sM    tz??djCCC5
 4<<	22899tz222r'   other_pre_chunkc                T    t          | j        |j        z   | j        | j                  S )zCReturn new `TextPreChunk` that combines this and `other_pre_chunk`.)r   re   )r   rg   r   rh   )r$   r   s     r%   r   zTextPreChunk.combine|  s3     N_66/
 
 
 	
r'   Iterator[CompositeElement]c              #     K   | j         sdS | j        j        } || j                   \  }}t          || j                  V  |r* ||          \  }}t          || j                  V  |(dS dS )zSSplit this pre-chunk into one or more `CompositeElement` objects maxlen or smaller.Nr   )r   rh   rK   r   _consolidated_metadata_continuation_metadata)r$   rK   s	remainders       r%   r   zTextPreChunk.iter_chunks  s       z 	F
  uTZ((9A0KLLLLLL  	Q 5++LAy"D4OPPPPPP  	Q 	Q 	Q 	Q 	Qr'   c                f    | j         j        }|r"| j        | d                                         ndS r   )rh   rA   r   r   r   s     r%   r   zTextPreChunk.overlap_tail  s7     *007?tz7())$**,,,R?r'   dict[str, list[Any]]c                    dd}t          j        t                    }| j        D ]3} ||j                  D ] \  }}||                             |           !4t          |          S )a  Collection of all populated metadata values across elements.

        The resulting dict has one key for each `ElementMetadata` field that had a non-None value in
        at least one of the elements in this pre-chunk. The value of that key is a list of all those
        populated values, in element order, for example:

            {
                "filename": ["sample.docx", "sample.docx"],
                "languages": [["lat"], ["lat", "eng"]]
                ...
            }

        This preprocessing step provides the input for a specified consolidation strategy that will
        resolve the list of values for each field to a single consolidated value.
        r   r   r(   Iterator[tuple[str, Any]]c                H    d | j                                         D             S )zM(field_name, value) pair for each non-None field in single `ElementMetadata`.c              3  (   K   | ]\  }}|||fV  d S r"   r*   )ry   r   values      r%   	<genexpr>zSTextPreChunk._all_metadata_values.<locals>.iter_populated_fields.<locals>.<genexpr>  s=        %J$ U#$$$$ r'   )known_fieldsr   )r   s    r%   iter_populated_fieldsz@TextPreChunk._all_metadata_values.<locals>.iter_populated_fields  s2     )1)>)D)D)F)F   r'   )r   r   r(   r   )collectionsdefaultdictr   rg   r   r   dict)r$   r   field_valueser   r   s         r%   _all_metadata_valuesz!TextPreChunk._all_metadata_values  s    $	 	 	 	 5@4KD4Q4Q  	7 	7A%:%:1:%F%F 7 7!
EZ(//66667 L!!!r'   r   c                X    t          di | j        }| j        j        r| j        |_        |S )a  Metadata applicable to this pre-chunk as a single chunk.

        Formed by applying consolidation rules to all metadata fields across the elements of this
        pre-chunk.

        For the sake of consistency, the same rules are applied (for example, for dropping values)
        to a single-element pre-chunk too, even though metadata for such a pre-chunk is already
        "consolidated".
        r*   )r   _meta_kwargsrh   r<   r   r   )r$   consolidated_metadatas     r%   r   z#TextPreChunk._consolidated_metadata  s;     !0 D D$2C D D:+ 	F262E!/$$r'   c                F    t          j         | j                  }d|_        |S )a  Metadata applicable to the second and later text-split chunks of the pre-chunk.

        The same metadata as the first text-split chunk but includes `.is_continuation = True`.
        Unused for non-oversized pre-chunks since those are not subject to text-splitting.
        T)r   r   r   )r$   continuation_metadatas     r%   r   z#TextPreChunk._continuation_metadata  s&     !%	$*E F F04-$$r'   Iterator[str]c              #  `   K   | j         r	| j         V  | j        D ]}|j        s
|j        V  dS )zuGenerate overlap text and each element text segment in order.

        Empty text segments are not included.
        N)r   rg   r   )r$   r   s     r%   _iter_text_segmentsz TextPreChunk._iter_text_segments  sW      
  	'&&&& 	 	A6 &LLLL	 	r'   dict[str, Any]c                z     t           t          j                    d fd}t           |                      S )a$  The consolidated metadata values as a dict suitable for constructing ElementMetadata.

        This is where consolidation strategies are actually applied. The output is suitable for use
        in constructing an `ElementMetadata` object like `ElementMetadata(**self._meta_kwargs)`.
        r(   r   c            	   3    K   j                                         D ]\  } }                    |           }|j        u r| |d         fV  0|j        u r#| t          |t          dg                     fV  \|j        u r2d |D             }| t          |	                                          fV  |j
        u r$| d                    d |D                       fV  |j        u rt          dt          |            d          dS )	zKGenerate (field-name, value) pairs for each field in consolidated metadata.r   z	list[Any]c                    i | ]
}|D ]}|d S r"   r*   )ry   val_listkeys      r%   
<dictcomp>zGTextPreChunk._meta_kwargs.<locals>.iter_kwarg_pairs.<locals>.<dictcomp>  s)    *`*`*`W_*`*`PS3*`*`*`*`r'   rS   c              3  >   K   | ]}|                                 V  d S r"   )r   )ry   vals     r%   r   zFTextPreChunk._meta_kwargs.<locals>.iter_kwarg_pairs.<locals>.<genexpr>
  s*      .M.Mssyy{{.M.M.M.M.M.Mr'   zmetadata field z& has no defined consolidation strategyN)r  r   r5   FIRSTLIST_CONCATENATEsumr	   LIST_UNIQUEr   keysSTRING_CONCATENATEr   r   NotImplementedErrorrepr)r   valuesr   ordered_unique_keysr   r   r$   s       r%   iter_kwarg_pairsz3TextPreChunk._meta_kwargs.<locals>.iter_kwarg_pairs  sc     &*&?&E&E&G&G  "
F9==jIIrx''$fQi/////!444$c&${B2G2G&H&HHHHHH//*`*`V*`*`*`'$d+>+C+C+E+E&F&FFFFFF!666$chh.M.Mf.M.M.M&M&MMMMMM(( .b$z*:*:bbb  ' r'   )r(   r   )r   r   r   )r$   r  r   r   s   ` @@r%   r  zTextPreChunk._meta_kwargs  s]     #)>)])_)_&	 	 	 	 	 	 	 	2 $$&&'''r'   r   c                <      fd}t           |                      S )zJThe `.metadata.orig_elements` value for chunks formed from this pre-chunk.c               3     K   j         D ]7} | j        j        | V  t          j        |           }d |j        _        |V  8d S r"   )rg   r   r   r   )r   orig_elementr$   s     r%   iter_orig_elementsz7TextPreChunk._orig_elements.<locals>.iter_orig_elements  sd      ^ 
# 
#:+3GGG  $y|| 7;%3"""""
# 
#r'   )r   )r$   r!  s   ` r%   r   zTextPreChunk._orig_elements  s6    	# 	# 	# 	# 	# &&(()))r'   c                h    | j         j        }|                    |                                           S )zThe concatenated text of all elements in this pre-chunk.

        Each element-text is separated from the next by a blank line ("

").
        )rh   rN   r   r	  )r$   rN   s     r%   r   zTextPreChunk._text)  s.     2""4#;#;#=#=>>>r'   N)rc   rd   r   rL   re   r   r(   rV   )r   r   r(   r   r   r   r(   r   )r   r   r(   r   )r(   r   r[   )r(   r   r   r(   r  )r(   r
  r   )r\   r]   r^   r_   r&   r   r   r   r   r   r   r  r   r   r	  r  r   r   r*   r'   r%   r   r   Z  s           c c c c

3 
3 
3 
3
 
 
 
Q Q Q Q& @ @ @ \@  "  "  " \ "D % % % \% % % % \%
 
 
 
 "( "( "( \"(H * * * \*$ ? ? ? \? ? ?r'   r   c                  J    e Zd ZdZddZedd	            Zdd
ZddZddZ	dS )r   ap  Produces (text, html) pairs for a `<table>` HtmlElement.

    Each chunk contains a whole number of rows whenever possible. An oversized row is split on an
    even cell boundary and a single cell that is by itself too big to fit in the chunking window
    is divided by text-splitting.

    The returned `html` value is always a parseable HTML `<table>` subtree.
    table_elementr   re   r   c                "    || _         || _        d S r"   )_table_elementrh   )r$   r&  re   s      r%   r&   z_TableSplitter.__init__B  s    +


r'   r(   Iterator[TextAndHtml]c                >     | ||                                           S )a  Generate (text, html) pair for each split of this table pre-chunk.

        Each split is on an even row boundary whenever possible, falling back to even cell and even
        word boundaries when a row or cell is by itself oversized, respectively.
        )_iter_subtables)r,   r&  re   s      r%   r   z_TableSplitter.iter_subtablesF  s"     s=$''77999r'   c              #    K   t          | j        j                  }| j                                        D ]w}|                    |          s|                                E d{V  |                    |          r|                    |           \|                     |          E d{V  x|                                E d{V  dS )zGenerate (text, html) pairs containing as many whole rows as will fit in window.

        Falls back to splitting rows into whole cells when a single row is by itself too big to
        fit in the chunking window.
        r   N)	_RowAccumulatorrh   r:   r(  	iter_rowsrp   rq   add_row_iter_row_splits)r$   accumrows      r%   r+  z_TableSplitter._iter_subtablesQ  s        tz':;;;&0022 	6 	6C>>#&& ) ;;==(((((((~~c"" 6c""""005555555555;;==         r'   r3  r   c              #    K   t          | j        j                  }|                                D ]w}|                    |          s|                                E d{V  |                    |          r|                    |           \|                     |          E d{V  x|                                E d{V  dS )zQSplit oversized row into (text, html) pairs containing as many cells as will fit.r-  N)_CellAccumulatorrh   r:   
iter_cellsrp   rq   add_cell_iter_cell_splits)r$   r3  r2  cells       r%   r1  z_TableSplitter._iter_row_splitse  s       
(;<<<NN$$ 	8 	8D>>$'' ) ;;==(((((((~~d## 8t$$$$11$7777777777;;==         r'   r9  r   c              #     K   t          | j        j        dz
            }t          |          } ||j                  \  }}|d| dfV  |r ||          \  }}|d| dfV  |dS dS )zDSplit a single oversized cell into sub-sub-sub-table HTML fragments.!   )r9   z<table><tr><td>z</td></tr></table>N)r   rh   r:   rJ   r   )r$   r9  re   rK   r   r   s         r%   r8  z _TableSplitter._iter_cell_splitsu  s       tz/BR/GIIId##%	**i>d>>>>>>>  	C#eI..OD)B$BBBBBBB  	C 	C 	C 	C 	Cr'   N)r&  r   re   r   )r&  r   re   r   r(   r)  r(   r)  )r3  r   r(   r)  )r9  r   r(   r)  )
r\   r]   r^   r_   r&   r`   r   r+  r1  r8  r*   r'   r%   r   r   8  s             : : : [:! ! ! !(! ! ! ! C C C C C Cr'   r   c                  B    e Zd ZdZddZdd	Zedd            ZddZdS )rJ   a  Provides a text-splitting function configured on construction.

    Text is split on the best-available separator, falling-back from the preferred separator
    through a sequence of alternate separators.

    - The separator is removed by splitting so only whitespace strings are suitable separators.
    - A "blank-line" ("

") is unlikely to occur in an element as it would have been used as an
      element boundary during partitioning.

    This is a *callable* object. Constructing it essentially produces a function:

        split = _TextSplitter(opts)
        fragment, remainder = split(s)

    This allows it to be configured with length-options etc. on construction and used throughout a
    chunking operation on a given element-stream.
    re   r   c                    || _         d S r"   )rh   r   s     r%   r&   z_TextSplitter.__init__  s    


r'   r   rL   r(   tuple[str, str]c                   | j         j        }t          |          |k    r|dfS | j        D ]k\  }}|                     |||          \  }}|r t          |          t          |          k    rB|                                |                                fc S |d|                                         ||| j         j        z
  d                                         fS )a  Return pair of strings split from `s` on the best match of configured patterns.

        The first string is the split, the second is the remainder of the string. The split string
        will never be longer than `maxlen`. The separators are tried in order until a match is
        found. The last separator is "" which matches between any two characters so there will
        always be a split.

        The separator is removed and does not appear in the split or remainder.

        An `s` that is already less than the maximum length is returned unchanged with no remainder.
        This allows this function to be called repeatedly with the remainder until it is consumed
        and returns a remainder of "".
        r   N)rh   r:   r   	_patterns_split_from_maxlenrstriplstripr?   )r$   r   r   psep_lenfragmentr   s          r%   __call__z_TextSplitter.__call__  s     $q66Vb5L. 	9 	9JAw #'"9"9!Wa"H"HHi  y>>SVV++??$$i&6&6&8&88888
 &z  ""Aftz/A&A&C&C$D$K$K$M$MMMr'   *tuple[tuple[regex.Pattern[str], int], ...]c                L    | j         j        }t          d |D                       S )a  Sequence of (pattern, len) pairs to match against.

        Patterns appear in order of preference, those following are "fall-back" patterns to be used
        if no match of a prior pattern is found.

        NOTE these regexes search *from the end of the string*, which is what the "(?r)" bit
        specifies. This is much more efficient than starting at the beginning of the string which
        could result in hundreds of matches before the desired one.
        c              3  b   K   | ]*}t          j        d |           t          |          fV  +dS )z(?r)N)regexcompiler   )ry   seps     r%   r   z*_TextSplitter._patterns.<locals>.<genexpr>  s=      SSemL3LL113s88<SSSSSSr'   )rh   rQ   rT   )r$   
separatorss     r%   rA  z_TextSplitter._patterns  s+     Z9
SS
SSSSSSr'   patternregex.Pattern[str]rF  r   c                   | j         j        | j         j        }}|                    ||dz   ||z             }|d|fS |                                \  }}d}	|d|                                         }
||d                                         }|t          |	          k    r|
|fS |t          |	          z
  }|
| d                                         }||	z   |z   }|
|fS )a  Return (split, remainder) pair split from `s` on the right-most match before `maxlen`.

        Returns `"", s` if no suitable match was found. Also returns `"", s` if splitting on this
        separator produces a split shorter than the required overlap (which would produce an
        infinite loop).

        `split` will never be longer than `maxlen` and there is no longer split available using
        `pattern`.

        The separator is removed and does not appear in either the split or remainder.
        r   )posendposNr   rS   )rh   r:   r?   searchspanrC  rD  r   )r$   rP  rF  r   r   r?   matchmatch_start	match_end	separatorrG  raw_remaindertail_lentailoverlapped_remainders                  r%   rB  z _TextSplitter._split_from_maxlen  s     *-tz/A qgk&7:JKK=q5L "'Y	 \k\?))++)**,,..c)nn$$]** S^^+	

#**,,#i/-?---r'   N)re   r   )r   rL   r(   r?  )r(   rI  )rP  rQ  rF  r   r   rL   r(   r?  )	r\   r]   r^   r_   r&   rH  r   rA  rB  r*   r'   r%   rJ   rJ     s         $   #N #N #N #NJ T T T \T(. (. (. (. (. (.r'   rJ   c                  R    e Zd ZdZddZdd	ZddZddZddZe	dd            Z
dS )r5  zIncrementally build `<table>` fragment cell-by-cell to maximally fill chunking window.

    Accumulate cells until chunking window is filled, then generate the text and HTML for the
    subtable composed of all those rows that fit in the window.
    r   r   c                "    || _         g | _        d S r"   )_maxlen_cellsr$   r   s     r%   r&   z_CellAccumulator.__init__  s    &(r'   r9  r   r(   rV   c                :    | j                             |           dS )zPAdd `cell` to this accumulation. Caller is responsible for ensuring it will fit.N)rb  r   r$   r9  s     r%   r7  z_CellAccumulator.add_cell  s    4     r'   r)  c              #     K   | j         sdS d                    |                                           }d                    d | j         D                       }d| d}| j                                          ||fV  dS )zFGenerate zero-or-one (text, html) pairs for accumulated sub-sub-table.NrS   r   c              3  $   K   | ]}|j         V  d S r"   r   ry   cs     r%   r   z)_CellAccumulator.flush.<locals>.<genexpr>  s$      66Q!&666666r'   z<table><tr>z</tr></table>)rb  r   _iter_cell_textsr   )r$   r   tds_strr   s       r%   rq   z_CellAccumulator.flush  s      { 	Fxx--//00''66$+666663W333Djr'   r   c                <    | j         t          |j                  k    S )zLTrue when `cell` will fit within remaining space left by accummulated cells.r   r   r   re  s     r%   rp   z_CellAccumulator.will_fit  s    $DI66r'   r  c              #  :   K   | j         D ]}|j        x}s|V  dS )zGenerate contents of each accumulated cell as a separate string.

        A cell that is empty or contains only whitespace does not generate a string.
        N)rb  r   )r$   r9  r   s      r%   rk  z!_CellAccumulator._iter_cell_texts  s@      
 K 	 	D I%D JJJJ	 	r'   c                T    | j         dz
  t          d | j        D                       z
  S )zKNumber of characters remaining when accumulated cells are formed into HTML.   c              3  >   K   | ]}t          |j                  V  d S r"   r   r   ri  s     r%   r   z4_CellAccumulator._remaining_space.<locals>.<genexpr>$  s*      &H&Hqs16{{&H&H&H&H&H&Hr'   )ra  r  rb  r0   s    r%   r   z!_CellAccumulator._remaining_space  s/    
 |b 3&H&HDK&H&H&H#H#HHHr'   Nr   r   )r9  r   r(   rV   r<  )r9  r   r(   r   r$  rZ   )r\   r]   r^   r_   r&   r7  rq   rp   rk  r   r   r*   r'   r%   r5  r5    s         ) ) ) )! ! ! !   7 7 7 7    I I I XI I Ir'   r5  c                  R    e Zd ZdZddZdd	ZddZddZddZe	dd            Z
dS )r.  zMaybe `SubtableAccumulator`.

    Accumulate rows until chunking window is filled, then generate the text and HTML for the
    subtable composed of all those rows that fit in the window.
    r   r   c                "    || _         g | _        d S r"   )ra  _rowsrc  s     r%   r&   z_RowAccumulator.__init__.  s    $&


r'   r3  r   r(   rV   c                :    | j                             |           dS )zOAdd `row` to this accumulation. Caller is responsible for ensuring it will fit.N)rw  r   r$   r3  s     r%   r0  z_RowAccumulator.add_row2  s    
#r'   r)  c              #     K   | j         sdS d                    |                                           }d                    d | j         D                       }d| d}| j                                          ||fV  dS )zBGenerate zero-or-one (text, html) pairs for accumulated sub-table.NrS   r   c              3  $   K   | ]}|j         V  d S r"   rh  ry   rs     r%   r   z(_RowAccumulator.flush.<locals>.<genexpr>;  s$      55Q!&555555r'   z<table>z</table>)rw  r   rk  r   )r$   r   trs_strr   s       r%   rq   z_RowAccumulator.flush6  s      z 	Fxx--//00''55$*55555****
Djr'   r   c                <    | j         t          |j                  k    S )zJTrue when `row` will fit within remaining space left by accummulated rows.rn  ry  s     r%   rp   z_RowAccumulator.will_fit@  s    $CH55r'   r  c              #  R   K   | j         D ]}|                                E d{V  dS )zGenerate contents of each row cell as a separate string.

        A cell that is empty or contains only whitespace does not generate a string.
        N)rw  iter_cell_texts)r$   r}  s     r%   rk  z _RowAccumulator._iter_cell_textsD  sJ      
  	+ 	+A((**********	+ 	+r'   c                T    | j         dz
  t          d | j        D                       z
  S )zJNumber of characters remaining when accumulated rows are formed into HTML.   c              3  >   K   | ]}t          |j                  V  d S r"   rs  r|  s     r%   r   z3_RowAccumulator._remaining_space.<locals>.<genexpr>P  s*      &G&Gqs16{{&G&G&G&G&G&Gr'   )ra  r  rw  r0   s    r%   r   z _RowAccumulator._remaining_spaceL  s/     |b 3&G&GDJ&G&G&G#G#GGGr'   Nrt  )r3  r   r(   rV   r<  )r3  r   r(   r   r$  rZ   )r\   r]   r^   r_   r&   r0  rq   rp   rk  r   r   r*   r'   r%   r.  r.  '  s         ' ' ' '      6 6 6 6+ + + + H H H XH H Hr'   r.  c                  "    e Zd ZdZddZdd	Zd
S )PreChunkCombinerzDFilters pre-chunk stream to combine small pre-chunks where possible.
pre_chunksIterable[PreChunk]re   r   c                "    || _         || _        d S r"   )_pre_chunksrh   )r$   r  re   s      r%   r&   zPreChunkCombiner.__init__[  s    %


r'   r(   ri   c              #  j  K   t          | j                  }| j        D ]z}t          |t                    r|                                E d{V  |V  6|                    |          s|                                E d{V  |                    |           {|                                E d{V  dS )zVGenerate pre-chunk objects, combining TextPreChunk objects when they'll fit in window.N)TextPreChunkAccumulatorrh   r  r   r   rq   rp   add_pre_chunk)r$   r2  r   s      r%   iter_combined_pre_chunksz)PreChunkCombiner.iter_combined_pre_chunks_  s      '
33) 	+ 	+I)]33  ;;==((((((( >>),, ) ;;==(((((((	****;;==         r'   N)r  r  re   r   r~   )r\   r]   r^   r_   r&   r  r*   r'   r%   r  r  X  sB        NN   ! ! ! ! ! !r'   r  c                  2    e Zd ZdZddZdd	ZddZddZdS )r  a  Accumulates, measures, and combines text pre-chunks.

    Used for combining pre-chunks for chunking strategies like "by-title" that can potentially
    produce undersized chunks and offer the `combine_text_under_n_chars` option. Note that only
    sequential `TextPreChunk` objects can be combined. A `TablePreChunk` is never combined with
    another pre-chunk.

    Provides `.add_pre_chunk()` allowing a pre-chunk to be added to the chunk and provides
    monitoring properties `.remaining_space` and `.text_length` suitable for deciding whether to add
    another pre-chunk.

    `.flush()` is used to combine the accumulated pre-chunks into a single `TextPreChunk` object.
    This method returns an interator that generates zero-or-one `TextPreChunk` objects and is used
    like so:

        yield from accum.flush()

    If no pre-chunks have been accumulated, no `TextPreChunk` is generated. Flushing the builder
    clears the pre-chunks it contains so it is ready to accept the next text-pre-chunk.
    re   r   r(   rV   c                "    || _         d | _        d S r"   )rh   
_pre_chunkr   s     r%   r&   z TextPreChunkAccumulator.__init__  s    
/3r'   r   r   c                V    | j         |n| j                             |          | _         dS )zPAdd a pre-chunk to the accumulator for possible combination with next pre-chunk.N)r  r   r   s     r%   r  z%TextPreChunkAccumulator.add_pre_chunk  s-     0IIdo6M6Mi6X6X 	r'   Iterator[TextPreChunk]c              #  <   K   | j         sdS | j         V  d| _         dS )zGenerate accumulated pre-chunk as a single combined pre-chunk.

        Does not generate a pre-chunk when none has been accumulated.
        N)r  r0   s    r%   rq   zTextPreChunkAccumulator.flush  s0        	For'   r   c                H    | j         dS | j                             |          S )a3  True when there is room for `pre_chunk` in accumulator.

        An empty accumulator always has room. Otherwise there is only room when `pre_chunk` can be
        combined with any other pre-chunks in the accumulator without exceeding the combination
        limits specified for the chunking run.
        NT)r  r   r   s     r%   rp   z TextPreChunkAccumulator.will_fit  s'     ?"4**9555r'   Nr   )r   r   r(   rV   )r(   r  r#  )r\   r]   r^   r_   r&   r  rq   rp   r*   r'   r%   r  r  s  sn         *4 4 4 4
 
 
 
   6 6 6 6 6 6r'   r  r(   c                      dddfd} | S )	a  Not a predicate itself, calling this returns a predicate that triggers on each new page.

    The lifetime of the returned callable cannot extend beyond a single element-stream because it
    stores current state (current page-number) that is particular to that element stream.

    The returned predicate tracks the "current" page-number, starting at 1. An element with a
    greater page number returns True, indicating the element starts a new page boundary, and
    updates the enclosed page-number ready for the next transition.

    An element with `page_number == None` or a page-number lower than the stored value is ignored
    and returns False.
    r   Trt   r   r(   r   c                P    | j         j        }r|pdddS |dS |k    rdS |dS )Nr   FT)r   page_number)rt   r  current_page_numberis_firsts     r%   page_number_incrementedz0is_on_next_page.<locals>.page_number_incremented  sZ     &2
  	"-"2H5
 5---5 *tr'   r   r*   )r  r  r  s    @@r%   is_on_next_pager    s<      !H      6 #"r'   rt   r   c                ,    t          | t                    S )z:True when `element` is a `Title` element, False otherwise.)r   r   )rt   s    r%   is_titler    s    gu%%%r'   )r(   r   r   )4r_   
__future__r   r   r   typingr   r   r   r   r   r	   rL  typing_extensionsr
   r   unstructured.common.html_tabler   r   r   unstructured.documents.elementsr   r   r   r   r   r   r   unstructured.utilsr   r   __annotations__r   r   r   r   rT   rL   r   r   rb   rn   r   r   r   rJ   r5  r.  r  r  r  r  r*   r'   r%   <module>r     s   F F F " " " " " "      G G G G G G G G G G G G G G G G  - - - - - - - - G G G G G G G G G G                  , + + + + +  #  " " " " "&  % % % %
  (	48  8 8 8 8 X4 4 4 4 4 3sCx ( ( ( (u u u u u u u uzC( C( C( C( C( C( C( C(LpH pH pH pH pH pH pH pHpcT cT cT cT cT cT cT cTLV? V? V? V? V? V? V? V?|IC IC IC IC IC IC IC ICXq. q. q. q. q. q. q. q.h,I ,I ,I ,I ,I ,I ,I ,I^)H )H )H )H )H )H )H )Hb! ! ! ! ! ! ! !686 86 86 86 86 86 86 86\+# +# +# +#\& & & & & &r'   