
    NgE                    V   U d Z ddlmZ ddlZddlZddlZddlmZm	Z	m
Z
mZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ  ed
          Z G d de          Zd$dZd%dZd&dZ ej        d           G d d                       Z ee           ee          d!Zd"e d#<   dS )'zHandles dispatch of elements to a chunking-strategy by name.

Also provides the `@add_chunking_strategy` decorator which is the chief current user of "by-name"
chunking dispatch.
    )annotationsN)AnyCallableIterableOptionalProtocol)	ParamSpec)chunk_elements)chunk_by_title)Element)get_call_args_applying_defaultslazyproperty_Pc                      e Zd ZdZd
dZd	S )Chunkerz*Abstract interface for chunking functions.elementsIterable[Element]max_charactersOptional[int]returnlist[Element]c                   dS )a  A chunking function must have this signature.

        In particular it must minimally have an `elements` parameter and all chunkers will have a
        `max_characters` parameter (doesn't need to follow `elements` directly). All others can
        vary by chunker.
        N )selfr   r   s      Z/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/chunking/dispatch.py__call__zChunker.__call__   s	     	    N)r   r   r   r   r   r   )__name__
__module____qualname____doc__r   r   r   r   r   r      s.        44	 	 	 	 	 	r   r   funcCallable[_P, list[Element]]r   c                      j         r'd j        j        v rd j         vr xj         dz  c_         t          j                   d
 fd	            }|S )zDecorator for chunking text.

    Chunks the element sequence produced by the partitioner it decorates when a `chunking_strategy`
    argument is present in the partitioner call and it names an available chunking strategy.
    chunking_strategya  
chunking_strategy
	Strategy used for chunking text into larger or smaller elements.
	Defaults to `None` with optional arg of 'basic' or 'by_title'.
	Additional Parameters:
		multipage_sections
			If True, sections can span multiple pages. Defaults to True.
		combine_text_under_n_chars
			Combines elements (for example a series of titles) until a section
			reaches a length of n characters. Only applies to 'by_title' strategy.
		new_after_n_chars
			Cuts off chunks once they reach a length of n characters; a soft max.
		max_characters
			Chunks elements text and text_as_html (if present) into chunks
			of length n characters, a hard max.args_P.argskwargs	_P.kwargsr   r   c                      | i |}t          g| R i |}|                    dd          }||S t          ||fi |S )z1The decorated function is replaced with this one.r%   N)r   popchunk)r&   r(   r   	call_argsr%   r"   s        r   wrapperz&add_chunking_strategy.<locals>.wrapperE   su    
 4((( 4DJ4JJJ6JJ	%MM*=tDD $O X0>>I>>>r   )r&   r'   r(   r)   r   r   )r!   __code__co_varnames	functoolswraps)r"   r.   s   ` r   add_chunking_strategyr3   '   s     | 
t}888=PX\Xd=d=d<	
" _T? ? ? ? ? ?" Nr   r   r   r%   strr(   r   r   c                    t                               |          t          dt          |                     fd|                                D             } j        | fi |S )zQDispatch chunking of `elements` to the chunking function for `chunking_strategy`.Nzunrecognized chunking strategy c                .    i | ]\  }}|j         v ||S r   )kw_arg_names).0kvchunker_specs      r   
<dictcomp>zchunk.<locals>.<dictcomp>c   s,    YYY1!|?X:X:Xq!:X:X:Xr   )_chunker_registryget
ValueErrorrepritemschunker)r   r%   r(   chunking_kwargsr;   s       @r   r,   r,   Z   s|    $(():;;LT4@Q;R;RTTUUU ZYYYYYYO<<<O<<<r   namerB   Nonec                4    t          |          t          | <   dS )zVMake chunker available by using `name` as `chunking_strategy` arg in partitioner call.N)_ChunkerSpecr=   )rD   rB   s     r   register_chunking_strategyrH   h   s    *733dr   T)frozenc                  8    e Zd ZU dZded<   	 edd            ZdS )	rG   zA registry entry for a chunker.r   rB   r   tuple[str, ...]c                p    t          j        | j                  }t          d |j        D                       S )zKeyword arguments supported by this chunker.

        These are all arguments other than the required `elements: list[Element]` first parameter.
        c              3  &   K   | ]}|d k    |V  dS )r   Nr   )r8   keys     r   	<genexpr>z,_ChunkerSpec.kw_arg_names.<locals>.<genexpr>{   s,      HHScZ6G6GS6G6G6G6GHHr   )inspect	signaturerB   tuple
parameters)r   sigs     r   r7   z_ChunkerSpec.kw_arg_namest   s5     --HHCNHHHHHHr   N)r   rK   )r   r   r    r!   __annotations__r   r7   r   r   r   rG   rG   m   sO         ))NI I I \I I Ir   rG   )basicby_titlezdict[str, _ChunkerSpec]r=   )r"   r#   r   r#   )r   r   r%   r4   r(   r   r   r   )rD   r4   rB   r   r   rE   )!r!   
__future__r   dataclassesdcr1   rP   typingr   r   r   r   r   typing_extensionsr	   unstructured.chunking.basicr
   unstructured.chunking.titler   unstructured.documents.elementsr   unstructured.utilsr   r   r   r   r3   r,   rH   	dataclassrG   r=   rU   r   r   r   <module>rb      s     # " " " " "          > > > > > > > > > > > > > > ' ' ' ' ' ' 6 6 6 6 6 6 6 6 6 6 6 6 3 3 3 3 3 3 L L L L L L L LYt__    h   0 0 0 0f= = = =4 4 4 4
 TI I I I I I I I" \.))^,,. .       r   