
    Ng                         d dl mZ d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ  ej        e eed           g          d             Zd	 Zd
 Zd ZdS )    )partialN)chunk_elements)chunk_by_title)ElementMetadataNarrativeTextTextTitle)combine_text_under_n_chars)paramsc                     | j         S )N)param)requests    g/var/www/html/ai-engine/env/lib/python3.11/site-packages/test_unstructured/chunking/test_html_output.pychunking_fnr   
   s
    =    c                 j   d}d}d}d                     |||g          }t          dt          |                    t          dt          |                    t          d	t          |                    g} | |          }t	          |          d
k    sJ |d         j        j        |k    sJ d S )N%<h1 class="Title" id="1">Header </h1>z@<time class="CalendarDate" id="2">Date: October 30, 2023 </time>z<form class="Form" id="3"> <label class="FormField" for="company-name" id="4">Form field name </label><input class="FormFieldValue" id="5" value="Example value" /></form> Headertext_as_htmltextmetadatazDate: October 30, 2023zForm field name Example value   r   )joinr	   r   r   lenr   r   )r   
metadata_1
metadata_2
metadata_3combined_metadataelementschunkss          r   Htest_combining_html_metadata_when_multiple_elements_in_composite_elementr$      s    8JSJ	  *j*!EFF 	8o:&N&N&NOOO*_R\5]5]5]^^^0?Xb;c;c;c	
 	
 	
H [""Fv;;!!9*.???????r   c           	         d}d}d}t          dt          |                    t          dt          |d	                    t          d
t          |d	                    g} | |d          }t          |          dk    sJ |d         j        dk    sJ |d         j        d
k    sJ |d         j        j        |dz   |z   k    sJ |d         j        j        |k    sJ dS )aH  
    Ground truth
    <Document>
        <Page>
            <Section>
                <p>First</p>
                <p>Second</p>
            </Section>
        </Page>
    </Document>
    Elements: Document, Page, Section, Paragraph, Paragraph
    Chunk 1: Document, Page, Section, Paragraph

    Chunk 2:
        Paragraph
    z<div class="Section" id="1" />z&<p class="Paragraph" id="2">First </p>z'<p class="Paragraph" id="3">Second </p> r   r   First1)r   	parent_idSecond   max_characters   r   r   r   N)r   r   r   r   r   r   r   )r   r   r   r    r"   r#   s         r   Ftest_combining_html_metadata_with_nested_relationship_between_elementsr/   &   s$   $ 2J9J:J 	"JGGGHHH?
VY#Z#Z#Z	
 	
 	
 	OWZ$[$[$[	
 	
 	
H [!444Fv;;!!9>W$$$$!9>X%%%%!9*j3.>.KKKKK!9*j888888r   c                 :   d}t          dt          |                    g} | |d          }t          |          dk    sJ |d         j        d	k    sJ |d
         j        dk    sJ |d         j        j        dk    sJ |d
         j        j        dk    sJ dS )z2Mimic behaviour of elements with non-html metadatar   r   r   r      r,   r.   r   Hear   derN)r	   r   r   r   r   r   )r   r   r"   r#   s       r   ;test_html_metadata_exist_in_both_element_when_text_is_splitr4   N   s    8J8o:&N&N&NOOOH [!444Fv;;!!9>U""""!9>U""""!9*.UUUUU!9*.UUUUUUUr   )	functoolsr   pytestunstructured.chunking.basicr   unstructured.chunking.titler   unstructured.documents.elementsr   r   r   r	   fixturer   r$   r/   r4    r   r   <module>r<      s           6 6 6 6 6 6 6 6 6 6 6 6 W W W W W W W W W W W W [\(](](]^___  `_@ @ @.%9 %9 %9PV V V V Vr   