
    Ng                        d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	m
Z
  G d d          Z G d d	          Z G d
 d          Z G d d          ZdS )z@Unit-test suite for the `unstructured.common.html_table` module.    )annotationsN)fragment_fromstring)HtmlCellHtmlRow	HtmlTablehtmlify_matrix_of_cell_textsc                  *    e Zd ZdZd Zd Zd Zd ZdS )%Describe_htmlify_matrix_of_cell_textszTUnit-test suite for `unstructured.common.html_table.htmlify_matrix_of_cell_texts()`.c                <    t          g dg dg          dk    sJ d S )N)cell1 cell3)r   cell5r   zZ<table><tr><td>cell1</td><td/><td>cell3</td></tr><tr><td/><td>cell5</td><td/></tr></table>r   selfs    d/var/www/html/ai-engine/env/lib/python3.11/site-packages/test_unstructured/common/test_html_table.py'test_htmlify_matrix_handles_empty_cellszMDescribe_htmlify_matrix_of_cell_texts.test_htmlify_matrix_handles_empty_cells   sC    +-C-C-CEVEVEV,WXX
 
 
 
 
 
    c                6    t          ddgg          dk    sJ d S )Nz<>&"znewline
zI<table><tr><td>&lt;&gt;&amp;&quot;</td><td>newline<br/></td></tr></table>r   r   s    r   .test_htmlify_matrix_handles_special_characterszTDescribe_htmlify_matrix_of_cell_texts.test_htmlify_matrix_handles_special_characters   s6    +fk-B,CDDW
 
 
 
 
 
r   c                <    t          ddgddgg          dk    sJ d S )Nr   cell2r   cell4zY<table><tr><td>cell1</td><td>cell2</td></tr><tr><td>cell3</td><td>cell4</td></tr></table>r   r   s    r   3test_htmlify_matrix_handles_multiple_rows_and_cellszYDescribe_htmlify_matrix_of_cell_texts.test_htmlify_matrix_handles_multiple_rows_and_cells"   s?    +gw-?'7AS,TUU
 
 
 
 
 
r   c                0    t          g           dk    sJ d S )Nr   r   r   s    r   (test_htmlify_matrix_handles_empty_matrixzNDescribe_htmlify_matrix_of_cell_texts.test_htmlify_matrix_handles_empty_matrix*   s!    +B//2555555r   N)__name__
__module____qualname____doc__r   r   r   r    r   r   r
   r
      sV        ^^
 
 

 
 


 
 
6 6 6 6 6r   r
   c                      e Zd ZdZd Zej                            dg d          dd            Zd Z	d Z
ej                            dg d	          dd
            Zd Zd Zd Zd Zd ZdS )DescribeHtmlTablez?Unit-test suite for `unstructured.common.html_table.HtmlTable`.c                    t          j        d          }t          |t                     sJ |j        j        dk    sJ d S )N'<table><tr><td>foobar</td></tr></table>tabler   from_html_text
isinstance_tabletagr   
html_tables     r   it_can_construct_from_html_textz1DescribeHtmlTable.it_can_construct_from_html_text1   sF    -.WXX
*i00000 $//////r   	html_text)r&   z4<body><table><tr><td>foobar</td></tr></table></body>zA<html><body><table><tr><td>foobar</td></tr></table></body></html>strc                    t          j        |          }t          |t                     sJ |j        j        dk    sJ d S )Nr'   r(   r   r0   r.   s      r   6it_can_find_a_table_wrapped_in_an_html_or_body_elementzHDescribeHtmlTable.it_can_find_a_table_wrapped_in_an_html_or_body_element7   sG     -i88
*i00000 $//////r   c                    t          j        t          d          5  t          j        d           d d d            d S # 1 swxY w Y   d S )Nz)`html_text` contains no `<table>` element)matchz2<html><body><tr><td>foobar</td></tr></body></html>)pytestraises
ValueErrorr   r)   r   s    r   :but_it_raises_when_no_table_element_is_present_in_the_htmlzLDescribeHtmlTable.but_it_raises_when_no_table_element_is_present_in_the_htmlE   s    ]:-XYYY 	[ 	[$%YZZZ	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[ 	[s   >AAc                H    t          j        d          }|j        dk    sJ d S )NzB<table border="1", class="foobar"><tr><td>foobar</td></tr></table>r&   r   r)   htmlr-   s     r   6it_removes_any_attributes_present_on_the_table_elementzHDescribeHtmlTable.it_removes_any_attributes_present_on_the_table_elementI   s3    -P
 

 "KKKKKKKr   )z6<table><thead><tr><td>foobar</td></tr></thead></table>zE<table><thead><tr><td>foobar</td></tr></thead><tbody></tbody></table>zE<table><tbody><tr><td>foobar</td></tr></tbody><tfoot></tfoot></table>c                H    t          j        |          }|j        dk    sJ d S Nr&   r<   r3   s      r   Mit_removes_any_thead_tbody_or_tfoot_elements_present_within_the_table_elementz_DescribeHtmlTable.it_removes_any_thead_tbody_or_tfoot_elements_present_within_the_table_elementO   s.     -i88
"KKKKKKKr   c                H    t          j        d          }|j        dk    sJ d S )NzR<table>  <tr><th>a</th><th/><th>b</th></tr>  <tr><td/><td>c</td><td/></tr></table>zN<table><tr><td>a</td><td/><td>b</td></tr><tr><td/><td>c</td><td/></tr></table>r<   r-   s     r   Eit_changes_any_th_elements_to_td_elements_for_cell_element_uniformityzWDescribeHtmlTable.it_changes_any_th_elements_to_td_elements_for_cell_element_uniformity]   sA    -
 

 \
 
 
 
 
 
r   c                H    t          j        d          }|j        dk    sJ d S )NzB
  <table>
  <tr>
    <td>	abc   def
ghi </td>
  </tr>
</table>
  z,<table><tr><td>abc def ghi</td></tr></table>r<   r-   s     r   Rit_removes_any_extra_whitespace_between_elements_and_normalizes_whitespace_in_textzdDescribeHtmlTable.it_removes_any_extra_whitespace_between_elements_and_normalizes_whitespace_in_texth   s3    -X
 

 "PPPPPPPr   c                \    t          d          }t          |          }|j        dk    sJ d S r@   )r   r   r=   )r   r'   r.   s      r   3it_can_serialize_the_table_element_to_str_html_textzEDescribeHtmlTable.it_can_serialize_the_table_element_to_str_html_textn   s7    #$MNNu%%
"KKKKKKKr   c                
   t          j        d          }|                                }t          |          }t	          |t
                    sJ |j        dk    sJ t          |          }t	          |t
                    sJ |j        dk    sJ t          |          }t	          |t
                    sJ |j        dk    sJ t          j        t                    5  t          |           d d d            d S # 1 swxY w Y   d S )Nz<table>  <tr><td>abc</td><td>def</td><td>ghi</td></tr>  <tr><td>jkl</td><td>mno</td><td>pqr</td></tr>  <tr><td>stu</td><td>vwx</td><td>yz</td></tr></table>z-<tr><td>abc</td><td>def</td><td>ghi</td></tr>z-<tr><td>jkl</td><td>mno</td><td>pqr</td></tr>z,<tr><td>stu</td><td>vwx</td><td>yz</td></tr>)
r   r)   	iter_rowsnextr*   r   r=   r7   r8   StopIteration)r   r.   row_iterrows       r   $it_can_iterate_the_rows_in_the_tablez6DescribeHtmlTable.it_can_iterate_the_rows_in_the_tablet   sJ   -
 

 ''))8nn#w'''''xJJJJJ8nn#w'''''xJJJJJ8nn#w'''''xIIIII]=)) 	 	NNN	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   C88C<?C<c                H    t          j        d          }|j        dk    sJ d S )Nz<table>  <tr><th> a
 b  c  </th><th/><th>def</th></tr>  <tr><td>gh 	i</td><td/><td>
 jk l </td></tr>  <tr><td/><td> m n op
</td><td/></tr></table>za b c def gh i jk l m n op)r   r)   textr-   s     r   >it_provides_access_to_the_clear_concatenated_text_of_the_tablezPDescribeHtmlTable.it_provides_access_to_the_clear_concatenated_text_of_the_table   s5    -
 

 ">>>>>>>r   N)r0   r1   )r   r   r    r!   r/   r7   markparametrizer4   r:   r>   rA   rC   rE   rG   rN   rQ   r"   r   r   r$   r$   .   s0       II0 0 0 [	
 	
 	
 0 0 0 0[ [ [L L L [	
 	
 	
 L L L L	
 	
 	
Q Q QL L L  4? ? ? ? ?r   r$   c                  $    e Zd ZdZd Zd Zd ZdS )DescribeHtmlRowz=Unit-test suite for `unstructured.common.html_table.HtmlRow`.c                T    t          t          d                    j        dk    sJ d S )N"<tr><td>a</td><td>b</td><td/></tr>)r   r   r=   r   s    r    it_can_serialize_the_row_to_htmlz0DescribeHtmlRow.it_can_serialize_the_row_to_html   s;    *+OPPQQV0
 
 
 
 
 
r   c                   t          t          d                    }|                                }t          |          }t	          |t
                    sJ |j        dk    sJ t          |          }t	          |t
                    sJ |j        dk    sJ t          |          }t	          |t
                    sJ |j        dk    sJ t          j        t                    5  t          |           d d d            d S # 1 swxY w Y   d S )NrW   z
<td>a</td>z
<td>b</td><td/>)
r   r   
iter_cellsrJ   r*   r   r=   r7   r8   rK   )r   rM   	cell_itercells       r   #it_can_iterate_the_cells_in_the_rowz3DescribeHtmlRow.it_can_iterate_the_cells_in_the_row   sC   )*NOOPPNN$$	I$)))))yL((((I$)))))yL((((I$)))))yG####]=)) 	 	OOO	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   #D  DDc                <   t          t          d                    }|                                }t          |          dk    sJ t          |          dk    sJ t	          j        t                    5  t          |           d d d            d S # 1 swxY w Y   d S )NrW   ab)r   r   iter_cell_textsrJ   r7   r8   rK   )r   rM   	text_iters      r   0it_can_iterate_the_texts_of_the_cells_in_the_rowz@DescribeHtmlRow.it_can_iterate_the_texts_of_the_cells_in_the_row   s    )*NOOPP''))	I#%%%%I#%%%%]=)) 	 	OOO	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   4BBBN)r   r   r    r!   rX   r^   rd   r"   r   r   rU   rU      sG        GG
 
 

  (    r   rU   c                  f    e Zd ZdZd Zej                            dddg          dd	            Zd
S )DescribeHtmlCellz>Unit-test suite for `unstructured.common.html_table.HtmlCell`.c                T    t          t          d                    j        dk    sJ d S )Nz<td>a b c</td>)r   r   r=   r   s    r   !it_can_serialize_the_cell_to_htmlz2DescribeHtmlCell.it_can_serialize_the_cell_to_html   s/    +,<==>>CGWWWWWWWr   )	cell_htmlexpected_value)z<td>  Lorem ipsum  </td>zLorem ipsum)rZ   r   ri   r1   rj   c                T    t          t          |                    j        |k    sJ d S )N)r   r   rP   )r   ri   rj   s      r   it_knows_the_text_in_the_cellz.DescribeHtmlCell.it_knows_the_text_in_the_cell   s/    
 +I6677<NNNNNNr   N)ri   r1   rj   r1   )	r   r   r    r!   rh   r7   rR   rS   rl   r"   r   r   rf   rf      sr        HHX X X ['	4mD O O O	 O O Or   rf   )r!   
__future__r   r7   	lxml.htmlr   unstructured.common.html_tabler   r   r   r   r
   r$   rU   rf   r"   r   r   <module>rp      s5   G F " " " " " "  ) ) ) ) ) )           6 6 6 6 6 6 6 68h? h? h? h? h? h? h? h?V$ $ $ $ $ $ $ $NO O O O O O O O O Or   