
    Ng6                     X    d dl Z d dlmZmZmZ d dlZd dlZd dl	m
Z
  G d d          ZdS )    N)AnyDictList)compare_contents_as_dfc                      e Zd ZddefdZedeeeee	f                           dee         fd            Z
edeeeee	f                           deeeee	f                           dee         fd	            Zedeeee	f                  dej        fd
            Ze	 ddeeeee	f                           deeeee	f                           dee         dedeeef         f
d            ZdS )TableAlignment皙?cutoffc                     || _         d S )N)r
   )selfr
   s     f/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/metrics/table/table_alignment.py__init__zTableAlignment.__init__
   s        
table_datareturnc                     d | D             S )a6  Extracts and concatenates the content of cells from each table in a list of tables.

        Args:
          table_data: A list of tables, each table being a list of cell data dictionaries.

        Returns:
          List of strings where each string represents the concatenated content of one table.
        c                 L    g | ]!}d                      d |D                       "S ) c                 &    g | ]}d |v |d          S content ).0ds     r   
<listcomp>zCTableAlignment.get_content_in_tables.<locals>.<listcomp>.<listcomp>   s!    EEE1i1nn!I,nnnr   )join)r   tds     r   r   z8TableAlignment.get_content_in_tables.<locals>.<listcomp>   s3    \\\2EEEEEFF\\\r   r   )r   s    r   get_content_in_tablesz$TableAlignment.get_content_in_tables   s     ]\Q[\\\\r   predicted_table_dataground_truth_table_datac                    t                               |          }g }| D ]m}t                               |g          d         }t          j        ||dd          }|                    |r|                    |d                   nd           n|S )a  Compares predicted table data with ground truth data to find the best
        matching table index for each predicted table.

        Args:
          predicted_table_data: A list of predicted tables.
          ground_truth_table_data: A list of ground truth tables.

        Returns:
          A list of indices indicating the best match in the ground truth for
          each predicted table.

        r   g?   r
   n)r   r   difflibget_close_matchesappendindex)r   r    ground_truth_textsmatched_indicesr   	referencematchess          r   get_table_level_alignmentz(TableAlignment.get_table_level_alignment   s    " ,AABYZZ& 	\ 	\B&<<bTBB1EI/	;MVY]^___G""7#Z#5#;#;GAJ#G#G#GXZ[[[[r   c                     t          j        | g d          }|                    d          }|d                             t                    |d<   |S )N)	row_index	col_indexr   )columnsr0   r1   )pd	DataFrame	set_indexastypestr)r   dfs     r   _zip_to_dataframez TableAlignment._zip_to_dataframe3   sN    \*.S.S.STTT\\+&&[/0055;	r   r+   c                 6  "# g }g }g }g }t          |           D ]\  }}	|dk    rU|                    d           |                    d           |                    d           |                    d           a||         }
t                              |	          }t                              |
          }t	          |                    d          |                    d                    }|                    |d                    |                    |d                    d}d}d}d |
D             }t                      #g }|	D ]}|d                                         }|d         }|d	         }t          j	        |||d
          "g }"g k    r"#fdt          |          D             }|s0#                                 "#fdt          |          D             }|d         }|                    |           #                    |           ndg}|d         }|dk    r7|
|         d         }|
|         d	         }|                    ||f||ff           |D ]M}|d         d         |d
         d         k    r|d
z  }|d         d
         |d
         d
         k    r|d
z  }|d
z  }Nd}d}|dk    r&t          ||z  d          }t          ||z  d          }|                    |           |                    |           fdt          t          |                    D             } | D ]V}!|                    d           |                    d           |                    d           |                    d           Wt          t!          j        |          d          t          t!          j        |          d          t          t!          j        |          dz  d          t          t!          j        |          dz  d          dS )a  Aligns elements of the predicted tables with the ground truth tables at the cell level.

        Args:
          predicted_table_data: A list of predicted tables.
          ground_truth_table_data: A list of ground truth tables.
          matched_indices: Indices of the best matching ground truth table for each predicted table.
          cutoff: The cutoff value for the close matches.

        Returns:
          A dictionary with column and row alignment accuracies.

        r%   r    by_col_token_ratioby_row_token_ratioc                 B    g | ]}|d                                           S r   )lower)r   gtds     r   r   z>TableAlignment.get_element_level_alignment.<locals>.<listcomp>j   s(    ,_,_,_S^-A-A-C-C,_,_,_r   r   r0   r1   r"   r#   c                 :    g | ]\  }}|d          k    |v|S r   r   r   ib_stringr-   used_indicess      r   r   z>TableAlignment.get_element_level_alignment.<locals>.<listcomp>~   s@     ! ! !'Ax#wqz11a|6K6K 6K6K6Kr   c                 :    g | ]\  }}|d          k    |v|S rB   r   rC   s      r   r   z>TableAlignment.get_element_level_alignment.<locals>.<listcomp>   s@     % % % +8'71:55!<:O:O :O:O:Or      c                     g | ]}|v|	S r   r   )r   idr+   s     r   r   z>TableAlignment.get_element_level_alignment.<locals>.<listcomp>   s*     &
 &
 &
/@Y@YB@Y@Y@Yr   g      Y@)col_index_accrow_index_acccol_content_accrow_content_acc)zipr(   r   r9   r   fillnasetr?   r&   r'   	enumerateclearaddroundrangelennpmean)$r   r    r+   r
   content_diff_colscontent_diff_rowsrK   rL   idxr   ground_truth_tdpredict_table_dfground_truth_table_dftable_content_diffaligned_element_col_countaligned_element_row_counttotal_element_countground_truth_td_contents_listindices_tuple_pairstd_eler   r0   col_idxmatching_indices	b_indicesmatching_indexmatched_idxgt_row_indexgt_col_indexindices_tuple_pairtable_col_index_acctable_row_index_accnot_found_gt_table_indexes_r-   rF   s$     `                               @@r   get_element_level_alignmentz*TableAlignment.get_element_level_alignment:   s   & ?,@AA R	6 R	6GCbyy!((+++!((+++$$Q'''$$Q'''5c:O  .??CC$2$D$D_$U$U!!7%,,R00 ''++" " $$%78L%MNNN$$%78L%MNNN()%()%"#,_,_,_,_,_)55L"$ 'e 'e +1133";/	 -!31!	   $& b==! ! ! ! !+45R+S+S! ! !I
 % $**,,,% % % % %/89V/W/W% % %	
 &/q\N$++N;;; $$^4444(*t$.q1!###2;#?#LL#2;#?#LL'..G0D|UaFb/cddd&9 ) )"%a(+/A!/DQ/GGG-2-%a(+/A!/DQ/GGG-2-#q(##"#"#"Q&&&+,EH[,[]^&_&_#&+,EH[,[]^&_&_#  !4555  !45555&
 &
 &
 &
s#:;;<<&
 &
 &
" , 	$ 	$A$$Q'''$$Q'''  ###  #### #27=#9#91=="27=#9#91==$RW->%?%?%%GKK$RW->%?%?%%GKK	
 
 	
r   N)r	   )__name__
__module____qualname__floatr   staticmethodr   r   r7   r   r   intr.   r3   r4   r9   rs   r   r   r   r   r   	   s        u     
]$tDcN/C*D 
]c 
] 
] 
] \
] "4S#X#78!%d4S>&:!; 
c   \0 d4S>&: r|    \ 
 	y
 y
"4S#X#78y
!%d4S>&:!;y
 cy
 	y

 
c5j	y
 y
 y
 \y
 y
 y
r   r   )r&   typingr   r   r   numpyrX   pandasr3   "unstructured_inference.models.evalr   r   r   r   r   <module>r~      s     " " " " " " " " " "         E E E E E Ek
 k
 k
 k
 k
 k
 k
 k
 k
 k
r   