
    Ng                     *   d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
Z
d dlZd dlmZmZ  e j        d          Zdededefd	Zd
edee         fdZd Zd Zd Z	 d"dededej        dedef
dZd ZdefdZdefdZd#de	ej        ee         f         dee          de	edf         fdZ!d#deee                  dee          de	edf         fdZ"d#deee                  dee          de	edf         fdZ#deee                  defd Z$d! Z%dS )$    N)Path)ListOptionalUnion)elements_from_jsonelements_to_textzunstructured.evaldocpathoutput_typereturnc                    	 |dk    rt          t          |                     }n)|dk    rt          |           }nt          d| d          n1# t          $ r$}t                              d|             |d}~ww xY w|S )z
    Convert given input document (path) into cct-ready. The function only support conversion
    from `json` or `txt` file.
    jsontxtzZFile type not supported. Expects one of `json` or `txt`,                     but received z	 instead.zCould not read the file N)r   r   _read_text_file
ValueErrorloggererror)r	   r
   
output_cctes       V/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/metrics/utils.py_prepare_output_cctr      s    
&  )*<W*E*EFFJJE!!(11JJ9"-9 9 9      999::: s   AA 
A=A88A=dirc                 (   g }t          j        |           D ]z\  }}}|D ]q}t           j                            ||           }|dk    r|                    |           >|                    t           j                            ||                     r{|S )z
    Recursively lists all files in the given directory and its subdirectories.
    Returns a list of all files found, with each file's path relative to the
    initial directory.
    .)oswalkpathrelpathappendjoin)r   listdirdirpath_	filenamesfilenamerelative_paths          r   _listdir_recursiver&   %   s     G!# F FI! 	F 	FHGOOGS99M##x((((rw||M8DDEEEE	F N    c                 <    ddddd}|                      |          S )a  
    Renames aggregated columns in a DataFrame based on a predefined mapping.

    Parameters:
    df (pandas.DataFrame): The DataFrame with aggregated columns to rename.

    Returns:
    pandas.DataFrame: A new DataFrame with renamed aggregated columns.
    meanstdevpstdevcount)_mean_stdev_pstdev_count)columns)rename)df
rename_maps     r   _rename_aggregated_columnsr5   7   s*     "WU\]]J99Z9(((r'   c                  R    t          j        | d                                          S )zs
    Concatenates multiple pandas DataFrame objects along the columns (side-by-side)
    and resets the index.
       )axis)pdconcatreset_index)r3   s    r   _format_grouping_outputr<   E   s%    
 9Ra   ,,...r'   c                     t                     dk    rdS  j                                        } fd|D             t          j        d                    fdt          |          D                                  t          j        dt                    z  dt          |          dz
  z  z                                               D ]\  }}g |D ]Q}t          |t                    r                    |d           /                    t          |                     Rt          j        d                    fd	t          t                              D                                  dS )
zD
    Displays the evaluation metrics in a formatted text table.
    r   Nc           
          g | ]<}t          t          |          t          d  |         D                                 =S )c              3   N   K   | ] }t          t          |                    V  !d S N)lenstr).0items     r   	<genexpr>z&_display.<locals>.<listcomp>.<genexpr>U   s.      CCST^^CCCCCCr'   )maxrA   )rC   headerr3   s     r   
<listcomp>z_display.<locals>.<listcomp>T   sQ       IOCKKCC6
CCCCCDD  r'    c              3   T   K   | ]"\  }}|                     |                   V  #d S r@   ljust)rC   irG   
col_widthss      r   rE   z_display.<locals>.<genexpr>W   s7      XX	6Z]33XXXXXXr'   -r7   z.3fc              3   Z   K   | ]%}|                              |                   V  &d S r@   rK   )rC   rM   rN   formatted_rows     r   rE   z_display.<locals>.<genexpr>a   s:      ^^q]1%++JqM::^^^^^^r'   )rA   r1   tolistclickechor   	enumeratesumiterrows
isinstancefloatr   rB   range)r3   headersr"   rowrD   rN   rQ   s   `    @@r   _displayr]   M   s    2ww!||j!!G   SZ  J 
JsxxXXXXYwEWEWXXXXXYYY	JsS__$sc'llQ.>'??@@@++-- 	
 	
3 	0 	0D$&& 0$$]]3333$$SYY////
HH^^^^^E#mJ\J\D]D]^^^^^	
 	
 	
 	
	
 	
r'   wT	directoryr$   r3   mode	overwritec                    |dvrt          d          | r#t          |                               d           d|j        v r#|d                             t
                    |d<   d|j        v r"d|j        v r|                    ddgd           |st          | |          }|                    t          j
                            | |          d	|d
|dk               dS )z
    Save the metrics report to tsv file. The function allows an option 1) to choose `mode`
    as `w` (write) or `a` (append) and 2) to `overwrite` the file if filename existed or not.
    )r^   az/Mode not supported. Mode must be one of [w, a].T)exist_okr,   r$   	connector)byinplace	Fr^   )sepr`   indexrG   N)r   r   mkdirr1   astypeintsort_values_get_non_duplicated_filenameto_csvr   r   r   )r_   r$   r3   r`   ra   s        r   _write_to_filerq   e   s     :JKKK -Yt,,,"*k((--7RZK2:$=$=
;
3TBBB E/	8DDII
Y))t$eUY]`U`      r'   c                 ^    t          j        d|           }|rt          |d                   S dS )a  
    A function that defines the sorting method for duplicated file names. For example,
    with filename.ext filename (1).ext filename (2).ext filename (10).ext - this function
    extracts the integer in the bracket and sort those numbers ascendingly.
    z(\d+)r   )refindallrm   )r$   numberss     r   _sorting_keyrw   {   s6     j8,,G 72; qr'   c                   
 |                     dd          \  }}dt          j        |           dt          j        |           d
t          
fd| D             t                    }g }|D ]N}t          j        d|          }|r5|                    t          |                    d                               O|	                                 d}|D ]}	|	|k    r|dz  } |d	z   t          |          z   d
z   |z   S )z
    Checks the duplicity of the file name from the list and run the numerical check
    of the minimum number needed as extension to not overwrite the exising file.
    Returns a string of file name in the format of `filename (<min number>).ext`.
    r   r7   ^z(?: \((\d+)\))?\.$c                 >    g | ]}t          j        |          |S  )rt   match)rC   fpatterns     r   rH   z"_uniquity_file.<locals>.<listcomp>   s*    LLLQrx7K7KLqLLLr'   )keyz	\((\d+)\)z (z).)rsplitrt   escapesortedrw   searchr   rm   groupsortrB   )	file_listtarget_filenameoriginal_filename	extensionduplicated_filesrv   filer}   counternumberr   s             @r   _uniquity_filer      s"    $3#9#9#q#A#A yY29.//YY")IBVBVYYYGLLLL)LLLR^___G  0 0	,-- 	0NN3u{{1~~..///LLNNNG  WqLGGt#c'll2T9IEEr'   c                 J    t          t          j        |           |          }|S )zs
    Helper function to calls the `_uniquity_file` function. Takes in directory and file name
    to check on.
    )r   r   r    )r   r$   s     r   ro   ro      s     
 bjoox88HOr'      scoresroundingc                 |    t          |           dk    rdS t          j        |           }|s|S t          ||          S )z
    Find mean from the list. Returns None if no element in the list.

    Args:
        rounding (int): optional argument that allows user to define decimal points. Default at 3.
    r   N)rA   
statisticsr)   round)r   r   r)   s      r   r-   r-      sF     6{{at?6""D x   r'   c                     d | D             } t          |           dk    rdS |st          j        |           S t          t          j        |           |          S )z
    Find standard deviation from the list.
    Returns None if only 0 or 1 element in the list.

    Args:
        rounding (int): optional argument that allows user to define decimal points. Default at 3.
    c                     g | ]}||S r@   r|   rC   scores     r   rH   z_stdev.<locals>.<listcomp>       ===5+<e+<+<+<r'   r7   N)rA   r   r*   r   r   r   s     r   r.   r.      sa     >====F
6{{at ('''!&))8444r'   c                     d | D             } t          |           dk    rdS |st          j        |           S t          t          j        |           |          S )z
    Find population standard deviation from the list.
    Returns None if only 0 or 1 element in the list.

    Args:
        rounding (int): optional argument that allows user to define decimal points. Default at 3.
    c                     g | ]}||S r@   r|   r   s     r   rH   z_pstdev.<locals>.<listcomp>   r   r'   r7   N)rA   r   r+   r   r   s     r   r/   r/      sa     >====F
6{{at ) ((("6**H555r'   c                      t          |           S )z,
    Returns the row count of the list.
    )rA   )r   s    r   r0   r0      s     v;;r'   c                 4   t           j                            |           st          d|  d          	 t	          | d          5 }|                                }ddd           n# 1 swxY w Y   |S # t          $ r}t          d|  d|           d}~ww xY w)zG
    Reads the contents of a text file and returns it as a string.
    zThe file at z does not exist.ignore)errorsNz+An error occurred when reading the file at z: )r   r   existsFileNotFoundErroropenreadOSErrorIOError)r   r~   textr   s       r   r   r      s    
 7>>$ G Et E E EFFFQ$x((( 	A6688D	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 Q Q QODOOAOOPPPQs;   A3 A&A3 &A**A3 -A*.A3 3
B=BB)r^   T)r   )&loggingr   rt   r   pathlibr   typingr   r   r   rS   pandasr9   unstructured.staging.baser   r   	getLoggerr   rB   r   r&   r5   r<   r]   	DataFrameboolrq   rw   r   ro   SeriesrY   rm   r-   r.   r/   r0   r   r|   r'   r   <module>r      s    				 				           ( ( ( ( ( ( ( ( ( (      J J J J J J J J		.	/	/ 3 3    *C DI    $) ) )/ / /
 
 
2 Y] !')|;>QU   ,   F# F F F F83    ! !%	4;./ !8C= !QVW\^bWbQc ! ! ! !5 54( 5HSM 5%PUW[P[J\ 5 5 5 5$6 6D%) 6Xc] 65QVX\Q\K] 6 6 6 6 4( U    Q Q Q Q Qr'   