
    Ng#                         d dl Z d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZ d dlmZ d dlmZ  e j        e          ZdZ G d d	e          ZdS )
    N)BytesIO)ListOptionalSequence)ElementTree)Document)
BaseLoader@   c                   r   e Zd ZdZ	 d dddededee         defd	Z	 	 	 	 	 	 d!dee	e                  dee	e                  dee         dededede	e
         fdZdedede	e         dd
fdZdee         dedede	e
         fdZdedededee
         fdZdedefdZdedefdZededefd            Zd
S )"
QuipLoaderz_Load `Quip` pages.

    Port of https://github.com/quip/quip-api/tree/master/samples/baqup
    <   F)allow_dangerous_xml_parsingapi_urlaccess_tokenrequest_timeoutr   c                    	 ddl m} n# t          $ r t          d          w xY w ||||          | _        |st	          d          dS )a  
        Args:
            api_url: https://platform.quip.com
            access_token: token of access quip API. Please refer:
                https://quip.com/dev/automation/documentation/current#section/Authentication/Get-Access-to-Quip's-APIs
            request_timeout: timeout of request, default 60s.
            allow_dangerous_xml_parsing: Allow dangerous XML parsing, defaults to False
        r   )
QuipClientz?`quip_api` package not found, please run `pip install quip_api`)r   base_urlr   ac  The quip client uses the built-in XML parser which may causesecurity issues when parsing XML data in some cases. Please see https://docs.python.org/3/library/xml.html#xml-vulnerabilities For more information, set `allow_dangerous_xml_parsing` as True if you are sure that your distribution of the standard library is not vulnerable to XML vulnerabilities.N)quip_api.quipr   ImportErrorquip_client
ValueError)selfr   r   r   r   r   s         e/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/quip.py__init__zQuipLoader.__init__   s     	0000000 	 	 	T  	
 &:%
 
 
 + 		<  		 		s   	 #N  
folder_ids
thread_idsmax_docsinclude_all_foldersinclude_commentsinclude_imagesreturnc                    |s|s|st          d          |pg }|r|D ]}|                     |d|           |r[| j                                        }d|v r|                     |d         d|           d|v r|                     |d         d|           t	          t          |d|                             }|                     |||          S )aA  
        Args:
            :param folder_ids: List of specific folder IDs to load, defaults to None
            :param thread_ids: List of specific thread IDs to load, defaults to None
            :param max_docs: Maximum number of docs to retrieve in total, defaults 1000
            :param include_all_folders: Include all folders that your access_token
                   can access, but doesn't include your private folder
            :param include_comments: Include comments, defaults to False
            :param include_images: Include images, defaults to False
        z_Must specify at least one among `folder_ids`, `thread_ids` or set `include_all`_folders as Truer   group_folder_idsshared_folder_idsN)r   get_thread_ids_by_folder_idr   get_authenticated_userlistsetprocess_threads)	r   r   r   r   r    r!   r"   	folder_idusers	            r   loadzQuipLoader.load=   s*   &  	* 	5H 	7  
  %2
 	K' K K	00AzJJJJ 		#::<<D!T))00+,a   #d**00,-q*   #j(34455
##J@PQQQ    r,   depthc           	      z   ddl m}m} 	 | j                            |          }n# |$ rV}|j        dk    rt          j        d| d| d|            n"t          j        d| d| d|j                    Y d	}~d	S d	}~w|$ r-}t          j        d| d| d
|j                    Y d	}~d	S d	}~ww xY w|d                             dd|z            }t          j	        d| d|            |d         D ]F}	d|	v r!| 
                    |	d         |dz   |           'd|	v r|                    |	d                    Gd	S )z4Get thread ids by folder id and update in thread_idsr   )	HTTPError	QuipErrori  zdepth z!, Skipped over restricted folder z, z, Skipped over folder z due to unknown error Nz due to HTTP error foldertitlez	Folder %sz, Processing folder childrenr,      	thread_id)r   r2   r3   r   
get_foldercodeloggingwarninggetinfor'   append)
r   r,   r0   r   r2   r3   r4   er5   childs
             r   r'   z&QuipLoader.get_thread_ids_by_folder_idj   s    	76666666	%00;;FF 
	 
	 
	v}}UUUUYUURSUU    5U 5 5) 5 5,-F5 5   FFFFF 	 	 	O. . .i . .%&V. .   FFFFF	 x $$WkI.EFF@e@@@@AAAJ' 	6 	6Ee##00+&	:    %%!!%"4555	6 	6s!   % B0AA;;B0"B++B0include_messagesc                 p    g }|D ]0}|                      |||          }||                    |           1|S )z2Process a list of thread into a list of documents.)process_threadr?   )r   r   r"   rB   docsr8   docs          r   r+   zQuipLoader.process_threads   sM     # 	! 	!I%%iAQRRCC   r/   r8   c           
         | j                             |          }|d         d         }|d         d         }|d         d         }|d         d         }t                              |          }t                              d| d| d| d	|            d
|v r	 | j                             |d
                   }	nJ# t          j        j	        j
        $ r.}
t                              d| d| d|
            Y d }
~
d S d }
~
ww xY w||||d}d}|r|                     |	          }|r|dz   |                     |          z   }t          |d
         |z   |          S d S )Nthreadidr5   linkupdated_useczprocessing thread z title z link z update_ts htmlzError parsing thread  z, skipping, )r5   	update_tsrI   source z/n)page_contentmetadata)r   
get_threadr   _sanitize_titleloggerr>   parse_document_htmlxmletreecElementTree
ParseErrorerrorprocess_thread_imagesprocess_thread_messagesr   )r   r8   r"   rB   rH   r5   rJ   rN   sanitized_titletreer@   rR   texts                r   rD   zQuipLoader.process_thread   s    !,,Y778$T*	x )h'8$^4	$44U;;1 1 1? 1 11 1%.1 1	
 	
 	

 V';;F6NKK9)4   WUWWYWWTUWWXXXttttt
 )&	 H D 811$77 Md{T%A%A)%L%LL#F^d2!    ts    B9 9D #C;;D r_   c                 V   d}	 ddl m} ddlm} n# t          $ r t          d          w xY w|                    d          D ]}|                    d          }|r|                    d          s/|                    d	          \  }}}}	| j        	                    ||	          }
	 |
                    t          |
                                                    }|d
z   |                    |          z   }# t          $ r$}t                              d|            |d }~ww xY w|S )NrP   r   )Image)pytesseractzg`Pillow or pytesseract` package not found, please run `pip install Pillow` or `pip install pytesseract`imgsrcz/blob/
z!failed to convert image to text, )PILrb   rc   r   iterr=   
startswithsplitr   get_blobopenr   readimage_to_stringOSErrorrU   r[   )r   r_   r`   rb   rc   rd   re   _r8   blob_idblob_responseimager@   s                r   r\   z QuipLoader.process_thread_images   sn   	!!!!!!/////// 	 	 	D  	 99U## 	 	C''%..C cnnW55 '*yy~~$Aq)W ,55iIIM

7=+=+=+?+?#@#@AAd{[%@%@%G%GG   DDDEEE s!    +(AC88
D&D!!D&c                    d }g }	 | j                             ||d          }|                    |           |r|d         d         dz
  }nnH|                                 d |D             }d                    |          S )	NTd   )max_created_useccountcreated_usecr7   c                     g | ]
}|d          S )r`    ).0messages     r   
<listcomp>z6QuipLoader.process_thread_messages.<locals>.<listcomp>   s    999W999r/   rg   )r   get_messagesextendreversejoin)r   r8   rw   messageschunktextss         r   r]   z"QuipLoader.process_thread_messages   s    	$11,<C 2  E OOE""" #(9^#<q#@  	 	99999yyr/   r5   c                     t          j        dd|           }t          j        dd|          }t          |          t          k    r|d t                   }|S )Nz\srM   z(?u)[^- \w.]rP   )resublen_MAXIMUM_TITLE_LENGTH)r5   r^   s     r   rT   zQuipLoader._sanitize_title   sS    &U33&"oFF"777-.D/D.DEOr/   )r   )NNr   FFF)__name__
__module____qualname____doc__strr   intboolr   r   r   r.   r'   r   r+   rD   r   r\   r]   staticmethodrT   r|   r/   r   r   r      s/         *,	$ -2$ $ $$ $ "#	$ &*$ $ $ $P +/*."&$)!&$+R +RT#Y'+R T#Y'+R 3-	+R
 "+R +R +R 
h+R +R +R +RZ#6#6%(#66:3i#6	#6 #6 #6 #6J	"3-	9=	QU		h	 	 	 	**.2*FJ*	(	* * * *X+ #    6           $ s s    \  r/   r   )r;   r   xml.etree.cElementTreerW   ior   typingr   r   r   xml.etree.ElementTreer   langchain_core.documentsr   )langchain_community.document_loaders.baser	   	getLoggerr   rU   r   r   r|   r/   r   <module>r      s     				           + + + + + + + + + + - - - - - - - - - - - - @ @ @ @ @ @		8	$	$ h h h h h h h h h hr/   