
    NgP                     V    d dl Z d dlmZmZmZmZmZ d dlmZm	Z	  G d de          Z
dS )    N)AnyListOptionalSequenceUnion)BaseDocumentTransformerDocumentc                       e Zd ZdZ	 	 	 	 ddeeeee         f                  deeeee         f                  deded	e	d
dfdZ
dee         d	e	d
ee         fdZdee         d	e	d
ee         fdZdS )MarkdownifyTransformeram  Converts HTML documents to Markdown format with customizable options for handling
    links, images, other tags and heading styles using the markdownify library.

    Arguments:
        strip: A list of tags to strip. This option can't be used with the convert option.
        convert: A list of tags to convert. This option can't be used with the strip option.
        autolinks: A boolean indicating whether the "automatic link" style should be used when a a tag's contents match its href. Defaults to True.
        heading_style: Defines how headings should be converted. Accepted values are ATX, ATX_CLOSED, SETEXT, and UNDERLINED (which is an alias for SETEXT). Defaults to ATX.
        kwargs: Additional options to pass to markdownify.

    Example:
        .. code-block:: python
            from langchain_community.document_transformers import MarkdownifyTransformer
            markdownify = MarkdownifyTransformer()
            docs_transform = markdownify.transform_documents(docs)

    More configuration options can be found at the markdownify GitHub page:
    https://github.com/matthewwithanm/python-markdownify
    NTATXstripconvert	autolinksheading_stylekwargsreturnc                     t          |t                    r|gn|| _        t          |t                    r|gn|| _        || _        || _        || _        d S N)
isinstancestrr   r   r   r   additional_options)selfr   r   r   r   r   s         q/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_transformers/markdownify.py__init__zMarkdownifyTransformer.__init__   sY     !+5# 6 6AeWWE
$.w$<$<Iyy'"*"(    	documentsc           
         	 ddl m } n# t          $ r t          d          w xY wg }|D ]} |d
|j        | j        | j        | j        | j        d| j                            dd                                          }t          j
        dd|          }|                    t          ||j        	                     |S )Nr   )markdownifyz`markdownify package not found, please 
                install it with `pip install markdownify`)htmlr   r   r   r        z\n\s*\nz

)metadata )r   ImportErrorpage_contentr   r   r   r   r   replaceresubappendr	   r"   )r   r   r   r   converted_documentsdocmarkdown_contentcleaned_markdowns           r   transform_documentsz*MarkdownifyTransformer.transform_documents*   s   
	/////// 	 	 	=  	 ! 	 	C )* L"n"&"4  -  %%   "vj&:JKK&&)CLAAA    #"s   	 #c                    K   t           r   )NotImplementedError)r   r   r   s      r   atransform_documentsz+MarkdownifyTransformer.atransform_documentsN   s      
 "!r   )NNTr   )__name__
__module____qualname____doc__r   r   r   r   boolr   r   r   r	   r.   r1   r#   r   r   r   r      s        , 2637") )c49n-.) %T#Y/0) 	)
 ) ) 
) ) ) )"#H%"# "# 
(		"# "# "# "#H"H%" " 
(		" " " " " "r   r   )r'   typingr   r   r   r   r   langchain_core.documentsr   r	   r   r#   r   r   <module>r9      s    				 7 7 7 7 7 7 7 7 7 7 7 7 7 7 F F F F F F F FL" L" L" L" L"4 L" L" L" L" L"r   