
    Ng'                        d Z g dZddlZddlmZmZ 	 ddlmZmZm	Z	m
Z
mZmZmZ eefZn# e$ r ddlmZmZm	Z	m
Z
mZmZ eZY nw xY wddZddZdd	Zd
 Z ej        dej                  j        Z G d d          Zd Zd Z	 ddlmZ n# e$ r	 ddlmZ Y nw xY w ej        d          j        Z	 e  n# e!$ r e"Z Y nw xY wd Z#dS )z5External interface to the BeautifulSoup HTML parser.
)
fromstringparseconvert_tree    N)etreehtml)BeautifulSoupTagCommentProcessingInstructionNavigableStringDeclarationDoctype)r   r	   r
   r   r   r   c                      t          | ||fi |S )a  Parse a string of HTML data into an Element tree using the
    BeautifulSoup parser.

    Returns the root ``<html>`` Element of the tree.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a diffent Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    )_parse)databeautifulsoupmakeelementbsargss       P/var/www/html/ai-engine/env/lib/python3.11/site-packages/lxml/html/soupparser.pyr   r      s     ${==f===    c                     t          | d          st          |           } t          | ||fi |}t          j        |          S )aY  Parse a file into an ElemenTree using the BeautifulSoup parser.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a diffent Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    read)hasattropenr   r   ElementTree)filer   r   r   roots        r   r   r   $   sK     4   Dzz${==f==DT"""r   c                     t          | |          }|                                }|D ]}|                    |           |S )a  Convert a BeautifulSoup tree to a list of Element trees.

    Returns a list instead of a single root Element to support
    HTML-like soup with more than one root element.

    You can pass a different Element factory through the `makeelement`
    keyword.
    )_convert_treegetchildrenremove)beautiful_soup_treer   r   childrenchilds        r   r   r   3   sN     ,k::D!!H  EOr   c                    |t           }t          |d          r	d|vrd|d<   t          |d          r	d|vrd|d<    || fi |}t          ||          }t          |          dk    r|d         j        dk    r|d         S d|_        |S )	NHTML_ENTITIESconvertEntitiesr   DEFAULT_BUILDER_FEATURESfeatureszhtml.parser   r   )r   r   r   lentag)sourcer   r   r   treer   s         r   r   r   E   s    %}o.. /F**(.F$%}899 /V##!.F:=**6**D{++D
4yyA~~$q'+//AwDHKr   z`(?:\s|[<!])*DOCTYPE\s*HTML(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?(?:\s+(\'[^\']*\'|"[^"]*"))?c                       e Zd Zd Zd ZdS )
_PseudoTagc                 0    d| _         g | _        || _        d S )Nr   )nameattrscontents)selfr4   s     r   __init__z_PseudoTag.__init__`   s    	
 r   c                 4    | j                                         S N)r4   __iter__)r5   s    r   r9   z_PseudoTag.__iter__e   s    }%%'''r   N)__name__
__module____qualname__r6   r9    r   r   r0   r0   ^   s2        ! ! !
( ( ( ( (r   r0   c                 Z   |t           j        j        }d x}}d x}}t          |           D ]b\  }}t	          |t
                    r/||}|}|&|j        r|j                                        dk    r|}I|t	          |t                    r|}c|g x}}	| j	        }
n3| j	        d |         }| j	        ||dz            }
| j	        |dz   d          }	|;|

                    |          }|
d |         |j	        z   |
|dz   d          z   |_	        nt          |
          }t          |          } ||          }|}t          |          D ]&} ||          }||                    |           |}'|}|	D ]&} ||          }||                    |           |}'|	 |                                }n# t"          $ r
 |j        }Y nw xY wt'          |          }|snR|                                \  }}|                                j        }|o	|dd         |_        |o	|dd         |_        |S )Nr   r*   )r   html_parserr   	enumerate
isinstancer	   r2   lower_DECLARATION_OR_DOCTYPEr4   indexr0   _init_node_convertersreversedaddpreviousaddnextoutput_readyAttributeErrorstring_parse_doctype_declarationgroupsgetroottreedocinfo	public_id
system_url)r"   r   first_element_idxlast_element_idx	html_rootdeclarationiepre_root	post_rootrootsconvert_noderes_rootprev	converteddoctype_stringmatchexternal_idsys_urirP   s                       r   r   r   i   s   &2 ,0/(""I-..  1a 	 ($%!  QV &0H0H	 Z3J%K%K K  !!9#,&/0B1B0BC#,->?OPQ?Q-QR'01A!1C1D1DE	 KK	"""2A2Y);;eAaCDDkI	 u%%	(55L |I&&HDh   LOO	 Y'''D D   LOO	 LL###D	0(5577NN 	0 	0 	0(/NNN	0 +>:: 		; #(<<>> K**,,4G + AAbD0AG!(!:WQrT]GOs   F0 0GGc                 0   	
 i g fd}fd	d
	fd	d 
d  |t           t                     
fd            } |t                    d             } |t                    d             } |t                    fd	            }S )Nc                        fd}|S )Nc                 F    D ]}| |<                        |           | S r8   )append)handlert
convertersordered_node_typestypess     r   addz5_init_node_converters.<locals>.converter.<locals>.add   s7     - - '
1"))!,,,,Nr   r=   )rl   rm   rj   rk   s   ` r   	converterz(_init_node_converters.<locals>.converter   s.    	 	 	 	 	 	 	
 
r   c                 F    D ]}t          | |          r
|         c S d S r8   )rB   )noderi   rj   rk   s     r   find_best_converterz2_init_node_converters.<locals>.find_best_converter   s>    # 	% 	%A$"" %!!}$$$%tr   c                     	 t          |                    }n-# t          $ r   |           x}t          |           <   Y nw xY w|d S  || |          S r8   )typeKeyError)bs_nodeparentrh   rj   rq   s      r   r\   z+_init_node_converters.<locals>.convert_node   s|    	O g/GG 	O 	O 	O2E2Eg2N2NNGjg///	O?4ww'''s    'AAc                     t          | t                    rYi }|                                 D ]A\  }}t          |t                    rd                    |          }t          |          ||<   Bnd | D             }|S )N c                 4    i | ]\  }}|t          |          S r=   unescape).0kvs      r   
<dictcomp>z<_init_node_converters.<locals>.map_attrs.<locals>.<dictcomp>   s$    ;;;$!Qq(1++;;;r   )rB   dictitemslistjoinr{   )bs_attrsattribsr}   r~   s       r   	map_attrsz(_init_node_converters.<locals>.map_attrs   s    h%% 	<G (( ) )1a&& $A%a[[

)
 <;(;;;Gr   c                     t          |           dk    r| j        pd|z   | _        d S | d         j        pd|z   | d         _        d S )Nr    r?   )r+   texttail)rv   r   s     r   append_textz*_init_node_converters.<locals>.append_text   sI    v;;!!;,"4FKKK%bz4"<F2JOOOr   c                 @   | j         }|,|r 
|          nd }t          j        || j        |          }n!|r 
|          ni } 	| j        |          }| D ]C}	 t	          |                   }| |||           '# t
          $ r Y nw xY w ||           D|S )N)attrib)r3   r   
SubElementr2   rs   rt   )ru   rv   r3   r   resr$   rh   r\   rj   r   r   s          r   convert_tagz*_init_node_converters.<locals>.convert_tag   s    */9ii&&&TG"67<HHHCC*/7ii&&&RG+gl7;;;C 
	% 
	%E$T%[[1 &GE3'''     L$$$$
s   B
BBc                 \    t          j        |           }||                    |           |S r8   )r   HtmlCommentrg   ru   rv   r   s      r   convert_commentz._init_node_converters.<locals>.convert_comment  s/    w''MM#
r   c                     |                      d          r
| d d         } t          j        |                     dd           }||                    |           |S )N?r?   rx   r*   )endswithr   r   splitrg   r   s      r   
convert_piz)_init_node_converters.<locals>.convert_pi  s_    C   	# crclG)7==a+@+@AMM#
r   c                 >    | |t          |                      d S r8   rz   )ru   rv   r   s     r   convert_textz+_init_node_converters.<locals>.convert_text  s(    K 1 1222tr   r8   )r	   r0   r
   r   r   )r   rn   r   r   r   r   r   r\   rj   rq   r   rk   s   `     @@@@@@r   rF   rF      sW   J          ( ( ( ( ( ( (	 	 	= = = YsJ        , Yw   Y$%%  &% Y     
 r   )name2codepointz&(\w+);c                 0    | sdS d }t          ||           S )Nr   c                     	 t          t          |                     d                             S # t          $ r |                     d          cY S w xY w)Nr*   r   )unichrr   grouprt   )ms    r   unescape_entityz!unescape.<locals>.unescape_entity5  sU    	.4555 	 	 	771::	s   ,/ AA)handle_entities)rL   r   s     r   r{   r{   1  s0     r  
 ?F333r   )NNr8   )$__doc____all__relxmlr   r   bs4r   r	   r
   r   r   r   r   rD   ImportErrorr   r   r   r   compile
IGNORECASEra   rM   r0   r   rF   html.entitiesr   htmlentitydefssubr   r   	NameErrorchrr{   r=   r   r   <module>r      s    2
1
1 				        	*                   +G4 * * *                *	*> > > ># # # #   $  $ (RZ$ M	  	 ( ( ( ( ( ( ( (R R Rj^ ^ ^F.,,,,,,, . . .--------. "*Z((,
FF   FFF
	4 	4 	4 	4 	4s3   + AAB BB4B7 7C C