
    Ng,                        d dl mZmZmZ d dlmZ d dlmZm	Z	 d dl
mZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ  ee          Zedk    reZne	Z G d de          ZdS )    )absolute_importdivisionunicode_literals)unichr)dequeOrderedDict)version_info   )spaceCharacters)entities)asciiLettersasciiUpper2Lower)digits	hexDigitsEOF)
tokenTypestagTokenTypes)replacementCharacters)HTMLInputStream)Trie)      c                       e Zd ZdZdM fd	Zd Zd ZdNdZd Zd	 Z	d
 Z
d Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd  Z d! Z!d" Z"d# Z#d$ Z$d% Z%d& Z&d' Z'd( Z(d) Z)d* Z*d+ Z+d, Z,d- Z-d. Z.d/ Z/d0 Z0d1 Z1d2 Z2d3 Z3d4 Z4d5 Z5d6 Z6d7 Z7d8 Z8d9 Z9d: Z:d; Z;d< Z<d= Z=d> Z>d? Z?d@ Z@dA ZAdB ZBdC ZCdD ZDdE ZEdF ZFdG ZGdH ZHdI ZIdJ ZJdK ZKdL ZL xZMS )OHTMLTokenizera	   This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    Nc                     t          |fi || _        || _        d| _        g | _        | j        | _        d| _        d | _        t          t          |                                            d S NF)r   streamparser
escapeFlaglastFourChars	dataStatestateescapecurrentTokensuperr   __init__)selfr   r   kwargs	__class__s       O/var/www/html/ai-engine/env/lib/python3.11/site-packages/html5lib/_tokenizer.pyr&   zHTMLTokenizer.__init__(   sn    %f7777  ^
 !mT""++-----    c              #   f  K   t          g           | _        |                                 r| j        j        r;t
          d         | j        j                            d          dV  | j        j        ;| j        r"| j                                        V  | j        "|                                 dS dS )z This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        
ParseErrorr   typedataN)r   
tokenQueuer"   r   errorsr   poppopleftr'   s    r*   __iter__zHTMLTokenizer.__iter__7   s        )) jjll 	0+$ \),7ASAWAWXYAZAZ[[[[[ +$ \/ 0o--///// / 0 jjll 	0 	0 	0 	0 	0r+   c                 l   t           }d}|r	t          }d}g }| j                                        }||v rD|t          ur;|                    |           | j                                        }||v r	|t          u;t          d                    |          |          }|t          v r:t          |         }| j	                            t          d         dd|id           nd|cxk    rd	k    sn |d
k    r.d}| j	                            t          d         dd|id           nd|cxk    rdk    sBn d|cxk    rdk    s3n d|cxk    rdk    s$n d|cxk    rdk    sn |t          g d          v r+| j	                            t          d         dd|id           	 t          |          }n@# t          $ r3 |dz
  }t          d|dz	  z            t          d|dz  z            z   }Y nw xY w|dk    rB| j	                            t          d         dd           | j                            |           |S )zThis function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        
       r-   z$illegal-codepoint-for-numeric-entity	charAsIntr/   r0   datavarsi   i      �r
                  i  i  )#   i  i  i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i r>   i   i   i  ;z numeric-entity-without-semicolonr.   )r   r   r   charr   appendintjoinr   r1   r   	frozensetchr
ValueErrorunget)	r'   isHexallowedradix	charStackcr;   rG   vs	            r*   consumeNumberEntityz!HTMLTokenizer.consumeNumberEntityG   s     	GE	 K7llq||Q  ""A 7llq||
 	**E22	 ---(3DO""J|,D$J1<i0H$J $J K K K K ,,,,f,,,,8##DO""J|,D$J1<i0H$J $J K K K K
 9........9........9........9........Y (E (E (E F F F F &&
<0H(N5@)4L(N (N O O OK 9~~ K K K'6Q"W-..Vq5y5I1J1JJK 88O""J|,D$F$H $H I I IKa   s   F, ,:G)(G)Fc                    d}| j                                         g}|d         t          v s |d         t          ddfv s|.||d         k    r"| j                             |d                    n|d         dk    r-d}|                    | j                                                    |d         dv r.d}|                    | j                                                    |r|d         t          v s|sF|d         t          v r7| j                             |d                    |                     |          }n| j	                            t          d	         d
d           | j                             |                                           dd                    |          z   }nJ|d         t          urit                              d                    |                    sn;|                    | j                                                    |d         t          ui	 t                              d                    |d d                             }t!          |          }n# t"          $ r d }Y nw xY w||d         dk    r(| j	                            t          d	         dd           |d         dk    rq|ro||         t$          v s||         t          v s||         dk    rE| j                             |                                           dd                    |          z   }nt&          |         }| j                             |                                           |d                    ||d                    z  }nl| j	                            t          d	         dd           | j                             |                                           dd                    |          z   }|r#| j        d         d         dxx         |z  cc<   d S |t          v rd}nd}| j	                            t          |         |d           d S )N&r   <#F)xXTr-   zexpected-numeric-entityr.   r:   rF   znamed-entity-without-semicolon=zexpected-named-entityr0   r
   SpaceCharacters
Characters)r   rG   r   r   rN   rH   r   r   rU   r1   r   r3   rJ   entitiesTriehas_keys_with_prefixlongest_prefixlenKeyErrorr   r   r$   )	r'   allowedCharfromAttributeoutputrR   hex
entityNameentityLength	tokenTypes	            r*   consumeEntityzHTMLTokenizer.consumeEntity   sX   [%%''(	aLO++y|S#/N/N([IaL-H-HKil++++q\S  CT[--//000}
**  !1!1!3!3444  
2	"22 3$-bMV$;$;!!)B-00011#66 &&
<0H0I(K (K L L L!!)--//222rwwy111 R=++#889K9KLL   !1!1!3!3444 R=++")883B39P9PQQ
": " " "!


" %b>S((O**J|4L,L,N ,N O O OrNc))m)|,<<|,66|,33K%%immoo666 2779#5#55FF%j1FK%%immoo666bggi&>???FF&&
<0H(?(A (A B B B!!)--//222rwwy111 	Tf%b)!,,,6,,,,,((-		(	O""Jy,A6#R#RSSSSSs   !AI& &I54I5c                 4    |                      |d           dS )zIThis method replaces the need for "entityInAttributeValueState".
        T)re   rf   N)rl   )r'   re   s     r*   processEntityInAttributez&HTMLTokenizer.processEntityInAttribute   s#     	{$GGGGGr+   c                    | j         }|d         t          v r|d                             t                    |d<   |d         t          d         k    rZ|d         }t          |          }t          |          t          |          k    r|                    |ddd                    ||d<   |d         t          d         k    r`|d         r(| j        	                    t          d         d	d
           |d         r(| j        	                    t          d         dd
           | j        	                    |           | j
        | _        dS )zThis method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        r/   nameStartTagr0   NrZ   EndTagr-   zattributes-in-end-tagr.   selfClosingzself-closing-flag-on-end-tag)r$   r   	translater   r   attributeMaprc   updater1   rH   r!   r"   )r'   tokenrawr0   s       r*   emitCurrentTokenzHTMLTokenizer.emitCurrentToken   sU   
 !&M]**!&M334DEEE&MV}
: 666Fm#C((s88c$ii''KKDDbD	*** $fV}
8 444= NO**J|4L4K,M ,M N N N' UO**J|4L4R,T ,T U U Uu%%%^


r+   c                 z   | j                                         }|dk    r| j        | _        n|dk    r| j        | _        n|dk    rQ| j                            t          d         dd           | j                            t          d         dd           n|t          u rdS |t          v rJ| j                            t          d	         || j         
                    t          d
          z   d           nE| j         
                    d          }| j                            t          d         ||z   d           d
S )NrW   rX    r-   invalid-codepointr.   r_   Fr^   TrW   rX   r{   )r   rG   entityDataStater"   tagOpenStater1   rH   r   r   r   
charsUntilr'   r0   charss      r*   r!   zHTMLTokenizer.dataState   sl   {!!3;;-DJJS[[*DJJXO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 7 7S[[5_$$ O""J7H,I$(4;+A+A/SW+X+X$X$Z $Z [ [ [ [ K**+?@@EO""J|,D$(5L$2 $2 3 3 3tr+   c                 F    |                                   | j        | _        dS NT)rl   r!   r"   r5   s    r*   r~   zHTMLTokenizer.entityDataState  s"    ^
tr+   c                 ~   | j                                         }|dk    r| j        | _        n|dk    r| j        | _        n|t
          k    rdS |dk    rQ| j                            t          d         dd           | j                            t          d         d	d           n|t          v rJ| j                            t          d
         || j         
                    t          d          z   d           nE| j         
                    d          }| j                            t          d         ||z   d           dS )NrW   rX   Fr{   r-   r|   r.   r_   r?   r^   Tr}   )r   rG   characterReferenceInRcdatar"   rcdataLessThanSignStater   r1   rH   r   r   r   r   s      r*   rcdataStatezHTMLTokenizer.rcdataState"  sl   {!!3;;8DJJS[[5DJJS[[5XO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 7 7_$$ O""J7H,I$(4;+A+A/SW+X+X$X$Z $Z [ [ [ [ K**+?@@EO""J|,D$(5L$2 $2 3 3 3tr+   c                 F    |                                   | j        | _        dS r   )rl   r   r"   r5   s    r*   r   z(HTMLTokenizer.characterReferenceInRcdata?  s#    %
tr+   c                    | j                                         }|dk    r| j        | _        n|dk    rQ| j                            t          d         dd           | j                            t          d         dd           nR|t          k    rdS | j                             d	          }| j                            t          d         ||z   d           d
S NrX   r{   r-   r|   r.   r_   r?   F)rX   r{   T)	r   rG   rawtextLessThanSignStater"   r1   rH   r   r   r   r   s      r*   rawtextStatezHTMLTokenizer.rawtextStateD  s    {!!3;;6DJJXO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 7 7S[[5K**?;;EO""J|,D$(5L$2 $2 3 3 3tr+   c                    | j                                         }|dk    r| j        | _        n|dk    rQ| j                            t          d         dd           | j                            t          d         dd           nR|t          k    rdS | j                             d	          }| j                            t          d         ||z   d           d
S r   )	r   rG   scriptDataLessThanSignStater"   r1   rH   r   r   r   r   s      r*   scriptDataStatezHTMLTokenizer.scriptDataStateV  s    {!!3;;9DJJXO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 7 7S[[5K**?;;EO""J|,D$(5L$2 $2 3 3 3tr+   c                    | j                                         }|t          k    rdS |dk    rQ| j                            t
          d         dd           | j                            t
          d         dd           nC| j                            t
          d         || j                             d          z   d           dS )	NFr{   r-   r|   r.   r_   r?   T)r   rG   r   r1   rH   r   r   r'   r0   s     r*   plaintextStatezHTMLTokenizer.plaintextStateh  s    {!!3;;5XO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 7 7 O""J|,D$(4;+A+A(+K+K$K$M $M N N Ntr+   c                 B   | j                                         }|dk    r| j        | _        nq|dk    r| j        | _        n]|t
          v r&t          d         |g ddd| _        | j        | _        n.|dk    r]| j	        
                    t          d         dd	           | j	        
                    t          d
         dd	           | j        | _        n|dk    rO| j	        
                    t          d         dd	           | j                             |           | j        | _        nv| j	        
                    t          d         dd	           | j	        
                    t          d
         dd	           | j                             |           | j        | _        dS )N!/rq   F)r/   rp   r0   rs   selfClosingAcknowledged>r-   z'expected-tag-name-but-got-right-bracketr.   r_   z<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerX   T)r   rG   markupDeclarationOpenStater"   closeTagOpenStater   r   r$   tagNameStater1   rH   r!   rN   bogusCommentStater   s     r*   r   zHTMLTokenizer.tagOpenStatew  s   {!!3;;8DJJS[[/DJJ\!!)3J)?)-r05<A!C !CD *DJJS[[ O""J|,D$M$O $O P P PO""J|,Dd#S#STTTDJJS[[ O""J|,D$M$O $O P P PKd###/DJJ O""J|,D$7$9 $9 : : :O""J|,Dc#R#RSSSKd###DJtr+   c                 v   | j                                         }|t          v r$t          d         |g dd| _        | j        | _        n|dk    r5| j                            t          d         dd           | j	        | _        n|t          u r]| j                            t          d         dd           | j                            t          d	         d
d           | j	        | _        nQ| j                            t          d         dd|id           | j                             |           | j        | _        dS )Nrr   Fr/   rp   r0   rs   r   r-   z*expected-closing-tag-but-got-right-bracketr.   z expected-closing-tag-but-got-eofr_   </z!expected-closing-tag-but-got-charr0   r<   T)r   rG   r   r   r$   r   r"   r1   rH   r!   r   rN   r   r   s     r*   r   zHTMLTokenizer.closeTagOpenState  s`   {!!<)3H)=t)+E!C !CD*DJJS[[O""J|,D$P$R $R S S SDJJS[[O""J|,D$F$H $H I I IO""J|,Dd#S#STTTDJJ O""J|,D$G17$@ $@ A A A Kd###/DJtr+   c                    | j                                         }|t          v r| j        | _        n|dk    r|                                  n|t          u r5| j                            t          d         dd           | j
        | _        nl|dk    r| j        | _        nY|dk    r>| j                            t          d         dd           | j        dxx         d	z  cc<   n| j        dxx         |z  cc<   d
S )Nr   r-   zeof-in-tag-namer.   r   r{   r|   rp   r?   T)r   rG   r   beforeAttributeNameStater"   ry   r   r1   rH   r   r!   selfClosingStartTagStater$   r   s     r*   r   zHTMLTokenizer.tagNameState  s'   {!!?""6DJJS[[!!####S[[O""J|,D$5$7 $7 8 8 8DJJS[[6DJJXO""J|,D,?$A $A B B Bf%%%1%%%%f%%%-%%% tr+   c                    | j                                         }|dk    rd| _        | j        | _        nN| j                            t          d         dd           | j                             |           | j	        | _        dS Nr   r:   r_   rX   r.   T)
r   rG   temporaryBufferrcdataEndTagOpenStater"   r1   rH   r   rN   r   r   s     r*   r   z%HTMLTokenizer.rcdataLessThanSignState  sz    {!!3;;#%D 3DJJO""J|,Dc#R#RSSSKd###)DJtr+   c                     | j                                         }|t          v r| xj        |z  c_        | j        | _        nN| j                            t          d         dd           | j         	                    |           | j
        | _        dS Nr_   r   r.   T)r   rG   r   r   rcdataEndTagNameStater"   r1   rH   r   rN   r   r   s     r*   r   z#HTMLTokenizer.rcdataEndTagOpenState  s    {!!<  D(  3DJJO""J|,Dd#S#STTTKd###)DJtr+   c                    | j         o9| j         d                                         | j                                        k    }| j                                        }|t
          v r+|r)t          d         | j        g dd| _         | j        | _        n|dk    r+|r)t          d         | j        g dd| _         | j	        | _        n|dk    r?|r=t          d         | j        g dd| _         | 
                                 | j        | _        np|t          v r| xj        |z  c_        nV| j                            t          d         d| j        z   d	           | j                            |           | j        | _        d
S Nrp   rr   Fr   r   r   r_   r   r.   T)r$   lowerr   r   rG   r   r   r   r"   r   ry   r!   r   r1   rH   rN   r   r'   appropriater0   s      r*   r   z#HTMLTokenizer.rcdataEndTagNameState  s   'mD,=f,E,K,K,M,MQUQeQkQkQmQm,m{!!?""{")3H)=)-)=)+E!C !CD 6DJJS[[[[)3H)=)-)=)+E!C !CD 6DJJS[[[[)3H)=)-)=)+E!C !CD !!###DJJ\!!  D(   O""J|,D,043G,G$I $I J J JKd###)DJtr+   c                    | j                                         }|dk    rd| _        | j        | _        nN| j                            t          d         dd           | j                             |           | j	        | _        dS r   )
r   rG   r   rawtextEndTagOpenStater"   r1   rH   r   rN   r   r   s     r*   r   z&HTMLTokenizer.rawtextLessThanSignState  sz    {!!3;;#%D 4DJJO""J|,Dc#R#RSSSKd###*DJtr+   c                     | j                                         }|t          v r| xj        |z  c_        | j        | _        nN| j                            t          d         dd           | j         	                    |           | j
        | _        dS r   )r   rG   r   r   rawtextEndTagNameStater"   r1   rH   r   rN   r   r   s     r*   r   z$HTMLTokenizer.rawtextEndTagOpenState  s    {!!<  D(  4DJJO""J|,Dd#S#STTTKd###*DJtr+   c                    | j         o9| j         d                                         | j                                        k    }| j                                        }|t
          v r+|r)t          d         | j        g dd| _         | j        | _        n|dk    r+|r)t          d         | j        g dd| _         | j	        | _        n|dk    r?|r=t          d         | j        g dd| _         | 
                                 | j        | _        np|t          v r| xj        |z  c_        nV| j                            t          d         d| j        z   d	           | j                            |           | j        | _        d
S r   )r$   r   r   r   rG   r   r   r   r"   r   ry   r!   r   r1   rH   rN   r   r   s      r*   r   z$HTMLTokenizer.rawtextEndTagNameState  s   'mD,=f,E,K,K,M,MQUQeQkQkQmQm,m{!!?""{")3H)=)-)=)+E!C !CD 6DJJS[[[[)3H)=)-)=)+E!C !CD 6DJJS[[[[)3H)=)-)=)+E!C !CD !!###DJJ\!!  D(   O""J|,D,043G,G$I $I J J JKd###*DJtr+   c                 ~   | j                                         }|dk    rd| _        | j        | _        n|dk    r5| j                            t          d         dd           | j        | _        nN| j                            t          d         dd           | j         	                    |           | j
        | _        dS )	Nr   r:   r   r_   z<!r.   rX   T)r   rG   r   scriptDataEndTagOpenStater"   r1   rH   r   scriptDataEscapeStartStaterN   r   r   s     r*   r   z)HTMLTokenizer.scriptDataLessThanSignState,  s    {!!3;;#%D 7DJJS[[O""J|,Dd#S#STTT8DJJO""J|,Dc#R#RSSSKd###-DJtr+   c                     | j                                         }|t          v r| xj        |z  c_        | j        | _        nN| j                            t          d         dd           | j         	                    |           | j
        | _        dS r   )r   rG   r   r   scriptDataEndTagNameStater"   r1   rH   r   rN   r   r   s     r*   r   z'HTMLTokenizer.scriptDataEndTagOpenState:  s    {!!<  D(  7DJJO""J|,Dd#S#STTTKd###-DJtr+   c                    | j         o9| j         d                                         | j                                        k    }| j                                        }|t
          v r+|r)t          d         | j        g dd| _         | j        | _        n|dk    r+|r)t          d         | j        g dd| _         | j	        | _        n|dk    r?|r=t          d         | j        g dd| _         | 
                                 | j        | _        np|t          v r| xj        |z  c_        nV| j                            t          d         d| j        z   d	           | j                            |           | j        | _        d
S r   )r$   r   r   r   rG   r   r   r   r"   r   ry   r!   r   r1   rH   rN   r   r   s      r*   r   z'HTMLTokenizer.scriptDataEndTagNameStateE  s   'mD,=f,E,K,K,M,MQUQeQkQkQmQm,m{!!?""{")3H)=)-)=)+E!C !CD 6DJJS[[[[)3H)=)-)=)+E!C !CD 6DJJS[[[[)3H)=)-)=)+E!C !CD !!###DJJ\!!  D(   O""J|,D,043G,G$I $I J J JKd###-DJtr+   c                     | j                                         }|dk    r5| j                            t          d         dd           | j        | _        n&| j                             |           | j        | _        dS N-r_   r.   T)	r   rG   r1   rH   r   scriptDataEscapeStartDashStater"   rN   r   r   s     r*   r   z(HTMLTokenizer.scriptDataEscapeStartStatea  r    {!!3;;O""J|,Dc#R#RSSS<DJJKd###-DJtr+   c                     | j                                         }|dk    r5| j                            t          d         dd           | j        | _        n&| j                             |           | j        | _        dS r   )	r   rG   r1   rH   r   scriptDataEscapedDashDashStater"   rN   r   r   s     r*   r   z,HTMLTokenizer.scriptDataEscapeStartDashStatek  r   r+   c                 <   | j                                         }|dk    r5| j                            t          d         dd           | j        | _        n|dk    r| j        | _        n|dk    rQ| j                            t          d         dd           | j                            t          d         dd           n]|t          k    r| j	        | _        nE| j         
                    d	          }| j                            t          d         ||z   d           d
S )Nr   r_   r.   rX   r{   r-   r|   r?   )rX   r   r{   T)r   rG   r1   rH   r   scriptDataEscapedDashStater"   "scriptDataEscapedLessThanSignStater   r!   r   r   s      r*   scriptDataEscapedStatez$HTMLTokenizer.scriptDataEscapedStateu  s4   {!!3;;O""J|,Dc#R#RSSS8DJJS[[@DJJXO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 7 7S[[DJJK**+?@@EO""J|,D$(5L$2 $2 3 3 3tr+   c                 2   | j                                         }|dk    r5| j                            t          d         dd           | j        | _        n|dk    r| j        | _        n|dk    r]| j                            t          d         dd           | j                            t          d         dd           | j        | _        nL|t          k    r| j
        | _        n4| j                            t          d         |d           | j        | _        d	S )
Nr   r_   r.   rX   r{   r-   r|   r?   T)r   rG   r1   rH   r   r   r"   r   r   r   r!   r   s     r*   r   z(HTMLTokenizer.scriptDataEscapedDashState  s    {!!3;;O""J|,Dc#R#RSSS<DJJS[[@DJJXO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 74DJJS[[DJJO""J|,Dd#S#STTT4DJtr+   c                    | j                                         }|dk    r)| j                            t          d         dd           n|dk    r| j        | _        n|dk    r5| j                            t          d         dd           | j        | _        n|dk    r]| j                            t          d         dd           | j                            t          d         d	d           | j        | _        nL|t          k    r| j
        | _        n4| j                            t          d         |d           | j        | _        d
S )Nr   r_   r.   rX   r   r{   r-   r|   r?   T)r   rG   r1   rH   r   r   r"   r   r   r   r!   r   s     r*   r   z,HTMLTokenizer.scriptDataEscapedDashDashState  sO   {!!3;;O""J|,Dc#R#RSSSSS[[@DJJS[[O""J|,Dc#R#RSSS-DJJXO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 74DJJS[[DJJO""J|,Dd#S#STTT4DJtr+   c                    | j                                         }|dk    rd| _        | j        | _        n|t
          v r?| j                            t          d         d|z   d           || _        | j	        | _        nN| j                            t          d         dd           | j         
                    |           | j        | _        dS r   )r   rG   r    scriptDataEscapedEndTagOpenStater"   r   r1   rH   r    scriptDataDoubleEscapeStartStaterN   r   r   s     r*   r   z0HTMLTokenizer.scriptDataEscapedLessThanSignState  s    {!!3;;#%D >DJJ\!!O""J|,DcTXj#Y#YZZZ#'D >DJJO""J|,Dc#R#RSSSKd###4DJtr+   c                    | j                                         }|t          v r|| _        | j        | _        nN| j                            t          d         dd           | j         	                    |           | j
        | _        dS r   )r   rG   r   r    scriptDataEscapedEndTagNameStater"   r1   rH   r   rN   r   r   s     r*   r   z.HTMLTokenizer.scriptDataEscapedEndTagOpenState  s|    {!!<#'D >DJJO""J|,Dd#S#STTTKd###4DJtr+   c                    | j         o9| j         d                                         | j                                        k    }| j                                        }|t
          v r+|r)t          d         | j        g dd| _         | j        | _        n|dk    r+|r)t          d         | j        g dd| _         | j	        | _        n|dk    r?|r=t          d         | j        g dd| _         | 
                                 | j        | _        np|t          v r| xj        |z  c_        nV| j                            t          d         d| j        z   d	           | j                            |           | j        | _        d
S r   )r$   r   r   r   rG   r   r   r   r"   r   ry   r!   r   r1   rH   rN   r   r   s      r*   r   z.HTMLTokenizer.scriptDataEscapedEndTagNameState  s   'mD,=f,E,K,K,M,MQUQeQkQkQmQm,m{!!?""{")3H)=)-)=)+E!C !CD 6DJJS[[[[)3H)=)-)=)+E!C !CD 6DJJS[[[[)3H)=)-)=)+E!C !CD !!###DJJ\!!  D(   O""J|,D,043G,G$I $I J J JKd###4DJtr+   c                    | j                                         }|t          t          d          z  v r_| j                            t          d         |d           | j                                        dk    r| j	        | _
        nu| j        | _
        nh|t          v r9| j                            t          d         |d           | xj        |z  c_        n&| j                             |           | j        | _
        dS N)r   r   r_   r.   scriptT)r   rG   r   rK   r1   rH   r   r   r   scriptDataDoubleEscapedStater"   r   r   rN   r   s     r*   r   z.HTMLTokenizer.scriptDataDoubleEscapeStartState  s    {!!Oi
&;&;;<<O""J|,Dd#S#STTT#))++x77!>

!8

\!!O""J|,Dd#S#STTT  D(   Kd###4DJtr+   c                    | j                                         }|dk    r5| j                            t          d         dd           | j        | _        n|dk    r5| j                            t          d         dd           | j        | _        n|dk    rQ| j                            t          d         dd           | j                            t          d         dd           nh|t          k    r5| j                            t          d         d	d           | j	        | _        n(| j                            t          d         |d           d
S Nr   r_   r.   rX   r{   r-   r|   r?   eof-in-script-in-scriptT)
r   rG   r1   rH   r    scriptDataDoubleEscapedDashStater"   (scriptDataDoubleEscapedLessThanSignStater   r!   r   s     r*   r   z*HTMLTokenizer.scriptDataDoubleEscapedState  sc   {!!3;;O""J|,Dc#R#RSSS>DJJS[[O""J|,Dc#R#RSSSFDJJXO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 7 7S[[O""J|,D$=$? $? @ @ @DJJO""J|,Dd#S#STTTtr+   c                    | j                                         }|dk    r6| j                            t          d         dd           | j        | _        n|dk    r5| j                            t          d         dd           | j        | _        n|dk    r]| j                            t          d         dd           | j                            t          d         dd           | j        | _        nt|t          k    r5| j                            t          d         d	d           | j
        | _        n4| j                            t          d         |d           | j        | _        d
S r   )r   rG   r1   rH   r   $scriptDataDoubleEscapedDashDashStater"   r   r   r   r!   r   s     r*   r   z.HTMLTokenizer.scriptDataDoubleEscapedDashState  st   {!!3;;O""J|,Dc#R#RSSSBDJJS[[O""J|,Dc#R#RSSSFDJJXO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 7:DJJS[[O""J|,D$=$? $? @ @ @DJJO""J|,Dd#S#STTT:DJtr+   c                 4   | j                                         }|dk    r*| j                            t          d         dd           nN|dk    r6| j                            t          d         dd           | j        | _        n|dk    r5| j                            t          d         dd           | j        | _        n|dk    r]| j                            t          d         dd           | j                            t          d         d	d           | j        | _        nt|t          k    r5| j                            t          d         d
d           | j
        | _        n4| j                            t          d         |d           | j        | _        dS )Nr   r_   r.   rX   r   r{   r-   r|   r?   r   T)r   rG   r1   rH   r   r   r"   r   r   r   r!   r   s     r*   r   z2HTMLTokenizer.scriptDataDoubleEscapedDashDashState%  s   {!!3;;O""J|,Dc#R#RSSSSS[[O""J|,Dc#R#RSSSFDJJS[[O""J|,Dc#R#RSSS-DJJXO""J|,D,?$A $A B B BO""J|,D,4$6 $6 7 7 7:DJJS[[O""J|,D$=$? $? @ @ @DJJO""J|,Dd#S#STTT:DJtr+   c                    | j                                         }|dk    r<| j                            t          d         dd           d| _        | j        | _        n&| j                             |           | j	        | _        dS )Nr   r_   r.   r:   T)
r   rG   r1   rH   r   r   scriptDataDoubleEscapeEndStater"   rN   r   r   s     r*   r   z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignState>  sz    {!!3;;O""J|,Dc#R#RSSS#%D <DJJKd###:DJtr+   c                    | j                                         }|t          t          d          z  v r_| j                            t          d         |d           | j                                        dk    r| j	        | _
        nu| j        | _
        nh|t          v r9| j                            t          d         |d           | xj        |z  c_        n&| j                             |           | j        | _
        dS r   )r   rG   r   rK   r1   rH   r   r   r   r   r"   r   r   rN   r   s     r*   r   z,HTMLTokenizer.scriptDataDoubleEscapeEndStateI  s    {!!Oi
&;&;;<<O""J|,Dd#S#STTT#))++x77!8

!>

\!!O""J|,Dd#S#STTT  D(   Kd###:DJtr+   c                    | j                                         }|t          v r"| j                             t          d           n|t          v r0| j        d                             |dg           | j        | _        nT|dk    r| 	                                 n8|dk    r| j
        | _        n$|dv rW| j                            t          d         dd	           | j        d                             |dg           | j        | _        n|d
k    rW| j                            t          d         dd	           | j        d                             ddg           | j        | _        nl|t          u r5| j                            t          d         dd	           | j        | _        n.| j        d                             |dg           | j        | _        dS )NTr0   r:   r   r   )'"r]   rX   r-   #invalid-character-in-attribute-namer.   r{   r|   r?   z#expected-attribute-name-but-got-eof)r   rG   r   r   r   r$   rH   attributeNameStater"   ry   r   r1   r   r   r!   r   s     r*   r   z&HTMLTokenizer.beforeAttributeNameStateY  s   {!!?""K""?D9999\!!f%,,dBZ8880DJJS[[!!####S[[6DJJ)))O""J|,D$I$K $K L L Lf%,,dBZ8880DJJXO""J|,D,?$A $A B B Bf%,,h^<<<0DJJS[[O""J|,D$I$K $K L L LDJJf%,,dBZ8880DJtr+   c                    | j                                         }d}d}|dk    r| j        | _        n|t          v rF| j        d         d         dxx         || j                             t          d          z   z  cc<   d}n8|dk    rd}n.|t          v r| j        | _        n|dk    r| j	        | _        n|d	k    rL| j
                            t          d
         dd           | j        d         d         dxx         dz  cc<   d}n|dv rL| j
                            t          d
         dd           | j        d         d         dxx         |z  cc<   d}na|t          u r5| j
                            t          d
         dd           | j        | _        n#| j        d         d         dxx         |z  cc<   d}|r| j        d         d         d                             t                     | j        d         d         d<   | j        d         d d         D ]L\  }}| j        d         d         d         |k    r*| j
                            t          d
         dd            nM|r|                                  dS )NTFr]   r0   rZ   r   r   r   r{   r-   r|   r.   r?   r   r   rX   r   zeof-in-attribute-namezduplicate-attribute)r   rG   beforeAttributeValueStater"   r   r$   r   r   afterAttributeNameStater   r1   rH   r   r   r!   rt   r   ry   )r'   r0   leavingThisState	emitTokenrp   _s         r*   r   z HTMLTokenizer.attributeNameStatew  s   {!!	3;;7DJJ\!!f%b)!,,,&&|T::1; ;,,,$S[[ II_$$5DJJS[[6DJJXO""J|,D,?$A $A B B Bf%b)!,,,8,,,$_$$O""J|,D$I$K $K L L L f%b)!,,,4,,,$S[[O""J|,D,C$E $E F F FDJJf%b)!,,,4,,,$ 	(
 !&)"-a0::;KLL f%b)!,,V4SbS9  a$V,R03t;;O**J|4L,A,C ,C D D DE <
  (%%'''tr+   c                    | j                                         }|t          v r"| j                             t          d           n|dk    r| j        | _        n|dk    r|                                  nq|t          v r0| j        d         	                    |dg           | j
        | _        n8|dk    r| j        | _        n$|dk    rW| j        	                    t          d         d	d
           | j        d         	                    ddg           | j
        | _        n|dv rW| j        	                    t          d         dd
           | j        d         	                    |dg           | j
        | _        nl|t          u r5| j        	                    t          d         dd
           | j        | _        n.| j        d         	                    |dg           | j
        | _        dS )NTr]   r   r0   r:   r   r{   r-   r|   r.   r?   r   z&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)r   rG   r   r   r   r"   ry   r   r$   rH   r   r   r1   r   r   r!   r   s     r*   r   z%HTMLTokenizer.afterAttributeNameState  s   {!!?""K""?D9999S[[7DJJS[[!!####\!!f%,,dBZ8880DJJS[[6DJJXO""J|,D,?$A $A B B Bf%,,h^<<<0DJJ_$$O""J|,D$L$N $N O O Of%,,dBZ8880DJJS[[O""J|,D$E$G $G H H HDJJf%,,dBZ8880DJtr+   c                    | j                                         }|t          v r"| j                             t          d           n|dk    r| j        | _        n|dk    r(| j        | _        | j                             |           ny|dk    r| j        | _        ne|dk    r>| j	        
                    t          d         dd           |                                  n!|d	k    rV| j	        
                    t          d         d
d           | j        d         d         dxx         dz  cc<   | j        | _        n|dv rV| j	        
                    t          d         dd           | j        d         d         dxx         |z  cc<   | j        | _        nk|t          u r5| j	        
                    t          d         dd           | j        | _        n-| j        d         d         dxx         |z  cc<   | j        | _        dS )NTr   rW   r   r   r-   z.expected-attribute-value-but-got-right-bracketr.   r{   r|   r0   rZ   r
   r?   )r]   rX   `z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)r   rG   r   r   attributeValueDoubleQuotedStater"   attributeValueUnQuotedStaterN   attributeValueSingleQuotedStater1   rH   r   ry   r$   r   r!   r   s     r*   r   z'HTMLTokenizer.beforeAttributeValueState  sG   {!!?""K""?D9999T\\=DJJS[[9DJKd####S[[=DJJS[[O""J|,D$T$V $V W W W!!####XO""J|,D,?$A $A B B Bf%b)!,,,8,,,9DJJ_$$O""J|,D$H$J $J K K Kf%b)!,,,4,,,9DJJS[[O""J|,D$J$L $L M M MDJJf%b)!,,,4,,,9DJtr+   c                 *   | j                                         }|dk    r| j        | _        n|dk    r|                     d           n|dk    rJ| j                            t          d         dd           | j        d         d         d	xx         d
z  cc<   nz|t          u r5| j                            t          d         dd           | j
        | _        n<| j        d         d         d	xx         || j                             d          z   z  cc<   dS )Nr   rW   r{   r-   r|   r.   r0   rZ   r
   r?   z#eof-in-attribute-value-double-quote)r   rW   r{   Tr   rG   afterAttributeValueStater"   rn   r1   rH   r   r$   r   r!   r   r   s     r*   r   z-HTMLTokenizer.attributeValueDoubleQuotedState  sD   {!!4<<6DJJS[[))#....XO""J|,D,?$A $A B B Bf%b)!,,,8,,,,S[[O""J|,D$I$K $K L L LDJJf%b)!,,,&&'<==1> >,,,tr+   c                 *   | j                                         }|dk    r| j        | _        n|dk    r|                     d           n|dk    rJ| j                            t          d         dd           | j        d         d         d	xx         d
z  cc<   nz|t          u r5| j                            t          d         dd           | j
        | _        n<| j        d         d         d	xx         || j                             d          z   z  cc<   dS )Nr   rW   r{   r-   r|   r.   r0   rZ   r
   r?   z#eof-in-attribute-value-single-quote)r   rW   r{   Tr   r   s     r*   r   z-HTMLTokenizer.attributeValueSingleQuotedState  sD   {!!3;;6DJJS[[))#....XO""J|,D,?$A $A B B Bf%b)!,,,8,,,,S[[O""J|,D$I$K $K L L LDJJf%b)!,,,&&';<<1= =,,,tr+   c           	      2   | j                                         }|t          v r| j        | _        nf|dk    r|                     d           nI|dk    r|                                  n-|dv rJ| j                            t          d         dd           | j
        d         d         d	xx         |z  cc<   n|d
k    rJ| j                            t          d         dd           | j
        d         d         d	xx         dz  cc<   n|t          u r5| j                            t          d         dd           | j        | _        nQ| j
        d         d         d	xx         || j                             t          d          t          z            z   z  cc<   dS )NrW   r   )r   r   r]   rX   r   r-   z0unexpected-character-in-unquoted-attribute-valuer.   r0   rZ   r
   r{   r|   r?   z eof-in-attribute-value-no-quotes)rW   r   r   r   r]   rX   r   r{   T)r   rG   r   r   r"   rn   ry   r1   rH   r   r$   r   r!   r   rK   r   s     r*   r   z)HTMLTokenizer.attributeValueUnQuotedState  s   {!!?""6DJJS[[))#....S[[!!####...O""J|,D$V$X $X Y Y Yf%b)!,,,4,,,,XO""J|,D,?$A $A B B Bf%b)!,,,8,,,,S[[O""J|,D$F$H $H I I IDJJf%b)!,,,t{7M7MGHH?Z8\ 8\ 1\ \,,,tr+   c                    | j                                         }|t          v r| j        | _        n|dk    r|                                  n|dk    r| j        | _        n|t          u rO| j        	                    t          d         dd           | j                             |           | j        | _        nN| j        	                    t          d         dd           | j                             |           | j        | _        dS )Nr   r   r-   z$unexpected-EOF-after-attribute-valuer.   z*unexpected-character-after-attribute-valueT)r   rG   r   r   r"   ry   r   r   r1   rH   r   rN   r!   r   s     r*   r   z&HTMLTokenizer.afterAttributeValueState.  s   {!!?""6DJJS[[!!####S[[6DJJS[[O""J|,D$J$L $L M M MKd###DJJO""J|,D$P$R $R S S SKd###6DJtr+   c                    | j                                         }|dk    rd| j        d<   |                                  n|t          u rO| j                            t          d         dd           | j                             |           | j	        | _
        nN| j                            t          d         dd           | j                             |           | j        | _
        dS )Nr   Trs   r-   z#unexpected-EOF-after-solidus-in-tagr.   z)unexpected-character-after-solidus-in-tag)r   rG   r$   ry   r   r1   rH   r   rN   r!   r"   r   r   s     r*   r   z&HTMLTokenizer.selfClosingStartTagStateB  s    {!!3;;/3Dm,!!####S[[O""J|,D$I$K $K L L L Kd###DJJO""J|,D$O$Q $Q R R RKd###6DJtr+   c                     | j                             d          }|                    dd          }| j                            t
          d         |d           | j                                          | j        | _        dS )Nr   r{   r?   Commentr.   T)	r   r   replacer1   rH   r   rG   r!   r"   r   s     r*   r   zHTMLTokenizer.bogusCommentStateT  sz     {%%c**||Hh//	*D99	; 	; 	;
 	^
tr+   c                    | j                                         g}|d         dk    r]|                    | j                                                    |d         dk    r#t          d         dd| _        | j        | _        dS n|d         dv rjd}dD ]<}|                    | j                                                    |d         |vrd	} n=|r&t          d
         dd d dd| _        | j        | _        dS n|d         dk    r| j        | j        j	        j
        r| j        j	        j
        d         j        | j        j	        j        k    rSd}dD ]>}|                    | j                                                    |d         |k    rd	} n?|r| j        | _        dS | j                            t          d         dd           |r.| j                             |                                           |.| j        | _        dS )NrZ   r   r   r:   r.   T)dD))oOrS   CtTyYpPeEFDoctype)r/   rp   publicIdsystemIdcorrect[)r   r   Ar  r  r  r-   zexpected-dashes-or-doctype)r   rG   rH   r   r$   commentStartStater"   doctypeStater   treeopenElements	namespacedefaultNamespacecdataSectionStater1   rN   r3   r   )r'   rR   matchedexpecteds       r*   r   z(HTMLTokenizer.markupDeclarationOpenStatec  sS   [%%''(	R=CT[--//000}##-7	-BB$O$O!!3
t $ r]j((GA    !1!1!3!3444R=00#GE 1  -7	-B-/15404%6 %6! ".
t ms""k%k+ &k+B/9T[=M=^^^G:    !1!1!3!3444R=H,,#GE -  !3
t
<(@ < >  > 	? 	? 	?  	/Kimmoo...  	/+
tr+   c                    | j                                         }|dk    r| j        | _        n|dk    r>| j                            t          d         dd           | j        dxx         dz  cc<   n|dk    rT| j                            t          d         d	d           | j                            | j                   | j        | _        n~|t          u rT| j                            t          d         d
d           | j                            | j                   | j        | _        n!| j        dxx         |z  cc<   | j
        | _        dS )Nr   r{   r-   r|   r.   r0   r?   r   incorrect-commenteof-in-commentT)r   rG   commentStartDashStater"   r1   rH   r   r$   r!   r   commentStater   s     r*   r  zHTMLTokenizer.commentStartState  sn   {!!3;;3DJJXO""J|,D,?$A $A B B Bf%%%1%%%%S[[O""J|,D$7$9 $9 : : :O""4#4555DJJS[[O""J|,D$4$6 $6 7 7 7O""4#4555DJJf%%%-%%%*DJtr+   c                    | j                                         }|dk    r| j        | _        n|dk    r>| j                            t          d         dd           | j        dxx         dz  cc<   n|dk    rT| j                            t          d         d	d           | j                            | j                   | j        | _        n|t          u rT| j                            t          d         d
d           | j                            | j                   | j        | _        n$| j        dxx         d|z   z  cc<   | j
        | _        dS )Nr   r{   r-   r|   r.   r0      -�r   r  r  T)r   rG   commentEndStater"   r1   rH   r   r$   r!   r   r  r   s     r*   r  z#HTMLTokenizer.commentStartDashState  sr   {!!3;;-DJJXO""J|,D,?$A $A B B Bf%%%2%%%%S[[O""J|,D$7$9 $9 : : :O""4#4555DJJS[[O""J|,D$4$6 $6 7 7 7O""4#4555DJJf%%%t3%%%*DJtr+   c                     | j                                         }|dk    r| j        | _        n|dk    r>| j                            t          d         dd           | j        dxx         dz  cc<   n|t          u rT| j                            t          d         dd           | j                            | j                   | j	        | _        n0| j        dxx         || j         
                    d	          z   z  cc<   d
S )Nr   r{   r-   r|   r.   r0   r?   r  )r   r{   T)r   rG   commentEndDashStater"   r1   rH   r   r$   r   r!   r   r   s     r*   r  zHTMLTokenizer.commentState  s#   {!!3;;1DJJXO""J|,D,?$A $A B B Bf%%%1%%%%S[[O""J|,D,<$> $> ? ? ?O""4#4555DJJf%%%&&77*8 8%%%tr+   c                     | j                                         }|dk    r| j        | _        n|dk    rJ| j                            t          d         dd           | j        dxx         dz  cc<   | j        | _        n|t          u rT| j                            t          d         dd           | j                            | j                   | j
        | _        n$| j        dxx         d|z   z  cc<   | j        | _        d	S )
Nr   r{   r-   r|   r.   r0   r!  zeof-in-comment-end-dashT)r   rG   r"  r"   r1   rH   r   r$   r  r   r!   r   s     r*   r$  z!HTMLTokenizer.commentEndDashState  s#   {!!3;;-DJJXO""J|,D,?$A $A B B Bf%%%2%%%*DJJS[[O""J|,D$=$? $? @ @ @O""4#4555DJJf%%%t3%%%*DJtr+   c                    | j                                         }|dk    r-| j                            | j                   | j        | _        ny|dk    rK| j                            t          d         dd           | j        dxx         dz  cc<   | j        | _        n(|dk    r5| j                            t          d         d	d           | j	        | _        n|d
k    r>| j                            t          d         dd           | j        dxx         |z  cc<   n|t          u rT| j                            t          d         dd           | j                            | j                   | j        | _        nL| j                            t          d         dd           | j        dxx         d|z   z  cc<   | j        | _        dS )Nr   r{   r-   r|   r.   r0   u   --�r   z,unexpected-bang-after-double-dash-in-commentr   z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)r   rG   r1   rH   r$   r!   r"   r   r  commentEndBangStater   r   s     r*   r"  zHTMLTokenizer.commentEndState  s   {!!3;;O""4#4555DJJXO""J|,D,?$A $A B B Bf%%%3%%%*DJJS[[O""J|,D$R$T $T U U U1DJJS[[O""J|,D$R$T $T U U Uf%%%-%%%%S[[O""J|,D$@$B $B C C CO""4#4555DJJ O""J|,D$@$B $B C C Cf%%%4%%%*DJtr+   c                    | j                                         }|dk    r,| j                            | j                   | j        | _        n|dk    r"| j        dxx         dz  cc<   | j        | _        n|dk    rJ| j                            t          d         dd           | j        dxx         d	z  cc<   | j	        | _        n|t          u rT| j                            t          d         d
d           | j                            | j                   | j        | _        n$| j        dxx         d|z   z  cc<   | j	        | _        dS )Nr   r   r0   z--!r{   r-   r|   r.   u   --!�zeof-in-comment-end-bang-stateT)r   rG   r1   rH   r$   r!   r"   r$  r   r  r   r   s     r*   r'  z!HTMLTokenizer.commentEndBangState  sq   {!!3;;O""4#4555DJJS[[f%%%.%%%1DJJXO""J|,D,?$A $A B B Bf%%%4%%%*DJJS[[O""J|,D$C$E $E F F FO""4#4555DJJf%%%5%%%*DJtr+   c                    | j                                         }|t          v r| j        | _        n|t
          u r^| j                            t          d         dd           d| j	        d<   | j                            | j	                   | j
        | _        nN| j                            t          d         dd           | j                             |           | j        | _        dS )Nr-   !expected-doctype-name-but-got-eofr.   Fr  zneed-space-after-doctypeT)r   rG   r   beforeDoctypeNameStater"   r   r1   rH   r   r$   r!   rN   r   s     r*   r  zHTMLTokenizer.doctypeState  s    {!!?""4DJJS[[O""J|,D$G$I $I J J J+0Di(O""4#4555DJJO""J|,D$>$@ $@ A A AKd###4DJtr+   c                    | j                                         }|t          v rn&|dk    r^| j                            t
          d         dd           d| j        d<   | j                            | j                   | j        | _        n|dk    r?| j                            t
          d         dd           d	| j        d
<   | j	        | _        n}|t          u r^| j                            t
          d         dd           d| j        d<   | j                            | j                   | j        | _        n|| j        d
<   | j	        | _        dS )Nr   r-   z+expected-doctype-name-but-got-right-bracketr.   Fr  r{   r|   r?   rp   r*  T)r   rG   r   r1   rH   r   r$   r!   r"   doctypeNameStater   r   s     r*   r+  z$HTMLTokenizer.beforeDoctypeNameState*  sp   {!!?""S[[O""J|,D$Q$S $S T T T+0Di(O""4#4555DJJXO""J|,D,?$A $A B B B(0Df%.DJJS[[O""J|,D$G$I $I J J J+0Di(O""4#4555DJJ(,Df%.DJtr+   c                 p   | j                                         }|t          v r;| j        d                             t
                    | j        d<   | j        | _        nX|dk    rY| j        d                             t
                    | j        d<   | j        	                    | j                   | j
        | _        n|dk    rJ| j        	                    t          d         dd           | j        dxx         dz  cc<   | j        | _        n|t          u r| j        	                    t          d         dd           d	| j        d
<   | j        d                             t
                    | j        d<   | j        	                    | j                   | j
        | _        n| j        dxx         |z  cc<   dS )Nrp   r   r{   r-   r|   r.   r?   zeof-in-doctype-nameFr  T)r   rG   r   r$   rt   r   afterDoctypeNameStater"   r1   rH   r!   r   r-  r   r   s     r*   r-  zHTMLTokenizer.doctypeNameStateD  s   {!!?""(,(9&(A(K(KL\(](]Df%3DJJS[[(,(9&(A(K(KL\(](]Df%O""4#4555DJJXO""J|,D,?$A $A B B Bf%%%1%%%.DJJS[[O""J|,D$9$; $; < < <+0Di((,(9&(A(K(KL\(](]Df%O""4#4555DJJf%%%-%%%tr+   c                 ^   | j                                         }|t          v rn|dk    r-| j                            | j                   | j        | _        nU|t          u rxd| j        d<   | j         	                    |           | j                            t          d         dd           | j                            | j                   | j        | _        n|dv r9d}d	D ]#}| j                                         }||vrd} n$|r| j        | _        dS n<|d
v r8d}dD ]#}| j                                         }||vrd} n$|r| j        | _        dS | j         	                    |           | j                            t          d         dd|id           d| j        d<   | j        | _        dS )Nr   Fr  r-   eof-in-doctyper.   r  T))uU)bB)lL)iIr   sS)r  r:  r   r	  )mMz*expected-space-or-right-bracket-in-doctyper0   r<   )r   rG   r   r1   rH   r$   r!   r"   r   rN   r   afterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)r'   r0   r  r  s       r*   r/  z#HTMLTokenizer.afterDoctypeNameState]  s   {!!?""S[[O""4#4555DJJS[[+0Di(Kd###O""J|,D$4$6 $6 7 7 7O""4#4555DJJz!!!9  H;++--D8++"' ,   !%!DDJ4  ##!9  H;++--D8++"' ,   !%!DDJ4 Kd###O""J|,D$P%+TN$4 $4 5 5 5 ,1Di(/DJtr+   c                 $   | j                                         }|t          v r| j        | _        n|dv rO| j                            t          d         dd           | j                             |           | j        | _        n|t          u r^| j                            t          d         dd           d| j
        d<   | j                            | j
                   | j        | _        n&| j                             |           | j        | _        dS 	N)r   r   r-   unexpected-char-in-doctyper.   r1  Fr  T)r   rG   r   "beforeDoctypePublicIdentifierStater"   r1   rH   r   rN   r   r$   r!   r   s     r*   r?  z,HTMLTokenizer.afterDoctypePublicKeywordState     {!!?""@DJJZO""J|,D$@$B $B C C CKd###@DJJS[[O""J|,D$4$6 $6 7 7 7+0Di(O""4#4555DJJKd###@DJtr+   c                    | j                                         }|t          v rnE|dk    rd| j        d<   | j        | _        n'|dk    rd| j        d<   | j        | _        n	|dk    r^| j                            t          d         dd           d	| j        d
<   | j                            | j                   | j
        | _        n|t          u r^| j                            t          d         dd           d	| j        d
<   | j                            | j                   | j
        | _        n>| j                            t          d         dd           d	| j        d
<   | j        | _        dS )Nr   r:   r  r   r   r-   unexpected-end-of-doctyper.   Fr  r1  rD  T)r   rG   r   r$   (doctypePublicIdentifierDoubleQuotedStater"   (doctypePublicIdentifierSingleQuotedStater1   rH   r   r!   r   rA  r   s     r*   rE  z0HTMLTokenizer.beforeDoctypePublicIdentifierState  s   {!!?""T\\,.Dj)FDJJS[[,.Dj)FDJJS[[O""J|,D$?$A $A B B B+0Di(O""4#4555DJJS[[O""J|,D$4$6 $6 7 7 7+0Di(O""4#4555DJJO""J|,D$@$B $B C C C+0Di(/DJtr+   c                    | j                                         }|dk    r| j        | _        n$|dk    r>| j                            t          d         dd           | j        dxx         dz  cc<   n|dk    r^| j                            t          d         d	d           d
| j        d<   | j                            | j                   | j        | _        n||t          u r^| j                            t          d         dd           d
| j        d<   | j                            | j                   | j        | _        n| j        dxx         |z  cc<   dS )Nr   r{   r-   r|   r.   r  r?   r   rH  Fr  r1  T
r   rG   !afterDoctypePublicIdentifierStater"   r1   rH   r   r$   r!   r   r   s     r*   rI  z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedState     {!!4<<?DJJXO""J|,D,?$A $A B B Bj)))X5))))S[[O""J|,D$?$A $A B B B+0Di(O""4#4555DJJS[[O""J|,D$4$6 $6 7 7 7+0Di(O""4#4555DJJj)))T1)))tr+   c                    | j                                         }|dk    r| j        | _        n$|dk    r>| j                            t          d         dd           | j        dxx         dz  cc<   n|dk    r^| j                            t          d         d	d           d
| j        d<   | j                            | j                   | j        | _        n||t          u r^| j                            t          d         dd           d
| j        d<   | j                            | j                   | j        | _        n| j        dxx         |z  cc<   dS )Nr   r{   r-   r|   r.   r  r?   r   rH  Fr  r1  TrL  r   s     r*   rJ  z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedState     {!!3;;?DJJXO""J|,D,?$A $A B B Bj)))X5))))S[[O""J|,D$?$A $A B B B+0Di(O""4#4555DJJS[[O""J|,D$4$6 $6 7 7 7+0Di(O""4#4555DJJj)))T1)))tr+   c                 *   | j                                         }|t          v r| j        | _        nb|dk    r-| j                            | j                   | j        | _        n/|dk    r?| j                            t          d         dd           d| j        d<   | j
        | _        n|dk    r?| j                            t          d         dd           d| j        d<   | j        | _        n|t          u r^| j                            t          d         d	d           d
| j        d<   | j                            | j                   | j        | _        n>| j                            t          d         dd           d
| j        d<   | j        | _        dS )Nr   r   r-   rD  r.   r:   r  r   r1  Fr  T)r   rG   r   -betweenDoctypePublicAndSystemIdentifiersStater"   r1   rH   r$   r!   r   (doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStater   rA  r   s     r*   rM  z/HTMLTokenizer.afterDoctypePublicIdentifierState  s   {!!?""KDJJS[[O""4#4555DJJS[[O""J|,D$@$B $B C C C,.Dj)FDJJS[[O""J|,D$@$B $B C C C,.Dj)FDJJS[[O""J|,D$4$6 $6 7 7 7+0Di(O""4#4555DJJO""J|,D$@$B $B C C C+0Di(/DJtr+   c                 t   | j                                         }|t          v rn|dk    r,| j                            | j                   | j        | _        n|dk    rd| j        d<   | j        | _        n|dk    rd| j        d<   | j	        | _        n|t          k    r^| j                            t          d         dd           d	| j        d
<   | j                            | j                   | j        | _        n>| j                            t          d         dd           d	| j        d
<   | j        | _        dS )Nr   r   r:   r  r   r-   r1  r.   Fr  rD  T)r   rG   r   r1   rH   r$   r!   r"   rS  rT  r   r   rA  r   s     r*   rR  z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersState  sK   {!!?""S[[O""4#4555DJJS[[,.Dj)FDJJS[[,.Dj)FDJJS[[O""J|,D$4$6 $6 7 7 7+0Di(O""4#4555DJJO""J|,D$@$B $B C C C+0Di(/DJtr+   c                 $   | j                                         }|t          v r| j        | _        n|dv rO| j                            t          d         dd           | j                             |           | j        | _        n|t          u r^| j                            t          d         dd           d| j
        d<   | j                            | j
                   | j        | _        n&| j                             |           | j        | _        dS rC  )r   rG   r   "beforeDoctypeSystemIdentifierStater"   r1   rH   r   rN   r   r$   r!   r   s     r*   r@  z,HTMLTokenizer.afterDoctypeSystemKeywordState)  rF  r+   c                    | j                                         }|t          v rnE|dk    rd| j        d<   | j        | _        n'|dk    rd| j        d<   | j        | _        n	|dk    r^| j                            t          d         dd           d	| j        d
<   | j                            | j                   | j
        | _        n|t          u r^| j                            t          d         dd           d	| j        d
<   | j                            | j                   | j
        | _        n>| j                            t          d         dd           d	| j        d
<   | j        | _        dS )Nr   r:   r  r   r   r-   rD  r.   Fr  r1  T)r   rG   r   r$   rS  r"   rT  r1   rH   r   r!   r   rA  r   s     r*   rW  z0HTMLTokenizer.beforeDoctypeSystemIdentifierState=  s   {!!?""T\\,.Dj)FDJJS[[,.Dj)FDJJS[[O""J|,D$@$B $B C C C+0Di(O""4#4555DJJS[[O""J|,D$4$6 $6 7 7 7+0Di(O""4#4555DJJO""J|,D$@$B $B C C C+0Di(/DJtr+   c                    | j                                         }|dk    r| j        | _        n$|dk    r>| j                            t          d         dd           | j        dxx         dz  cc<   n|dk    r^| j                            t          d         d	d           d
| j        d<   | j                            | j                   | j        | _        n||t          u r^| j                            t          d         dd           d
| j        d<   | j                            | j                   | j        | _        n| j        dxx         |z  cc<   dS )Nr   r{   r-   r|   r.   r  r?   r   rH  Fr  r1  T
r   rG   !afterDoctypeSystemIdentifierStater"   r1   rH   r   r$   r!   r   r   s     r*   rS  z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStateZ  rN  r+   c                    | j                                         }|dk    r| j        | _        n$|dk    r>| j                            t          d         dd           | j        dxx         dz  cc<   n|dk    r^| j                            t          d         d	d           d
| j        d<   | j                            | j                   | j        | _        n||t          u r^| j                            t          d         dd           d
| j        d<   | j                            | j                   | j        | _        n| j        dxx         |z  cc<   dS )Nr   r{   r-   r|   r.   r  r?   r   rH  Fr  r1  TrZ  r   s     r*   rT  z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStater  rP  r+   c                    | j                                         }|t          v rn|dk    r,| j                            | j                   | j        | _        n|t          u r^| j                            t          d         dd           d| j        d<   | j                            | j                   | j        | _        n4| j                            t          d         dd           | j
        | _        dS )	Nr   r-   r1  r.   Fr  rD  T)r   rG   r   r1   rH   r$   r!   r"   r   r   rA  r   s     r*   r[  z/HTMLTokenizer.afterDoctypeSystemIdentifierState  s    {!!?""S[[O""4#4555DJJS[[O""J|,D$4$6 $6 7 7 7+0Di(O""4#4555DJJO""J|,D$@$B $B C C C/DJtr+   c                 <   | j                                         }|dk    r,| j                            | j                   | j        | _        nP|t          u rF| j                             |           | j                            | j                   | j        | _        n	 dS )Nr   T)	r   rG   r1   rH   r$   r!   r"   r   rN   r   s     r*   rA  zHTMLTokenizer.bogusDoctypeState  s    {!!3;;O""4#4555DJJS[[Kd###O""4#4555DJJtr+   c                    g }	 |                     | j                            d                     |                     | j                            d                     | j                                        }|t          k    rnF|dk    sJ |d         dd          dk    r|d         d d         |d<   n|                     |           d                    |          }|                    d          }|d	k    rPt          |          D ]*}| j                             t          d
         dd           +|
                    dd          }|r(| j                             t          d         |d           | j        | _        dS )NT]r   rZ   z]]r:   r{   r   r-   r|   r.   r?   r_   )rH   r   r   rG   r   rJ   countranger1   r   r   r!   r"   )r'   r0   rG   	nullCountr   s        r*   r  zHTMLTokenizer.cdataSectionState  s   	&KK..s33444KK..s33444;##%%Ds{{s{{{{8BCC=D((#Bx}DHKK%%%	& wwt}}JJx((	q==9%% F F&&
<0H0C(E (E F F F F<<(33D 	3O""J|,D,0$2 $2 3 3 3^
tr+   )Nr   )N__name__
__module____qualname____doc__r&   r6   rU   rl   rn   ry   r!   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r$  r"  r'  r  r+  r-  r/  r?  rE  rI  rJ  rM  rR  r@  rW  rS  rT  r[  rA  r  __classcell__)r)   s   @r*   r   r      s       
 
. . . . . .0 0 0 F F FPNT NT NT NT`H H H
$ $ $8  :  
  :  
  $  $  ! ! !F  0  ,	 	 		 	 	  8	 	 		 	 	  8  	 	 	  8      (  (  ,  	 	 	  8     *  .  2	 	 	     <4 4 4l  @     D  &  &  2  (  $  + + +Z  .  .  $  &  >  .  "  4  21 1 1f  (  :  0  0  <  4  (  :  0  0  &        r+   r   N) 
__future__r   r   r   sixr   rL   collectionsr   r   sysr	   	constantsr   r   r   r   r   r   r   r   r   r   _inputstreamr   _trier   r`   dictru   objectr    r+   r*   <module>rt     si   B B B B B B B B B B       * * * * * * * *       & & & & & &       5 5 5 5 5 5 5 5 - - - - - - - - - - 0 0 0 0 0 0 0 0 , , , , , , ) ) ) ) ) )      tH~~6LLLl l l l lF l l l l lr+   