
    Ng                     V   U d Z ddlmZmZmZ ddlmZmZ ddlm	Z	 de
e         fdZdeee	f         fdZ eej                  Zd eD             Zeeeef         eej                 f         ed	<   d
 eD             Zeeeej                 f         ed<   i dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej        dej         dej!        dej"        dej#        dej$        dej%        i dej&        dej'        dej'        d ej'        d!ej'        d"ej(        d#ej)        d$ej*        d%ej+        d&ej,        d'ej-        d(ej.        d)ej/        d*ej0        d+ej1        d,ej2        d-ej3        ej4        ej        ej5        ej        ej6        ej7        ej8        ej9        ej:        ej;        ej<        ej=        ej>        ej?        ej@        d.ZAeeeej                 f         ed/<    e            ZBd0S )1z
This module contains mapping between:
HTML Tags <-> Elements Ontology <-> Unstructured Element classes
They are used to simplify transformations between different representations
of parsed documents
    )AnyDictType)elementsontology)Elementreturnc                     |                                  }|                                }|D ]$}|                    t          |                     %|S )z
    Recursively find all subclasses of a given class.

    Parameters:
    cls (type): The class for which to find all subclasses.

    Returns:
    list: A list of all subclasses of the given class.
    )__subclasses__copyextendget_all_subclasses)cls
subclassesall_subclassessubclasss       [/var/www/html/ai-engine/env/lib/python3.11/site-packages/unstructured/documents/mappings.pyr   r      sY     ##%%J__&&N < <0::;;;;    c                     i t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j	        t          j	        t           j
        t          j
        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        i t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j        t          j        t           j         t          j         t           j!        t          j         t           j"        t          j        t           j#        t          j        t           j$        t          j         t           j%        t          j         i t           j&        t          j         t           j'        t          j(        t           j)        t          j(        t           j*        t          j*        t           j+        t          j*        t           j,        t          j        t           j-        t          j        t           j.        t          j        t           j/        t          j        t           j0        t          j        t           j1        t          j        t           j2        t          j        t           j3        t          j        t           j4        t          j        t           j5        t          j        t           j6        t          j        t           j7        t          j        i t           j8        t          j        t           j9        t          j        t           j:        t          j        t           j;        t          j        t           j<        t          j        t           j=        t          j        t           j>        t          j        t           j?        t          j        t           j@        t          j        t           jA        t          jA        t           jB        t          jB        t           jC        t          j        t           jD        t          j        t           jE        t          j        t           jF        t          j        t           jG        t          j        t           jH        t          j	        t           jI        t          j        t           jJ        t          j        t           jK        t          j        i} | S )au  
    Get a mapping of ontology element to unstructured type.

    The dictionary here was created base on ontology mapping json
    Can be generated via the following code:
    ```
        ontology_elements_list = json.loads(
            Path("unstructured_element_ontology.json").read_text()
        )
        ontology_to_unstructured_class_mapping = {
            ontology_element["name"]: ontology_element["ontologyV1Mapping"]
            for ontology_element in ontology_elements_list
    }
    ```

    Returns:
    dict: A dictionary where keys are ontology element classes
          and values are unstructured types.
    )Lr   Documentr   TextSectionPageColumn	ParagraphNarrativeTextHeaderFooterSidebar	PageBreakTitleSubtitleHeadingQuoteFootnoteCaptionFigureCaption
PageNumberUncategorizedTextOrderedListUnorderedListDefinitionListListItemTableTableRow	TableCellTableCellHeader	TableBodyTableHeaderImageFigureVideoAudioBarcodeQRCodeLogo	CodeBlockCodeSnippet
InlineCodeFormulaEquationFootnoteReferenceCitationBibliographyGlossaryAuthorMetaDateKeywordsAbstract	HyperlinkTableOfContentsIndexForm	FormFieldFormFieldValueCheckboxRadioButtonButtonComment	HighlightRevisionInsertionRevisionDeletionAddressEmailAddressPhoneNumberCalendarDateTimeCurrencyMeasurement
Letterhead	Signature	WatermarkStamp)&ontology_to_unstructured_class_mappings    r   )get_ontology_to_unstructured_type_mappingra   !   so   (H.8=H.(-H. 	x}H. 		H.
 	H2H. 	H. 	H. 	(-H. 	H.H. 	H. 	8>H. 	(.H. 	 6H. 	.H. 	8=H.  	(0!H." 	X0#H. H.$ 	"HM%H.& 	hm'H.( 	)H.* 	+H., 	8,-H.. 	/H.0 	8>1H.2 	HN3H.4 	 (.5H.6 	HN7H.8 	hn9H.: 	;H.< 	=H.> 	?H.@ 	AH.B 	(.CH.D 	EH. H. H.F 	x~GH.H 	H0IH.J 	X1KH.L 	(*MH.N 	8+OH.P 	"HMQH.R 	8=SH.T 	x}UH.V 	8=WH.X 	YH.Z 	8=[H.\ 	8=]H.^ 	81_H.` 	HMaH.b 	 (.cH.d 	eH.f 	x}gH. H. H.h 	HMiH.j 	kH.l 	8=mH.n 	hmoH.p 	qH.r 	(-sH.t 	HMuH.v 	"HMwH.x 	!8=yH.z 	(*{H.| 	x4}H.~ 	hmH.@ 	x}AH.B 	x}CH.D 	8=EH.F 	hmGH.H 	X_IH. H.J 	HMHMOH. H.*T 21r   c                 V    i | ]&} |            j         D ]}| |            j        f|'S  allowed_tagscss_class_name.0element_typetags      r   
<dictcomp>rk      sa     d d d|~~*d d 	 	,,..
'(,d d d dr   )HTML_TAG_AND_CSS_NAME_TO_ELEMENT_TYPE_MAPc                 R    i | ]$} |            j         D ]} |            j        |%S rc   rd   rg   s      r   rk   rk      s\     L L L|~~*L L 	 LNN!<L L L Lr   CSS_CLASS_TO_ELEMENT_TYPE_MAPaaddressasideaudio
blockquotebodybuttoncitecodedeldivdl
figcaptionfigurefooterformh1h2h3h4h5h6headerhrimginputinslabellimarkmathmetanavol)ppresectionspansubsvgtabletbodytdththeadtimetrulvideo$HTML_TAG_TO_DEFAULT_ELEMENT_TYPE_MAPN)C__doc__typingr   r   r   unstructured.documentsr   r   unstructured.documents.elementsr   listr   dictstrra   OntologyElementALL_ONTOLOGY_ELEMENT_TYPESrl   tuple__annotations__rn   rH   rU   r   r7   r$   r   rP   rA   r;   rT   r)   r,   r&   r5   r   rK   r!   r"   r#   r   r    r4   rN   rS   rL   r-   rR   r?   rF   rJ   r*   r   r   r@   r]   r.   r2   r0   r1   r3   rY   r/   r+   r6   r   +ONTOLOGY_CLASS_TO_UNSTRUCTURED_ELEMENT_TYPErc   r   r   <module>r      s)     # " " " " " " " " " 5 5 5 5 5 5 5 5 3 3 3 3 3 3tCy    &^24W3E ^2 ^2 ^2 ^2B 0/0HII d d2d d d )4c3hhF^A_0_+`   
L L2L L L tCh.F)G$GH   2S	2Sx2S X2S X^	2S
 (.2S H2S ho2S H2S H2S 
8$2S 
8%2S 	(
!2S ("2S ho2S ho2S  HM!2S" 	(.#2S 2S$ 	(
%2S& 	(
'2S( 	(
)2S* 	(
+2S, 	(
-2S. ho/2S0 	(
12S2 
8>32S4 X52S6 
8%72S8 X92S: 	(
;2S< H=2S> H?2S@ HA2SB 
8>C2SD 	(
E2S 2SF 
	&%^



"!M



 ^c2S 2S 2S $d3X5M0N+N&O 2 2 2j /X.W.Y.Y + + +r   