
    Ngj#                     z    d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ  G d de          ZdS )	    N)PathLike)Path)AnyCallableDictIteratorOptionalUnion)Document)
BaseLoaderc                   &   e Zd ZdZ	 	 	 	 	 ddeeef         dedee         dee         d	ee	e
e
ge
f                  d
edefdZdee         fdZdededee         fdZdedefdZde
eef         dede
eef         fdZdeddfdZdeddfdZdS )
JSONLoadera  
    Load a `JSON` file using a `jq` schema.

    Setup:
        .. code-block:: bash

            pip install -U jq

    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import JSONLoader
            import json
            from pathlib import Path

            file_path='./sample_quiz.json'
            data = json.loads(Path(file_path).read_text())
            loader = JSONLoader(
                     file_path=file_path,
                     jq_schema='.quiz',
                     text_content=False)

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quiz
            .json', 'seq_num': 1}

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quizg
            .json', 'seq_num': 1}

    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            {"sport": {"q1": {"question": "Which one is correct team name in
            NBA?", "options": ["New York Bulls"
            {'source': '/sample_quiz
            .json', 'seq_num': 1}
    NFT	file_path	jq_schemacontent_keyis_content_key_jq_parsablemetadata_functext_content
json_linesc                     	 ddl }|| _         n# t          $ r t          d          w xY wt          |                                          | _        |                    |          | _        || _        || _        || _	        || _
        || _        dS )a~  Initialize the JSONLoader.

        Args:
            file_path (Union[str, PathLike]): The path to the JSON or JSON Lines file.
            jq_schema (str): The jq schema to use to extract the data or text from
                the JSON.
            content_key (str): The key to use to extract the content from
                the JSON if the jq_schema results to a list of objects (dict).
                If is_content_key_jq_parsable is True, this has to be a jq compatible
                schema. If is_content_key_jq_parsable is False, this should be a simple
                string key.
            is_content_key_jq_parsable (bool): A flag to determine if
                content_key is parsable by jq or not. If True, content_key is
                treated as a jq schema and compiled accordingly. If False or if
                content_key is None, content_key is used as a simple string.
                Default is False.
            metadata_func (Callable[Dict, Dict]): A function that takes in the JSON
                object extracted by the jq_schema and the default metadata and returns
                a dict of the updated metadata.
            text_content (bool): Boolean flag to indicate whether the content is in
                string format, default to True.
            json_lines (bool): Boolean flag to indicate whether the input is in
                JSON Lines format.
        r   Nz=jq package not found, please install it with `pip install jq`)jqImportErrorr   resolver   compile
_jq_schema_is_content_key_jq_parsable_content_key_metadata_func_text_content_json_lines)	selfr   r   r   r   r   r   r   r   s	            l/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/json_loader.py__init__zJSONLoader.__init__T   s    D	IIIDGG 	 	 	O  	
 i0022**Y//+E('+)%s    (returnc              #   z  K   d}| j         rt| j                            d          5 }|D ]:}|                                }|r"|                     ||          D ]}|V  |dz  };	 ddd           dS # 1 swxY w Y   dS |                     | j                            d          |          D ]}|V  |dz  }dS )z-Load and return documents from the JSON file.r   zutf-8)encoding   N)r    r   openstrip_parse	read_text)r!   indexflinedocs        r"   	lazy_loadzJSONLoader.lazy_load   s;      	$$g$66 '! ' 'D::<<D '#';;tU#;#; ' 'C"%III!QJEE'' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' {{4>#;#;W#;#M#MuUU  			
 s   >A33A7:A7contentr,   c              #     K   | j                             t          j        |                    }| j        |                     |           | j        |                     |           t          ||dz             D ]X\  }}| 	                    |          }| 
                    |t          | j                  |          }t          ||          V  YdS )z#Convert given content to documents.Nr'   )sample)r3   sourceseq_num)page_contentmetadata)r   inputjsonloadsr   _validate_content_keyr   _validate_metadata_func	enumerate	_get_text_get_metadatastrr   r   )r!   r1   r,   datair3   textr7   s           r"   r*   zJSONLoader._parse   s      $$TZ%8%899
 (&&t,,,*((..."433 	A 	AIAv>>>00D))c$.&9&91 *  H x@@@@@@@	A 	A    r3   c                    | j         \| j        rG| j                            | j                   }|                    |                                          }n|| j                  }n|}| j        r5t          |t                    s t          dt          |           d          t          |t                    r|S t          |t                    r|rt          j        |          ndS |t          |          ndS )zConvert sample to string formatNz%Expected page_content is string, got z instead.                     Set `text_content=False` if the desired input for                     `page_content` is not a string )r   r   r   r   r8   firstr   
isinstancer@   
ValueErrortypedictr9   dumps)r!   r3   compiled_content_keyr1   s       r"   r>   zJSONLoader._get_text   s   (/ 4'+wt7H'I'I$.44V<<BBDD !23G 	?j#&>&> 	?4W 4 4 4   %% 	?N&& 	?*194:g&&&r9#*#63w<<<B>rD   additional_fieldsc                 @    | j         |                      ||          S |S )z
        Return a metadata dictionary base on the existence of metadata_func
        :param sample: single data payload
        :param additional_fields: key-word arguments to be added as metadata values
        :return:
        )r   )r!   r3   rN   s      r"   r?   zJSONLoader._get_metadata   s*     *&&v/@AAA$$rD   rA   c                    |                                 }t          |t                    s t          dt	          |           d          | j        s2|                    | j                  t          d| j         d          | j        r\| j        	                    | j                  
                    |                                          t          d| j         d          dS dS )zCheck if a content key is validztExpected the jq schema to result in a list of objects (dict),                     so sample must be a dict but got ``Nz_Expected the jq schema to result in a list of objects (dict)                     with the key `z ` which should be parsable by jq)rG   rH   rK   rI   rJ   r   getr   r   r   r8   rC   )r!   rA   r3   s      r"   r;   z JSONLoader._validate_content_key   s"    &$'' 	G7;F||G G G   0	

4,--58#'#48 8 8  
 ,	 12288@@EEGGOW#'#4W W W  		 	OOrD   c                     |                                 }| j        K|                     |i           }t          |t                    s"t	          dt          |           d          dS dS )z*Check if the metadata_func output is validNzMExpected the metadata_func to return a dict but got                         `rQ   )rG   r   rH   rK   rI   rJ   )r!   rA   r3   sample_metadatas       r"   r<   z"JSONLoader._validate_metadata_func   s     *"11&"==Oot44  3//3 3 3   +* rD   )NFNTF)__name__
__module____qualname____doc__r
   r@   r   r	   boolr   r   r#   r   r   r0   intr*   r   r>   r?   r;   r<    rD   r"   r   r      s       F FX &*5:@D! 1& 1&h'1& 1& c]	1&
 %-TN1&  $t); <=1& 1& 1& 1& 1& 1&f8H-     Ac A# A(82D A A A A&? ? ? ? ? ?4%38n%;>%	c3h% % % %# $    6
C 
D 
 
 
 
 
 
rD   r   )r9   osr   pathlibr   typingr   r   r   r   r	   r
   langchain_core.documentsr   )langchain_community.document_loaders.baser   r   r[   rD   r"   <module>ra      s                 A A A A A A A A A A A A A A A A - - - - - - @ @ @ @ @ @l l l l l l l l l lrD   