
    Ng1                         d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ d dl	m
Z
  ej        d          Z ej        d          Z G d de
          ZdS )	    N)ListTuple)Document)
BaseLoaderzBV\w+zav[0-9]+c            	       v    e Zd ZdZ	 	 	 ddee         dededefdZdee         fd	Zd
ede	ee
f         fdZdS )BiliBiliLoaderz9
    Load fetching transcripts from BiliBili videos.
     
video_urlssessdatabili_jctbuvid3c                     || _         d| _        	 ddlm} n# t          $ r t	          d          w xY w|r#|r#|r#|                    |||          | _        dS dS dS dS )a  
        Initialize the loader with BiliBili video URLs and authentication cookies.
        if no authentication cookies are provided, the loader can't get transcripts
        and will only fetch videos info.

        Args:
            video_urls (List[str]): List of BiliBili video URLs.
            sessdata (str): SESSDATA cookie value for authentication.
            bili_jct (str): BILI_JCT cookie value for authentication.
            buvid3 (str): BUVI3 cookie value for authentication.
        Nr   )videoTrequests package not found, please install it with `pip install bilibili-api-python`)r   r   r   )r
   
credentialbilibili_apir   ImportError
Credential)selfr
   r   r   r   r   s         i/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/bilibili.py__init__zBiliBiliLoader.__init__   s    $ %	******* 	 	 	4  	
  	 	V 	#..!HV /  DOOO	 	 	 	 	 	s    1returnc                     g }| j         D ]@}|                     |          \  }}t          ||          }|                    |           A|S )z
        Load and return a list of documents containing video transcripts.

        Returns:
            List[Document]: List of Document objects transcripts and metadata.
        )page_contentmetadata)r
   _get_bilibili_subs_and_infor   append)r   resultsurl
transcript
video_infodocs         r   loadzBiliBiliLoader.load5   s^     ? 	  	 C%)%E%Ec%J%J"J

ZHHHCNN3    r   c                    t                               |          }	 ddlm}m} n# t
          $ r t          d          w xY w|r/|                    |                                | j                  }nrt                              |          }|rD|                    t          |                                dd                   | j                  }nt          d|            ||                                          }|                    d	|i           | j        sd
|fS  ||                    |d                             }|                    dg           }	|	r|	d                             dd
          }
|
                    d          sd|
z   }
t#          j        |
          }|j        dk    rgt'          j        |j                                      dg           }d                    d |D                       }d|d          d|d          d| }||fS t/          j        d| d|j                    nt/          j        d| d           d
|fS )zU
        Retrieve video information and transcript for a given BiliBili URL.
        r   )syncr   r   )bvidr      N)aidr   z(Unable to find a valid video ID in URL: r   r	   cid	subtitlessubtitle_urlhttpzhttps:   body c                     g | ]
}|d          S )content ).0cs     r   
<listcomp>z>BiliBiliLoader._get_bilibili_subs_and_info.<locals>.<listcomp>k   s    *P*P*PA1Y<*P*P*Pr$   zVideo Title: titlez, description: descz

Transcript: zFailed to fetch subtitles for z. HTTP Status Code: zNo subtitles found for video: z. Returning empty transcript.)
BV_PATTERNsearchr   r&   r   r   Videogroupr   
AV_PATTERNint
ValueErrorget_infoupdateget_subtitleget
startswithrequestsstatus_codejsonloadsr2   joinwarningswarn)r   r   r'   r&   r   vr)   r!   subsub_listsub_urlresponseraw_sub_titlesraw_transcriptraw_transcript_with_meta_infos                  r   r   z*BiliBiliLoader._get_bilibili_subs_and_infoD   s      %%	000000000 	 	 	4  	
  	S$/JJAA##C((C SKKC		ABB$8$8T_KUU !QC!Q!QRRRT!**,,''
5#,'''  	"z>! d1>>*U"3445577;++ 	qkoonb99G%%f-- -"W,|G,,H#s**!%H,<!=!=!A!A&"!M!M!$*P*P*P*P*P!Q!Q4Jw$7 4 4$.v$64 4#14 4 .
 5j@@@S @ @)1)=@ @   
 MSSSS  
 :~s   % ?N)r	   r	   r	   )__name__
__module____qualname____doc__r   strr   r   r#   r   dictr   r3   r$   r   r   r      s           I  	
    @d8n    :s :uS$Y7G : : : : : :r$   r   )rG   rerJ   typingr   r   rE   langchain_core.documentsr   )langchain_community.document_loaders.baser   compiler9   r=   r   r3   r$   r   <module>r_      s     				           - - - - - - @ @ @ @ @ @ RZ!!
RZ$$
n n n n nZ n n n n nr$   