
    Nga                     *   d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZ  ej        e          Z G d de          Z G d	 d
e          Z G d de          Z G d de          Z G d de          ZdS )    N)AnyCallableDictIteratorLiteralOptionalTupleUnion)Document)BaseBlobParser)Blob)is_openai_v1c                   $   e Zd ZdZdddddddddd	dee         dee         dee         deeg ef         df         d	ee         d
ee         deed         df         dee	         dede
fdZedeeef         fd            Zdedee         fdZdS )AzureOpenAIWhisperParserav
  
    Transcribe and parse audio files using Azure OpenAI Whisper.

    This parser integrates with the Azure OpenAI Whisper model to transcribe
    audio files. It differs from the standard OpenAI Whisper parser, requiring
    an Azure endpoint and credentials. The parser is limited to files under 25 MB.

    **Note**:
    This parser uses the Azure OpenAI API, providing integration with the Azure
     ecosystem, and making it suitable for workflows involving other Azure services.

    For files larger than 25 MB, consider using Azure AI Speech batch transcription:
    https://learn.microsoft.com/azure/ai-services/speech-service/batch-transcription-create?pivots=rest-api#use-a-whisper-model

    Setup:
        1. Follow the instructions here to deploy Azure Whisper:
           https://learn.microsoft.com/azure/ai-services/openai/whisper-quickstart?tabs=command-line%2Cpython-new&pivots=programming-language-python
        2. Install ``langchain`` and set the following environment variables:

        .. code-block:: bash

            pip install -U langchain langchain-community

            export AZURE_OPENAI_API_KEY="your-api-key"
            export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/"
            export OPENAI_API_VERSION="your-api-version"

    Example Usage:
        .. code-block:: python

            from langchain.community import AzureOpenAIWhisperParser

            whisper_parser = AzureOpenAIWhisperParser(
                deployment_name="your-whisper-deployment",
                api_version="2024-06-01",
                api_key="your-api-key",
                # other params...
            )

            audio_blob = Blob(path="your-audio-file-path")
            response = whisper_parser.lazy_parse(audio_blob)

            for document in response:
                print(document.page_content)

    Integration with Other Loaders:
        The AzureOpenAIWhisperParser can be used with video/audio loaders and
        `GenericLoader` to automate retrieval and parsing.

    YoutubeAudioLoader Example:
        .. code-block:: python

            from langchain_community.document_loaders.blob_loaders import (
                YoutubeAudioLoader
                )
            from langchain_community.document_loaders.generic import GenericLoader

            # Must be a list
            youtube_url = ["https://your-youtube-url"]
            save_dir = "directory-to-download-videos"

            loader = GenericLoader(
                YoutubeAudioLoader(youtube_url, save_dir),
                AzureOpenAIWhisperParser(deployment_name="your-deployment-name")
            )

            docs = loader.load()
    N   )	api_keyazure_endpointapi_versionazure_ad_token_providerlanguagepromptresponse_formattemperaturemax_retriesr   r   r   r   r   r   r   jsontextsrtverbose_jsonvttr   deployment_namer   c       
            |pt           j                            d          | _        |pt           j                            d          | _        |pt           j                            d          | _        || _        || _        || _        || _	        || _
        |	| _        |
| _        	 ddl}n# t          $ r t          d          w xY wt                      r:|                    | j        | j        | j        | j        | j                  | _        dS | j        r| j        |_        | j        r| j        |_        | j        r| j        |_        d|_        || _        dS )	a  
        Initialize the AzureOpenAIWhisperParser.

        Args:
            api_key (Optional[str]):
                Azure OpenAI API key. If not provided, defaults to the
                `AZURE_OPENAI_API_KEY` environment variable.
            azure_endpoint (Optional[str]):
                Azure OpenAI service endpoint. Defaults to `AZURE_OPENAI_ENDPOINT`
                environment variable if not set.
            api_version (Optional[str]):
                API version to use,
                defaults to the `OPENAI_API_VERSION` environment variable.
            azure_ad_token_provider (Union[Callable[[], str], None]):
                Azure Active Directory token for authentication (if applicable).
            language (Optional[str]):
                Language in which the request should be processed.
            prompt (Optional[str]):
                Custom instructions or prompt for the Whisper model.
            response_format (Union[str, None]):
                The desired output format. Options: "json", "text", "srt",
                "verbose_json", "vtt".
            temperature (Optional[float]):
                Controls the randomness of the model's output.
            deployment_name (str):
                The deployment name of the Whisper model.
            max_retries (int):
                Maximum number of retries for failed API requests.
        Raises:
            ImportError:
                If the required package `openai` is not installed.
        AZURE_OPENAI_API_KEYAZURE_OPENAI_ENDPOINTOPENAI_API_VERSIONr   NEopenai package not found, please install it with `pip install openai`)r   r   r   r   azure_ad_tokenazure)osenvirongetr   r   r   r   r   r   r   r   r!   r   openaiImportErrorr   AzureOpenAI_clientapi_baseapi_type)selfr   r   r   r   r   r   r   r   r!   r   r,   s               n/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_community/document_loaders/parsers/audio.py__init__z!AzureOpenAIWhisperParser.__init__V   sm   ` H"*..1G"H"H,W
?V0W0W&N"*..9M*N*N'>$ .&.&	MMMM 	 	 	'  	 >> 	"!--#2 , ,#; .  DLLL | .!%" 6"&"5 6%)%5"%FO!DLLLs   %B* *Creturnc                 t    | j         | j        | j        | j        d}d |                                D             S )Nr   r   r   r   c                     i | ]
\  }}|||S N .0kvs      r3   
<dictcomp>z;AzureOpenAIWhisperParser._create_params.<locals>.<dictcomp>       AAAA1=1===    r   r   r   r   itemsr2   paramss     r3   _create_paramsz'AzureOpenAIWhisperParser._create_params   E     k#3+	
 
 BAAAAArA   blobc              #     K   t          t          |j                  d          }	 t                      r* | j        j        j        j        d| j        |d| j	        }n* | j        j
        j        d| j        | j        |d| j	        }n# t          $ r  w xY wt          t          |t                    s|j        n|d|j        i          V  dS )at  
        Lazily parse the provided audio blob for transcription.

        Args:
            blob (Blob):
                The audio file in Blob format to be transcribed.

        Yields:
            Document:
                Parsed transcription from the audio file.

        Raises:
            Exception:
                If an error occurs during transcription.
        rbmodelfile)rL   deployment_idrM   sourcepage_contentmetadataNr:   )openstrpathr   r/   audiotranscriptionscreater!   rF   Audio
transcribe	Exceptionr   
isinstancer   rO   )r2   rH   file_obj
transcripts       r3   
lazy_parsez#AzureOpenAIWhisperParser.lazy_parse   s"     " DI--	~~ ET\/>E .!  ) 

 ;T\/: ."&"6!  )	 
  	 	 		 j#..,	
 
 
 	
 	
 	
 	
 	
s   A"B	 	B)__name__
__module____qualname____doc__r   rT   r
   r   r   floatintr4   propertyr   r   rF   r   r   r   r_   r:   rA   r3   r   r      sl       C CP "&(,%)BF"& $ '+U" U" U" #U" !	U"
 c]U" "'xC'8$'>!?U" 3-U" U" @A4G
U" e_U" U" U" U" U" U"n BS#X B B B XB*
t *
(: *
 *
 *
 *
 *
 *
rA   r   c                       e Zd ZdZ	 dddddddddee         dedee         deedf         d	eedf         d
eed         df         deedf         fdZ	e
deeef         fd            Zdedee         fdZdS )OpenAIWhisperParsera  Transcribe and parse audio files.

    Audio transcription is with OpenAI Whisper model.

    Args:
        api_key: OpenAI API key
        chunk_duration_threshold: Minimum duration of a chunk in seconds
            NOTE: According to the OpenAI API, the chunk duration should be at least 0.1
            seconds. If the chunk duration is less or equal than the threshold,
            it will be skipped.
    Ng?)chunk_duration_thresholdbase_urlr   r   r   r   r   ri   rj   r   r   r   r   r   c                    || _         || _        ||nt          j                            d          | _        || _        || _        || _        || _	        d S )NOPENAI_API_BASE)
r   ri   r)   r*   r+   rj   r   r   r   r   )r2   r   ri   rj   r   r   r   r   s           r3   r4   zOpenAIWhisperParser.__init__   s[     (@% ,HH"*..AR2S2S 	 !.&rA   r5   c                 t    | j         | j        | j        | j        d}d |                                D             S )Nr7   c                     i | ]
\  }}|||S r9   r:   r;   s      r3   r?   z6OpenAIWhisperParser._create_params.<locals>.<dictcomp>  r@   rA   rB   rD   s     r3   rF   z"OpenAIWhisperParser._create_params  rG   rA   rH   c           
   #     K   	 ddl }n# t          $ r t          d          w xY w	 ddlm} n# t          $ r t          d          w xY wt	                      r"|                    | j        | j                  }n&| j        r| j        |_        | j        r| j        |_        |	                    |j
                  }d}|dz  d	z  }t          t          dt          |          |                    D ]\  }}	||	|	|z            }
|
j        | j        k    r$t!          j        |
                    d
                                                    }|j        |j        d| dz   |_        nd| d|_        t-          d|dz    d           d}|dk     r	 t	                      r  |j        j        j        dd|d| j        }n|j                            d|          }ni# t:          $ rE}|dz  }t-          d| dt=          |                      t?          j         d           Y d}~nd}~ww xY w|dk     t-          d           VtC          tE          |t<                    s|j#        n||j        |d          V  dS )Lazily parse the blob.r   Nr&   AudioSegmentCpydub package not found, please install it with `pip install pydub`)r   rj      <     mp3format_part_z.mp3part_Transcribing part    !r   z	whisper-1rK   zAttempt z failed. Exception:    z&Failed to transcribe after 3 attempts.)rO   chunkrP   r:   )$r,   r-   pydubrr   r   OpenAIr   rj   r0   	from_filerU   	enumeraterangelenduration_secondsri   ioBytesIOexportreadrO   nameprintrV   rW   rX   rF   rY   rZ   r[   rT   timesleepr   r\   r   )r2   rH   r,   rr   clientrV   chunk_durationchunk_duration_mssplit_numberir   r]   attemptsr^   es                  r3   r_   zOpenAIWhisperParser.lazy_parse  s7     	MMMM 	 	 	'  	
	******* 	 	 	X  	
 >> 	0]]4<$-]PPFF | .!%} 0"&- &&ty11 *R/$6  )q#e**>O)P)PQQ %	 %	OL!!a"3334E%)FFFz%,,e,"<"<"A"A"C"CDDH{& $.I|.I.I.I I : : : : :|a'7:::;;;HQ,,"#~~ T%GV\%@%G &"-H& &@D@S& &

 &,\%<%<[(%S%S
  " " "MHKXKK3q66KKLLLJqMMMMMMMM" Q,, >???!*c22 Z__$(K,GG	      A%	 %	s*   	 #. AA	G
H,';H''H,r9   )r`   ra   rb   rc   r   rT   rd   r
   r   r4   rf   r   r   rF   r   r   r   r_   r:   rA   r3   rh   rh      s9       
 
 "&' +."&%)#' *.' ' '#' #(	'
 3-' T	"' c4i ' @A4G
' 5$;'' ' ' '. BS#X B B B XBIt I(: I I I I I IrA   rh   c                   z    e Zd ZdZ	 	 	 	 	 ddedee         ded	ed
eee                  f
dZ	de
dee         fdZdS )OpenAIWhisperParserLocala;  Transcribe and parse audio files with OpenAI Whisper model.

    Audio transcription with OpenAI Whisper model locally from transformers.

    Parameters:
    device - device to use
        NOTE: By default uses the gpu if available,
        if you want to use cpu, please set device = "cpu"
    lang_model - whisper model to use, for example "openai/whisper-medium"
    forced_decoder_ids - id states for decoder in multilanguage model,
        usage example:
        from transformers import WhisperProcessor
        processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
        forced_decoder_ids = WhisperProcessor.get_decoder_prompt_ids(language="french",
          task="transcribe")
        forced_decoder_ids = WhisperProcessor.get_decoder_prompt_ids(language="french",
        task="translate")



    0N      device
lang_model
batch_sizechunk_lengthforced_decoder_idsc                    	 ddl m} n# t          $ r t          d          w xY w	 ddl}n# t          $ r t          d          w xY w|dk    rd| _        n"|j                                        rdnd| _        | j        dk    rd}|r|n|| _        nO|j                            | j                  j	        d	z  }	|	d
k     rd}
n|	dk     rd}
n|	dk     rd}
nd}
|r|n|
| _        t          d| j                   || _         |d| j        || j                  | _        |O	 || j        j        j        _        dS # t           $ r)}t"                              d| d           Y d}~dS d}~ww xY wdS )a  Initialize the parser.

        Args:
            device: device to use.
            lang_model: whisper model to use, for example "openai/whisper-medium".
              Defaults to None.
            forced_decoder_ids: id states for decoder in a multilanguage model.
              Defaults to None.
            batch_size: batch size used for decoding
              Defaults to 8.
            chunk_length: chunk length used during inference.
              Defaults to 30s.
        r   )pipelinezQtransformers package not found, please install it with `pip install transformers`NCtorch package not found, please install it with `pip install torch`cpuzcuda:0zopenai/whisper-base     iX  zopenai/whisper-smalli.  zopenai/whisper-mediumzopenai/whisper-largezUsing the following model: zautomatic-speech-recognition)rL   chunk_length_sr   zOUnable to set forced_decoder_ids parameter for whisper modelText of exception: z9Therefore whisper model will use default mode for decoder)transformersr   r-   torchr   cudais_availabler   get_device_propertiestotal_memoryr   r   piperL   configr   r[   loggerinfo)r2   r   r   r   r   r   r   r   default_modelmem	rec_modelexception_texts               r3   r4   z!OpenAIWhisperParserLocal.__init__u  s'   *	------- 	 	 	-  	
	LLLL 	 	 	X  	 U??DKK&+j&=&=&?&?J((UDK;%1M,6IjjMDOO *224;??LPWXCTzz1		t2		u3		2	,6EjjIDO+T_===$ H*/';	
 
 
	 )<N	&999   P*8P P P         *)s)   	 #, AD6 6
E) E$$E)rH   r5   c              #   B  K   	 ddl m} n# t          $ r t          d          w xY w	 ddl}n# t          $ r t          d          w xY w|                    |j                  }t          j        |                    d          	                                          }t          d|j         d	           |                    |d
          \  }}|                     |                                | j                  d         }t          |d|j        i          V  dS )rp   r   rq   rs   NzGlibrosa package not found, please install it with `pip install librosa`rw   rx   r|   r~   i>  )sr)r   r   rO   rP   )r   rr   r-   librosar   rU   r   r   r   r   r   loadr   copyr   r   rO   )	r2   rH   rr   r   rV   r]   yr   
predictions	            r3   r_   z#OpenAIWhisperParserLocal.lazy_parse  se     	******* 	 	 	U  	
	NNNN 	 	 	(  	 &&ty11:ell%l88==??@@ 	/49///000X%002YYqvvxxDOYDDVL
#,
 
 
 	
 	
 	
 	
 	
s    %. A)r   Nr   r   N)r`   ra   rb   rc   rT   r   re   r	   r   r4   r   r   r   r_   r:   rA   r3   r   r   ^  s         0 $(48L LL SML 	L
 L %U4[1L L L L\!
t !
(: !
 !
 !
 !
 !
 !
rA   r   c            	       j    e Zd ZdZddddddee         dee         ded	efd
Zdedee	         fdZ
dS )YandexSTTParserzWTranscribe and parse audio files.
    Audio transcription is with OpenAI Whisper model.Ngeneralauto)r   	iam_tokenrL   r   r   r   rL   r   c                p    |du |du k    rt          d          || _        || _        || _        || _        dS )a  Initialize the parser.

        Args:
            api_key: API key for a service account
            with the `ai.speechkit-stt.user` role.
            iam_token: IAM token for a service account
            with the `ai.speechkit-stt.user` role.
            model: Recognition model name.
              Defaults to general.
            language: The language in ISO 639-1 format.
              Defaults to automatic language recognition.
        Either `api_key` or `iam_token` must be provided, but not both.
        Nz?Either 'api_key' or 'iam_token' must be provided, but not both.)
ValueErrorr   r   rL   r   )r2   r   r   rL   r   s        r3   r4   zYandexSTTParser.__init__  sO    * tOd!233Q   "
 rA   rH   r5   c              #   l  K   	 ddl m}m}m} ddlm} n# t          $ r t          d          w xY w	 ddlm} n# t          $ r t          d          w xY w| j	        r& ||
                    | j	                             n% ||
                    | j        	                     |                    |j                  }|                                }| j        |_        | j        |_        |j        |_        |                    |          }	|	D ]!}
t)          |
j        d
|j        i          V  "dS )rp   r   )configure_credentialscredsmodel_repository)AudioProcessingTypezYyandex-speechkit package not found, please install it with `pip install yandex-speechkit`rq   rs   )r   )yandex_credentials)r   rO   rP   N)	speechkitr   r   r   speechkit.sttr   r-   r   rr   r   YandexCredentialsr   r   rU   recognition_modelrL   r   Fullaudio_processing_typerZ   r   normalized_textrO   )r2   rH   r   r   r   r   rr   rV   rL   resultress              r3   r_   zYandexSTTParser.lazy_parse	  s     	PPPPPPPPPP9999999 	 	 	1  	
	******* 	 	 	X  	
 < 	!!#(#:#:4<#:#P#P     "!#(#:#:T^#:#T#T    &&ty11 2244j&9&>#!!%(( 	 	C 0"DK0      	 	s    /: Ar`   ra   rb   rc   r   rT   r4   r   r   r   r_   r:   rA   r3   r   r     s        9 9 "&#'! ! ! #! C=	!
 ! ! ! ! !<)t )(: ) ) ) ) ) )rA   r   c                   ^    e Zd ZdZddddee         dee         fdZded	ee	         fd
Z
dS )FasterWhisperParsera  Transcribe and parse audio files with faster-whisper.

    faster-whisper is a reimplementation of OpenAI's Whisper model using CTranslate2,
    which is up to 4 times faster than openai/whisper for the same accuracy while using
    less memory. The efficiency can be further improved with 8-bit quantization on both
    CPU and GPU.

    It can automatically detect the following 14 languages and transcribe the text
    into their respective languages: en, zh, fr, de, ja, ko, ru, es, th, it, pt, vi,
    ar, tr.

    The gitbub repository for faster-whisper is :
    https://github.com/SYSTRAN/faster-whisper

    Example: Load a YouTube video and transcribe the video speech into a document.
        .. code-block:: python

            from langchain.document_loaders.generic import GenericLoader
            from langchain_community.document_loaders.parsers.audio
                import FasterWhisperParser
            from langchain.document_loaders.blob_loaders.youtube_audio
                import YoutubeAudioLoader


            url="https://www.youtube.com/watch?v=your_video"
            save_dir="your_dir/"
            loader = GenericLoader(
                YoutubeAudioLoader([url],save_dir),
                FasterWhisperParser()
            )
            docs = loader.load()

    r   N)r   
model_sizer   r   c                   	 ddl }n# t          $ r t          d          w xY w|dk    rd| _        n"|j                                        rdnd| _        | j        dk    rd| _        nX|j                            | j                  j        dz  }|dk     rd| _        n#|d	k     rd
| _        n|dk     rd| _        nd| _        ||dv r|| _        dS dS dS )a$  Initialize the parser.

        Args:
            device: It can be "cuda" or "cpu" based on the available device.
            model_size: There are four model sizes to choose from: "base", "small",
                        "medium", and "large-v3", based on the available GPU memory.
        r   Nr   r   r   baser   rv   i  smallr   mediumlarge-v3)r   r   r   r   )r   r-   r   r   r   r   r   r   )r2   r   r   r   r   s        r3   r4   zFasterWhisperParser.__init__X  s   	LLLL 	 	 	U  	 U??DKK$)J$;$;$=$=H&&5DK ;%$DOO *224;??LPWXCTzz"(t")t"*",!DDD", "!DDs    !rH   r5   c              #   (  K   	 ddl m} n# t          $ r t          d          w xY w	 ddlm} n# t          $ r t          d          w xY wt          |j        t                    r-|                    t          j
        |j                            }n8|j        "|j        r|                    |j                  }nt          d          t          j
        |                    d	                                                    } || j        | j        d
          }|                    |d          \  }}|D ]W}	t%          |	j        |j        d|	j        |	j        fz  |j        dt1          |j        dz            z  d|j                  V  XdS )rp   r   rq   rs   )WhisperModelzUfaster_whisper package not found, please install it with `pip install faster-whisper`NzUnable to get audio from blobrw   rx   float16)r   compute_typer   )	beam_sizez[%.2fs -> %.2fs]z%d%%d   )rO   
timestampsr   probabilityrP   )r   rr   r-   faster_whisperr   r\   databytesr   r   r   rU   r   r   r   r   r   rZ   r   r   rO   startendr   roundlanguage_probabilityrR   )
r2   rH   rr   r   rV   r]   rL   segmentsr   segments
             r3   r_   zFasterWhisperParser.lazy_parse  s     	******* 	 	 	U  	
	3333333 	 	 	/  	 di'' 	> **2:di+@+@AAEEY49 **4955EE<===:ell%l88==??@@ ODKi
 
 
 ))(a)@@$ 
	 
	G$\"k"4w{7S"S $#)E$2Kc2Q,R,R#R	 
 m	 	 	 	 	 	 	
	 
	s    %0 A
r   r:   rA   r3   r   r   5  s           J !'$(	+- +- +- +- SM	+- +- +- +-Z/t /(: / / / / / /rA   r   )r   loggingr)   r   typingr   r   r   r   r   r   r	   r
   langchain_core.documentsr   )langchain_community.document_loaders.baser   1langchain_community.document_loaders.blob_loadersr    langchain_community.utils.openair   	getLoggerr`   r   r   rh   r   r   r   r:   rA   r3   <module>r      s   				  				  Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q - - - - - - D D D D D D B B B B B B 9 9 9 9 9 9		8	$	$Q
 Q
 Q
 Q
 Q
~ Q
 Q
 Q
hw w w w w. w w wtF
 F
 F
 F
 F
~ F
 F
 F
RK K K K Kn K K K\    .     rA   