
    NgX                        d dl mZ d dlZd dlmZmZmZmZmZ d dl	m
Z
mZ  G d de          ZddZ G d de          ZdS )    )annotationsN)AnyListLiteralOptionalUnion)LanguageTextSplitterc                  0     e Zd ZdZ	 dd fdZddZ xZS )CharacterTextSplitterz(Splitting text that looks at characters.

F	separatorstris_separator_regexboolkwargsr   returnNonec                V     t                      j        di | || _        || _        dS )Create a new TextSplitter.N )super__init__
_separator_is_separator_regex)selfr   r   r   	__class__s       ^/var/www/html/ai-engine/env/lib/python3.11/site-packages/langchain_text_splitters/character.pyr   zCharacterTextSplitter.__init__   s7     	""6"""##5       text	List[str]c                    | j         r| j        nt          j        | j                  }t	          ||| j                  }| j        rdn| j        }|                     ||          S )&Split incoming text and return chunks. )r   r   reescape_split_text_with_regex_keep_separator_merge_splits)r   r    r   splitsr   s        r   
split_textz CharacterTextSplitter.split_text   se      $7WDOORYt=W=W 	 (i9MNN/DRRT_
!!&*555r   )r   F)r   r   r   r   r   r   r   r   r    r   r   r!   )__name__
__module____qualname____doc__r   r+   __classcell__r   s   @r   r   r   	   sc        22 CH6 6 6 6 6 6 66 6 6 6 6 6 6 6r   r   r    r   r   keep_separator$Union[bool, Literal['start', 'end']]r   r!   c                   |r|rt          j        d| d|           |dk    r-fdt          dt                    dz
  d          D             n)fdt          dt                    d          D             }t                    dz  dk    r|d	d          z  }|dk    r|d	         gz   nd         g|z   }n%t          j        ||           }nt	          |           }d
 |D             S )N()endc                8    g | ]}|         |d z            z   S    r   .0i_splitss     r   
<listcomp>z*_split_text_with_regex.<locals>.<listcomp>(   s*    UUU!'!*wq1u~-UUUr   r   r;      c                8    g | ]}|         |d z            z   S r:   r   r<   s     r   r@   z*_split_text_with_regex.<locals>.<listcomp>*   s*    VVVqwqzGAEN2VVVr   c                    g | ]
}|d k    |S )r$   r   )r=   ss     r   r@   z*_split_text_with_regex.<locals>.<listcomp>7   s    )))!bAr   )r%   splitrangelenlist)r    r   r3   r*   r?   s       @r   r'   r'      s6      	/h/9///66G "U** VUUUuQGq@PRS7T7TUUUUVVVVE!S\\ST<U<UVVV 
 7||a1$$'"##,& "U** 72;-''qzlV+ F Xi..FFd))v))))r   c                  l     e Zd ZdZ	 	 	 dd fdZddZddZedd            Ze	dd            Z
 xZS )RecursiveCharacterTextSplitterzSplitting text by recursively look at characters.

    Recursively tries to split by different characters to find one
    that works.
    NTF
separatorsOptional[List[str]]r3   r4   r   r   r   r   r   r   c                b     t                      j        dd|i| |pg d| _        || _        dS )r   r3   )r   
 r$   Nr   )r   r   _separatorsr   )r   rL   r3   r   r   r   s        r   r   z'RecursiveCharacterTextSplitter.__init__A   sI     	AAA&AAA%@)@)@)@#5   r   r    r   r!   c                   g }|d         }g }t          |          D ]R\  }}| j        r|nt          j        |          }|dk    r|} n't          j        ||          r|}||dz   d         } nS| j        r|nt          j        |          }t          ||| j                  }	g }
| j        rdn|}|	D ]}|                     |          | j        k     r|
	                    |           6|
r-| 
                    |
|          }|                    |           g }
|s|	                    |           }|                     ||          }|                    |           |
r+| 
                    |
|          }|                    |           |S )r#   rC   r$   r;   N)	enumerater   r%   r&   searchr'   r(   _length_function_chunk_sizeappendr)   extend_split_text)r   r    rL   final_chunksr   new_separatorsr>   _sr   r*   _good_splitsrE   merged_text
other_infos                 r   rY   z*RecursiveCharacterTextSplitter._split_textM   s   rN	z** 	 	EAr#7JRYr]]JRxx	yT** 	!+AEGG!4
 #'":TYY	)@T@T
'j$:NOO />RRY
 	4 	4A$$Q''$*:::##A&&&& &"&"4"4\:"N"NK ''444#%L% 4 ''****!%!1!1!^!D!DJ ''
3333 	-,,\:FFK,,,r   c                8    |                      || j                  S )N)rY   rQ   )r   r    s     r   r+   z)RecursiveCharacterTextSplitter.split_textu   s    d&6777r   languager	   c                B    |                      |          } | d|dd|S )NT)rL   r   r   )get_separators_for_language)clsra   r   rL   s       r   from_languagez,RecursiveCharacterTextSplitter.from_languagex   s4     44X>>
sLjTLLVLLLr   c                r   | t           j        k    s| t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j	        k    rg dS | t           j
        k    rg d	S | t           j        k    rg d
S | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        k    rg dS | t           j        v rt7          d|  d          t7          d|  dt9          t                                )N)
class z
void z
int z
float z
double 
if 
for 
while 
switch 
case r   rO   rP   r$   )
func 
var 
const 
type rh   ri   rk   rl   r   rO   rP   r$   )rg   
public 
protected 	
private 
static rh   ri   rj   rk   rl   r   rO   rP   r$   )rg   rq   rr   rs   z

internal z
companion z
fun 
val rn   rh   ri   rj   z
when rl   
else r   rO   rP   r$   )

function ro   
let rn   rg   rh   ri   rj   rk   rl   	
default r   rO   rP   r$   )
enum 
interface z
namespace rp   rg   rw   ro   rx   rn   rh   ri   rj   rk   rl   ry   r   rO   rP   r$   )rw   rg   rh   	
foreach rj   
do rk   rl   r   rO   rP   r$   )
z	
message z	
service rz   z
option 
import z
syntax r   rO   rP   r$   )rg   
def z
	def r   rO   rP   r$   )z
=+
z
-+
z
\*+
z

.. *

r   rO   rP   r$   )r   rg   rh   
unless rj   ri   r}   z
begin z
rescue r   rO   rP   r$   )r   z
defp z
defmodule z
defprotocol z

defmacro z
defmacrop rh   r   rj   rl   z
cond z
with ri   r}   r   rO   rP   r$   )z
fn ro   rx   rh   rj   ri   z
loop 
match ro   r   rO   rP   r$   )rg   z
object r   ru   rn   rh   ri   rj   r   rl   r   rO   rP   r$   )rm   rg   
struct rz   rh   ri   rj   r}   rk   rl   r   rO   rP   r$   )	z
#{1,6} z```
z	
\*\*\*+
z
---+
z
___+
r   rO   rP   r$   )z
\\chapter{z
\\section{z
\\subsection{z
\\subsubsection{z
\\begin{enumerate}z
\\begin{itemize}z
\\begin{description}z
\\begin{list}z
\\begin{quote}z
\\begin{quotation}z
\\begin{verse}z
\\begin{verbatim}z
\\begin{align}z$$$rP   r$   )z<bodyz<divz<pz<brz<liz<h1z<h2z<h3z<h4z<h5z<h6z<spanz<tablez<trz<tdz<thz<ulz<olz<headerz<footerz<navz<headz<stylez<scriptz<metaz<titler$   )r{   rz   z
implements z

delegate 
event rg   z

abstract rq   rr   rs   rt   z
return rh   z

continue ri   r|   rj   rk   z
break rl   rv   
try z
throw 	
finally 
catch r   rO   rP   r$   )z
pragma z
using z

contract r{   z	
library z
constructor rp   rw   r   z

modifier z
error r   rz   rh   ri   rj   z

do while z

assembly r   rO   rP   r$   )z
IDENTIFICATION DIVISION.z
ENVIRONMENT DIVISION.z
DATA DIVISION.z
PROCEDURE DIVISION.z
WORKING-STORAGE SECTION.z
LINKAGE SECTION.z
FILE SECTION.z
INPUT-OUTPUT SECTION.z
OPEN z
CLOSE z
READ z
WRITE z
IF z
ELSE z
MOVE z	
PERFORM z
UNTIL z	
VARYING z
ACCEPT z	
DISPLAY z

STOP RUN.rO   rP   r$   )
z
local rw   rh   ri   rj   z
repeat r   rO   rP   r$   )z	
main :: z
main = rx   z
in r}   z
where 
:: z
= 
data z	
newtype rp   r   z
module r~   z
qualified z
import qualified rg   z

instance rl   z
| r   z
= {z
, r   rO   rP   r$   )rw   z
param rh   r|   ri   rj   rk   rg   r   r   r   r   rO   rP   r$   z	Language z is not implemented yet!z& is not supported! Please choose from )r	   CCPPGOJAVAKOTLINJSTSPHPPROTOPYTHONRSTRUBYELIXIRRUSTSCALASWIFTMARKDOWNLATEXHTMLCSHARPSOLCOBOLLUAHASKELL
POWERSHELL_value2member_map_
ValueErrorrI   )ra   s    r   rc   z:RecursiveCharacterTextSplitter.get_separators_for_language   s   xz!!X%=%=   ( $$   " &&   ( ((   2 $$   ( $$   2 %%   $ ''   & ((
 
 
 
 %%    &&   $ ((   . &&   $ ''   ( ''   ( ***   & ''   . &&   > ((# # # #H %%   : ''   > %%     )))% % % %L ,,,   . 444KKKKLLL7H 7 7&*8nn7 7  r   )NTF)
rL   rM   r3   r4   r   r   r   r   r   r   )r    r   rL   r!   r   r!   r,   )ra   r	   r   r   r   rK   )ra   r	   r   r!   )r-   r.   r/   r0   r   rY   r+   classmethodre   staticmethodrc   r1   r2   s   @r   rK   rK   :   s          +/?C#(	
6 
6 
6 
6 
6 
6 
6& & & &P8 8 8 8 M M M [M t t t \t t t t tr   rK   )r    r   r   r   r3   r4   r   r!   )
__future__r   r%   typingr   r   r   r   r   langchain_text_splitters.baser	   r
   r   r'   rK   r   r   r   <module>r      s    " " " " " " 				 6 6 6 6 6 6 6 6 6 6 6 6 6 6 @ @ @ @ @ @ @ @6 6 6 6 6L 6 6 6,* * * *6z	 z	 z	 z	 z	\ z	 z	 z	 z	 z	r   