
    gt                         d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	  e	j
        e          Zd Zd Zd Zd	 Zd
 Zd ZddZd Zd Zd Zd Zd ZddZddZd Zd Zd Zd ZdS )a  
Very heavily inspired by the official evaluation script for SQuAD version 2.0 which was modified by XLNet authors to
update `find_best_threshold` scripts for SQuAD V2.0

In addition to basic functionality, we also compute additional statistics and plot precision-recall curves if an
additional na_prob.json file is provided. This file is expected to map question ID's to the model's predicted
probability that a question is unanswerable.
    N   )BasicTokenizer)loggingc           	      f    d }d }d }d } | | | ||                                         S )zALower text and remove punctuation, articles and extra whitespace.c                 l    t          j        dt           j                  }t          j        |d|           S )Nz\b(a|an|the)\b )recompileUNICODEsub)textregexs     c/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/data/metrics/squad_metrics.pyremove_articlesz)normalize_answer.<locals>.remove_articles'   s*    
,bj99veS$'''    c                 P    d                     |                                           S )Nr   )joinsplitr   s    r   white_space_fixz)normalize_answer.<locals>.white_space_fix+   s    xx

%%%r   c                 x    t          t          j                  d                    fd| D                       S )N c              3   $   K   | ]
}|v|V  d S N ).0chexcludes     r   	<genexpr>z8normalize_answer.<locals>.remove_punc.<locals>.<genexpr>0   s-      >>bBg,=,=r,=,=,=,=>>r   )setstringpunctuationr   )r   r   s    @r   remove_puncz%normalize_answer.<locals>.remove_punc.   s:    f())ww>>>>D>>>>>>r   c                 *    |                                  S r   )lowerr   s    r   r%   znormalize_answer.<locals>.lower2   s    zz||r   r   )sr   r   r#   r%   s        r   normalize_answerr'   $   sl    ( ( (& & &? ? ?   ???;;uuQxx+@+@AABBBr   c                 L    | sg S t          |                                           S r   )r'   r   )r&   s    r   
get_tokensr)   8   s)     	A$$&&&r   c                 \    t          t          |           t          |          k              S r   )intr'   )a_golda_preds     r   compute_exactr.   >   s'    ''+;F+C+CCDDDr   c                    t          |           }t          |          }t          j        |          t          j        |          z  }t          |                                          }t          |          dk    st          |          dk    rt          ||k              S |dk    rdS d|z  t          |          z  }d|z  t          |          z  }d|z  |z  ||z   z  }|S )Nr         ?   )r)   collectionsCountersumvalueslenr+   )	r,   r-   	gold_toks	pred_tokscommonnum_same	precisionrecallf1s	            r   
compute_f1r>   B   s    6""I6""I ++k.A).L.LLF6==??##H
9~~c)nn119	)***1}}qhY/I8^c)nn,F
i-&
 Y%7	8BIr   c                    i }i }| D ]z}|j         }d |j        D             }|sdg}||vrt          d|            6||         t          fd|D                       ||<   t          fd|D                       ||<   {||fS )zV
    Computes the exact and f1 scores from the examples and the model predictions
    c                 H    g | ]}t          |d                    |d           S r   )r'   )r   answers     r   
<listcomp>z"get_raw_scores.<locals>.<listcomp>[   s1    iii6HXY_`fYgHhHhiviiir   r   zMissing prediction for c              3   8   K   | ]}t          |          V  d S r   )r.   r   a
predictions     r   r   z!get_raw_scores.<locals>.<genexpr>f   s-      "V"VA=J#?#?"V"V"V"V"V"Vr   c              3   8   K   | ]}t          |          V  d S r   )r>   rD   s     r   r   z!get_raw_scores.<locals>.<genexpr>g   s-      PPa
1j 9 9PPPPPPr   )qas_idanswersprintmax)examplespredsexact_scores	f1_scoresexamplerH   gold_answersrF   s          @r   get_raw_scoresrR   R   s     LI Q QiiW_iii 	 4L4F445556]
""V"V"V"V"V"V"VVVVPPPP<PPPPP	&""r   c                     i }|                                  D ]2\  }}||         |k    }|rt          ||                    ||<   -|||<   3|S r   )itemsfloat)scoresna_probsqid_to_has_ansna_prob_thresh
new_scoresqidr&   pred_nas           r   apply_no_ans_thresholdr]   l   sd    J,,..    Q3-.0 	 #s(;$;<<JsOOJsOOr   c           	          |svt                     }t          j        ddt                                                     z  |z  fddt                                                    z  |z  fd|fg          S t          |          }t          j        ddt           fd|D                       z  |z  fddt          fd|D                       z  |z  fd|fg          S )Nexact      Y@r=   totalc              3   (   K   | ]}|         V  d S r   r   )r   krN   s     r   r   z!make_eval_dict.<locals>.<genexpr>   s'      %H%H!l1o%H%H%H%H%H%Hr   c              3   (   K   | ]}|         V  d S r   r   )r   rc   rO   s     r   r   z!make_eval_dict.<locals>.<genexpr>   s'      "B"BA9Q<"B"B"B"B"B"Br   )r6   r2   OrderedDictr4   r5   )rN   rO   qid_listra   s   ``  r   make_eval_dictrg   w   s    
L!!&%#l&9&9&;&;"<"<<uDEus9#3#3#5#5666>?% 
 
 	
 H&%#%H%H%H%Hx%H%H%H"H"HH5PQus"B"B"B"B"B"B"BBBBUJK% 
 
 	
r   c                 0    |D ]}||         | | d| <   d S )N_r   )	main_evalnew_evalprefixrc   s       r   
merge_evalrm      s6     1 1%-a[	V//a//""1 1r   c                    t          fdD                       }|}|}d}t          fd          }t          |          D ]=\  }	}
|
|vr
|
         r	||
         }n| |
         rd}nd}||z  }||k    r
|}|
         }>d\  }}|D ] }
|
         s|dz  }|
|vr|||
         z  }!d	|z  t          |          z  |d
|z  |z  fS )Nc              3   ,   K   | ]}|         
d V  dS    Nr   r   rc   rX   s     r   r   z&find_best_thresh_v2.<locals>.<genexpr>   -      HH1nQ6GHQHHHHHHr           c                     |          S r   r   rc   rW   s    r   <lambda>z%find_best_thresh_v2.<locals>.<lambda>       hqk r   keyr   )r   r   rq   r`   r0   r4   sorted	enumerater6   )rM   rV   rW   rX   
num_no_ans	cur_score
best_scorebest_threshrf   ir[   diffhas_ans_scorehas_ans_cnts     ``          r   find_best_thresh_v2r      sG   HHHHHHHHHJIJKh$9$9$9$9:::HH%% ( (3f# 	#;DDSz T	z!!"J"3-K!%M; % %c" 	qf$:F+[#:MP[:[[[r   c                     t          ||||          \  }}}t          ||||          \  }	}
}|| d<   || d<   |	| d<   |
| d<   || d<   || d<   d S )N
best_exactbest_exact_threshbest_f1best_f1_threshhas_ans_exact
has_ans_f1)r   )rj   rM   	exact_rawf1_rawrW   rX   r   exact_threshr   r   	f1_threshr   s               r   find_all_best_thresh_v2r      s}    .A%T\^l.m.m+Jm%8R`%a%a"GY
(Il%1I!""Ii"+I!.Io(Ilr   c                 8   t          fdD                       }|}|}d}t          fd          }t          |          D ]=\  }	}
|
|vr
|
         r	||
         }n| |
         rd}nd}||z  }||k    r
|}|
         }>d|z  t          |          z  |fS )Nc              3   ,   K   | ]}|         
d V  dS rp   r   rr   s     r   r   z#find_best_thresh.<locals>.<genexpr>   rs   r   rt   c                     |          S r   r   rv   s    r   rw   z"find_best_thresh.<locals>.<lambda>   rx   r   ry   r{   r   r`   r|   )rM   rV   rW   rX   r   r   r   r   rf   ri   r[   r   s     ``        r   find_best_threshr      s    HHHHHHHHHJIJKh$9$9$9$9:::HH%% ( (3f# 	#;DDSz T	z!!"J"3-K:F+[88r   c                     t          ||||          \  }}t          ||||          \  }}	|| d<   || d<   || d<   |	| d<   d S )Nr   r   r   r   )r   )
rj   rM   r   r   rW   rX   r   r   r   r   s
             r   find_all_best_threshr      sb    /y(N[[J)%>RRGY(Il%1I!""Ii"+Ir   r0   c                     d | D             }d |                                 D             }d |                                 D             }|d |D             }t          | |          \  }}t          ||||          }	t          ||||          }
t          |	|
          }|r#t          |	|
|          }t	          ||d           |r#t          |	|
|          }t	          ||d           |rt          ||||||           |S )Nc                 B    i | ]}|j         t          |j                  S r   )rH   boolrI   )r   rP   s     r   
<dictcomp>z"squad_evaluate.<locals>.<dictcomp>   s&    ZZZgGND,A,AZZZr   c                     g | ]	\  }}||
S r   r   r   rH   
has_answers      r   rB   z"squad_evaluate.<locals>.<listcomp>   s#    ccc"4&*Xbcvcccr   c                     g | ]	\  }}||
S r   r   r   s      r   rB   z"squad_evaluate.<locals>.<listcomp>   s#    fff!3[efffffr   c                     i | ]}|d S )rt   r   )r   rc   s     r   r   z"squad_evaluate.<locals>.<dictcomp>   s    111a1c111r   )rf   HasAnsNoAns)rT   rR   r]   rg   rm   r   )rL   rM   no_answer_probsno_answer_probability_thresholdqas_id_to_has_answerhas_answer_qidsno_answer_qidsr_   r=   exact_thresholdf1_threshold
evaluationhas_ans_evalno_ans_evals                 r   squad_evaluater      sG   ZZQYZZZcc8L8R8R8T8TcccOff7K7Q7Q7S7SfffN115111x//IE2, 46U O *"o?SUtuuL>>J 7%o|o^^^:|X666 5$_l^\\\:{G444 bZr?L`aaar   Fc                 ,   d }t          |          }d                    |                    |                    }|                    |           }|dk    r%|r!t                              d|  d| d           |S |t          |           z   dz
  } ||          \  }	}
 ||          \  }}t          |	          t          |          k    r%|r!t                              d	|	 d
| d           |S i }|                                D ]
\  }}|||<   d}||v r||         }||
v r|
|         }||rt                              d           |S d}||v r||         }||
v r|
|         }||rt                              d           |S |||dz            }|S )z;Project the tokenized prediction back to the original text.c                     g }t          j                    }t          |           D ]3\  }}|dk    r||t          |          <   |                    |           4d                    |          }||fS )Nr   r   )r2   re   r~   r6   appendr   )r   ns_charsns_to_s_mapr   cns_texts         r   _strip_spacesz%get_final_text.<locals>._strip_spaces  s{    !-//dOO 	 	DAqCxx)*KH&OOA''(##%%r   )do_lower_caser   r{   zUnable to find text: 'z' in ''rq   z*Length not equal after stripping spaces: 'z' vs 'NzCouldn't map start positionzCouldn't map end position)r   r   tokenizefindloggerinfor6   rT   )	pred_text	orig_textr   verbose_loggingr   	tokenizertok_textstart_positionend_positionorig_ns_textorig_ns_to_s_maptok_ns_texttok_ns_to_s_maptok_s_to_ns_mapr   	tok_indexorig_start_positionns_start_positionorig_end_positionns_end_positionoutput_texts                        r   get_final_textr      s%   8	& 	& 	& ];;;Ixx	**95566H]]9--N 	PKKNNN)NNNOOO!C	NN2Q6L'4}Y'?'?$\#%2]8%<%<"[/
<C,,,, 	iKKg\ggYdggghhh O'--// ' '9%&	""((+N; 000"23D"E" 	7KK5666&&),7... 0 A  	5KK3444/3Dq3HIJKr   c                     t          t          |           d d          }g }t          t          |                    D ]+}||k    r n"|                    ||         d                    ,|S )z"Get the n-best logits from a list.c                     | d         S )Nrq   r   xs    r   rw   z#_get_best_indexes.<locals>.<lambda>^  s
    ad r   Trz   reverser   )r}   r~   ranger6   r   )logitsn_best_sizeindex_and_scorebest_indexesr   s        r   _get_best_indexesr   \  s{    Yv..NNDQQQOL3''(( 3 3EOA.q12222r   c                     | sg S d}| D ]}|||k    r|}g }d}| D ]3}t          j        ||z
            }|                    |           ||z  }4g }|D ]}|                    ||z             |S )z,Compute softmax probability over raw logits.Nrt   )mathexpr   )rV   	max_scorescore
exp_scores	total_sumr   probss          r   _compute_softmaxr   h  s     	I  	 1 1IJI  HUY&''!Q		E ( (UY&''''Lr   c                    |rt                               d|            |rt                               d|            |r|
rt                               d|            t          j        t                    }|D ]"}||j                                     |           #i }|D ]}|||j        <   t          j        dg d          }t          j	                    }t          j	                    }t          j	                    }t          |           D ]\  }}||         }g }d}d}d}d}t          |          D ]9\  }}||j                 }t          |j        |          }t          |j        |          }|
r?|j        d         |j        d         z   } | |k     r| }|}|j        d         }|j        d         }|D ]}!|D ]}"|!t          |j                  k    r|"t          |j                  k    r4|!|j        vr>|"|j        vrH|j                            |!d          sd|"|!k     rk|"|!z
  d	z   }#|#|k    rz|                     |||!|"|j        |!         |j        |"                              ;|
r#|                     ||dd||                     t'          |d
 d          }t          j        dg d          }$i }%g }&|D ]7}'t          |&          |k    r n ||'j                 }|'j        dk    r|j        |'j        |'j        d	z            }(|j        |'j                 })|j        |'j                 }*|j        |)|*d	z            }+|                    |(          },|,                                },d                    |,                                          },d                    |+          }-t9          |,|-||	          }.|.|%v rd|%|.<   nd}.d|%|.<   |&                     |$|.|'j        |'j                             9|
rZd|%vr!|&                     |$d||                     t          |&          d	k    r"|&                    d |$ddd                     |&s!|&                     |$ddd                     t          |&          d	k     rtA          d          g }/d}0|&D ]/}1|/                    |1j        |1j        z              |0s	|1j!        r|1}00tE          |/          }2g }3t          |&          D ]V\  }4}1t          j	                    }5|1j!        |5d<   |2|4         |5d<   |1j        |5d<   |1j        |5d<   |3                    |5           Wt          |3          d	k     rtA          d          |
s|3d         d         ||j#        <   n<||0j        z
  |0j        z
  }6|6||j#        <   |6|k    rd||j#        <   n|0j!        ||j#        <   |3||j#        <   |rTtI          |d          5 }7|7%                    tM          j'        |d          dz              ddd           n# 1 swxY w Y   |rTtI          |d          5 }7|7%                    tM          j'        |d          dz              ddd           n# 1 swxY w Y   |rV|
rTtI          |d          5 }7|7%                    tM          j'        |d          dz              ddd           n# 1 swxY w Y   |S )zHWrite final predictions to the json file and log-odds of null if needed.Writing predictions to: zWriting nbest to: zWriting null_log_odds to: PrelimPrediction)feature_indexstart_index	end_indexstart_logit	end_logit@B r   Frq   c                      | j         | j        z   S r   )r   r   r   s    r   rw   z,compute_predictions_logits.<locals>.<lambda>  s    q}WXWbGb r   Tr   NbestPrediction)r   r   r   r   r   emptyrt   No valid predictionsNr   probabilityr   r   w   indent
)(r   r   r2   defaultdictlistexample_indexr   	unique_id
namedtuplere   r~   r   start_logits
end_logitsr6   tokenstoken_to_orig_maptoken_is_max_contextgetr}   r   r   r   
doc_tokensconvert_tokens_to_stringstripr   r   r   r   r   insert
ValueErrorr   r   rH   openwritejsondumps)8all_examplesall_featuresall_resultsr   max_answer_lengthr   output_prediction_fileoutput_nbest_fileoutput_null_log_odds_filer   version_2_with_negativenull_score_diff_thresholdr   example_index_to_featuresfeatureunique_id_to_resultresult_PrelimPredictionall_predictionsall_nbest_jsonscores_diff_jsonr   rP   featuresprelim_predictions
score_nullmin_null_feature_indexnull_start_logitnull_end_logitr   start_indexesend_indexesfeature_null_scorer   r   length_NbestPredictionseen_predictionsnbestpred
tok_tokensorig_doc_startorig_doc_endorig_tokensr   r   
final_texttotal_scoresbest_non_null_entryentryr   
nbest_jsonr   output
score_diffwriters8                                                           r   compute_predictions_logitsr8    sD	      IG/EGGHHH ><):<<===  N%< NL1JLLMMM + 7 = = I I!'"78??HHHH 7 706F,--#.eee  "-//O ,..N".00"+L"9"9 W4 W4w,];
!"&/&9&9 (	 (	"M7():;F-f.A;OOM+F,={KKK& :%+%8%;f>OPQ>R%R"%
22!3J-:*'-':1'=$%+%6q%9N,  !,  I #c'.&9&999  C$7$777 "'*CCC  (AAA "7;;KOO !  ;.. &4q8F 111 &--))*7(3&/(.(;K(H&,&7	&B     ': # 		%%!!"8 ! 0,     $$6<c<cmqrrr&1CCC
 
 &  	t  	tD5zz[((t12G!##$^D,<QR@R,ST
!(!:4;K!L&8H%0<RSCS1TU$==jII $>>++88HNN$4$455HH[11	+HiP_``
!111/3 ,,
/3 ,LL))ztGWcgcqrrrssss" 	`)))--2CS_mnnnooo 5zzQQ 0 0g3Z] ^ ^ ^___  	YLL))wCSVWWWXXXu::>>3444" 	0 	0E 1EO CDDD& 0: 0*/' ..
!%(( 	& 	&HAu ,..F"ZF6N$)!HF=!$)$5F=!"'/F;f%%%%z??Q3444& 		K.8mF.COGN++ $&9&EEI\IfgJ/9W^,55524//2E2J/)3w~&& G(#.. 	G&LLOA>>>EFFF	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G 	G  F#S)) 	FVLLN1===DEEE	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F ! H%< H+S11 	HVLL$4Q???$FGGG	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H s6   !-XX!X7-Y00Y47Y4-[[[c                    t          j        dg d          }t          j        dg d          }t                              d|            t          j        t
                    }|D ]"}||j                                     |           #i }|D ]}|||j        <   t          j	                    }t          j	                    }t          j	                    }t          |           D ]\  }}||         }g }d}t          |          D ]\  }}||j                 }|j        }t          ||          }t          |          D ]}t          |	          D ]}|j        |         }|j        |         } ||	z  |z   }!|j        |!         }"|j        |!         }#| |j        dz
  k    rM|#|j        dz
  k    r\|j                            | d          sx|#| k     r|#| z
  dz   }$|$|k    r|                     ||| |#||"                     Čt+          |d	 d
          }i }%g }&|D ]D}'t-          |&          |k    r n-||'j                 }|j        |'j        |'j        dz            }(|j        |'j                 })|j        |'j                 }*|j        |)|*dz            }+|                    |(          },|,                                },d                    |,                                           },d                    |+          }-tC          |d          r|j"        }.n|j#        }.tI          |,|-|.|          }/|/|%v rd
|%|/<   |&                     ||/|'j%        |'j&                             F|&s!|&                     |ddd                     g }0d}1|&D ](}2|0                    |2j%        |2j&        z              |1s|2}1)tO          |0          }3g }4t          |&          D ]V\  }}2t          j	                    }5|2j(        |5d<   |3|         |5d<   |2j%        |5d<   |2j&        |5d<   |4                    |5           Wt-          |4          dk     rtS          d          |1tS          d          |}6|6||j*        <   |1j(        ||j*        <   |4||j*        <   tW          |d          5 }7|7,                    t[          j.        |d          dz              ddd           n# 1 swxY w Y   tW          |d          5 }7|7,                    t[          j.        |d          dz              ddd           n# 1 swxY w Y   |
rTtW          |d          5 }7|7,                    t[          j.        |d          dz              ddd           n# 1 swxY w Y   |S )z
    XLNet write prediction logic (more complex than Bert's). Write final predictions to the json file and log-odds of
    null if needed.

    Requires utils_squad_evaluate.py
    r   )r   r   r   start_log_probend_log_probr   )r   r:  r;  r   r   rq   Fc                      | j         | j        z   S r   )r:  r;  r   s    r   rw   z/compute_predictions_log_probs.<locals>.<lambda>  s    q/?!./P r   Tr   r   r   r   g    .Nr   r   r:  r;  r   r   r   r   r   )/r2   r   r   r   r   r   r   r   r   re   r~   
cls_logitsminr   r   start_top_indexr   end_top_indexparagraph_lenr  r  r}   r6   r   r   r   r   r  r  r  r  r   r   hasattrr   do_lowercase_and_remove_accentr   r:  r;  r   r   r  rH   r	  r
  r  r  )8r  r  r  r   r  r  r  r  start_n_top	end_n_topr  r   r   r  r(  r  r  r  r  r  r  r  r   rP   r  r  r   r   cur_null_scorer   jr:  r   j_indexr;  r   r'  r)  r*  r+  r,  r-  r.  r/  r   r   r   r0  r1  r2  r3  r   r4  r5  r6  r7  s8                                                           r   compute_predictions_log_probsrI  N  sr   * $.kkk  #-EEE  KKC+ACCDDD + 7 = = I I!'"78??HHHH 7 706F,--!-//O ,..N".00"+L"9"9 F4 F4w,];
&/&9&9 *	 *	"M7():;F#.N Z88J;'' " "y)) ! !A%+%8%;N"("8";K)ma/G#)#4W#=L & 4W =I
 #g&;a&???  G$9A$=== "7;;KOO !  ;.. &4q8F 111 &--))*7(3&/+9)5     3!"H $$Q$Q[_
 
 
 & )	 )	D5zz[((t12G !(8DNQ<N(OPJ$6t7GHN"4T^DL!,^|a?O-PQK 99*EEH  ~~''Hxx 0 011H--Iy/22 I ) 7 ) H')]O\\J---+/Z(LL  jATcgctuuu     	\LL))r$UYZZZ[[[" 	, 	,E 4u7I IJJJ& ,&+# ..
!%(( 	& 	&HAu ,..F"ZF6N$)!HF=!',';F#$%*%7F>"f%%%%z??Q3444&3444
+5( +>*B')3w~&&	$c	*	* CfTZ:::TABBBC C C C C C C C C C C C C C C 
	%	% BTZq999D@AAAB B B B B B B B B B B B B B B  H+S11 	HVLL$4Q???$FGGG	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H s6   -SSS'-T  T$'T$=-U66U:=U:r   )Nr0   )F)__doc__r2   r  r   r	   r!   models.bertr   utilsr   
get_logger__name__r   r'   r)   r.   r>   rR   r]   rg   rm   r   r   r   r   r   r   r   r   r8  rI  r   r   r   <module>rO     s          				  ) ) ) ) ) )       
	H	%	%C C C(' ' 'E E E   # # #4  
 
 
 
*1 1 1
\ \ \D) ) )9 9 9., , ,   >[ [ [ [|	 	 	  .L L L^} } } } }r   