
    Kg,                     p    d dl Z d dlZd dlmZmZ ddlmZmZ  ej        d          Z	 G d d          Z
dS )    N)OptionalUnion   )LanguageFilterProbingStates%   [a-zA-Z]*[-]+[a-zA-Z]*[^a-zA-Z-]?c                   `   e Zd ZdZej        fdeddfdZddZede	e
         fd            Zede	e
         fd            Zd	eeef         defd
Zedefd            ZdefdZedeeef         defd            Zedeeef         defd            Zedeeef         defd            ZdS )CharSetProbergffffff?lang_filterreturnNc                     t           j        | _        d| _        || _        t          j        t                    | _        d S )NT)	r   	DETECTING_stateactiver
   logging	getLogger__name__logger)selfr
   s     ]/var/www/html/4nations/venv/lib/python3.11/site-packages/pip/_vendor/chardet/charsetprober.py__init__zCharSetProber.__init__,   s1    ",&'11    c                 (    t           j        | _        d S N)r   r   r   r   s    r   resetzCharSetProber.reset2   s    ",r   c                     d S r    r   s    r   charset_namezCharSetProber.charset_name5   s    tr   c                     t           r   NotImplementedErrorr   s    r   languagezCharSetProber.language9   s    !!r   byte_strc                     t           r   r    )r   r#   s     r   feedzCharSetProber.feed=   s    !!r   c                     | j         S r   )r   r   s    r   statezCharSetProber.state@   s
    {r   c                     dS )Ng        r   r   s    r   get_confidencezCharSetProber.get_confidenceD   s    sr   bufc                 2    t          j        dd|           } | S )Ns   ([ -])+    )resub)r*   s    r   filter_high_byte_onlyz#CharSetProber.filter_high_byte_onlyG   s    f&c22
r   c                    t                      }t                              |           }|D ]Z}|                    |dd                    |dd         }|                                s|dk     rd}|                    |           [|S )u7  
        We define three types of bytes:
        alphabet: english alphabets [a-zA-Z]
        international: international characters [-ÿ]
        marker: everything else [^a-zA-Z-ÿ]
        The input buffer can be thought to contain a series of words delimited
        by markers. This function works to filter all words that contain at
        least one international character. All contiguous sequences of markers
        are replaced by a single space ascii character.
        This filter applies to all scripts which do not use English characters.
        N   r,   )	bytearrayINTERNATIONAL_WORDS_PATTERNfindallextendisalpha)r*   filteredwordsword	last_chars        r   filter_international_wordsz(CharSetProber.filter_international_wordsL   s     ;;
 ,33C88 
	' 
	'DOOD"I&&& RSS	I$$&& !9w+>+> 	OOI&&&&r   c                 v   t                      }d}d}t          |                               d          } t          |           D ]U\  }}|dk    r|dz   }d}|dk    r<||k    r4|s2|                    | ||                    |                    d           d}V|s|                    | |d	                    |S )
a[  
        Returns a copy of ``buf`` that retains only the sequences of English
        alphabet and high byte characters that are not between <> characters.
        This filter can be applied to all scripts which contain both English
        characters and extended ASCII characters, but is currently only used by
        ``Latin1Prober``.
        Fr   c   >r      <r,   TN)r3   
memoryviewcast	enumerater6   )r*   r8   in_tagprevcurrbuf_chars         r   remove_xml_tagszCharSetProber.remove_xml_tagsn   s     ;;oo""3'''nn 	 	ND( 4axT!!$;;v; OOCT	N333OOD)))  	( OOCJ'''r   )r   N)r   
__module____qualname__SHORTCUT_THRESHOLDr   NONEr   r   propertyr   strr   r"   r   bytesr3   r   r%   r'   floatr)   staticmethodr/   r<   rH   r   r   r   r	   r	   (   s       5C5H 2 2N 2T 2 2 2 2- - - - hsm    X "(3- " " " X""U5)#34 " " " " " |    X     5	)9#: u    \ eY.>(? I    \B $U5)#34 $ $ $ $ \$ $ $r   r	   )r   r-   typingr   r   enumsr   r   compiler4   r	   r   r   r   <module>rU      s   :  				 " " " " " " " " / / / / / / / /(bj8  
k k k k k k k k k kr   