o
    i6x                  	   @   s  d Z ddlZddlZddlmZmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlmZmZmZmZ er:ddlmZ d	Zi d
ededdddddddddddddddddddd d!d"d	d#dd$d%d&di d'dd(dd)dd*dd+dd,dd-dd.dd/d0d1d2d3d!d4d5d6d7d8d8d9d:d;d<d:d:Zd=Zd>Zi ed? eed@ dedA dedB dedC d%edD dedE dedF dedG dedH dedI dedJ dedK dedL d!edM d0edN d2edO d5ZdPdQ e D ZedA ed< dRZddTedUedVefdWdXZddTedUedVefdYdZZddTedUedVefd[d\Z 	SddTed]edUedVefd^d_Z!	dd`edaedB dVefdbdcZ"ddZ#eee deZ$dTedfee dge%e& dVefdhdiZ'e'Z(djedfee dge%e& dVefdkdlZ)djedfee dge%e& dVefdmdnZ*dTedfee dVefdodpZ+dTedfee dVefdqdrZ,ddTed`edUedVefdsdtZ-dfdeB ee B dVe.e fdudvZ/d`edVefdwdxZ0	dd`edyedB dVdfdzd{Z1dfee d|e.e dVe.e fd}d~Z2dddee.e B dB dVdfddZ3dS )z6Handle alternate character sets for character strings.    N)TYPE_CHECKINGcast)MutableSequenceSequence)config)warn_and_log)TEXT_VR_DELIMS
PersonNameVRCUSTOMIZABLE_CHARSET_VR)DataElementiso8859 ISO_IR 6z	ISO_IR 13	shift_jisz
ISO_IR 100latin_1z
ISO_IR 101	iso8859_2z
ISO_IR 109	iso8859_3z
ISO_IR 110	iso8859_4z
ISO_IR 126
iso_ir_126z
ISO_IR 127
iso_ir_127z
ISO_IR 138
iso_ir_138z
ISO_IR 144
iso_ir_144z
ISO_IR 148
iso_ir_148z
ISO_IR 166
iso_ir_166zISO 2022 IR 6zISO 2022 IR 13ISO 2022 IR 87
iso2022_jpzISO 2022 IR 100zISO 2022 IR 101zISO 2022 IR 109zISO 2022 IR 110zISO 2022 IR 126zISO 2022 IR 127zISO 2022 IR 138zISO 2022 IR 144zISO 2022 IR 148zISO 2022 IR 149euc_krISO 2022 IR 159iso2022_jp_2zISO 2022 IR 166zISO 2022 IR 58	iso_ir_58
ISO_IR 192UTF8GB18030zISO 2022 GBKGBKzISO 2022 58GB2312)r!   r$   r#      s   (Bs   -A   )I   (Js   $Bs   -Bs   -Cs   -Ds   -Fs   -Gs   -Hs   -Ls   -Ms   -Ts   $)Cs   $(Ds   $)Ac                 C   s   i | ]\}}||qS  r)   ).0kvr)   r)   E/mnt/sdb/aimis/docanh/lib/python3.10/site-packages/pydicom/charset.py
<dictcomp>^   s    r.   )r   r   r    strictvalueerrorsreturnc                 C   sh  t d}| }|dks| dkr>d}| D ]'}z||}W n ty(   d}Y nw t|dks5dt|kr7d}||7 }q|S || d }t|dkrUtd| dt| d	t|d@ }t| dd
 dD ]M\}}z||}W n ty }	 z
||	_t| |	_|	d
}	~	ww t|dkst|d@ |A dkr|dkrdnd}
d|
 }td| |t| |||7 }qd|S )a  Convert a unicode string into JIS X 0201 byte string using shift_jis
    encodings.
    shift_jis is a superset of jis_x_0201. So we can regard the encoded value
    as jis_x_0201 if it is single byte character.

    Parameters
    ----------
    value : str
        The unicode string as presented to the user.
    errors : str
        The behavior of a character which could not be encoded. If 'strict' is
        passed, raise an UnicodeEncodeError. If any other value is passed,
        non ISO IR 14 characters are replaced by the ASCII '?'.

    Returns
    -------
    bytes
        The encoded string. If some characters in value could not be encoded to
        JIS X 0201, and `errors` is not set to 'strict', they are replaced to
        '?'.

    Raises
    ------
    UnicodeEncodeError
        If errors is set to 'strict' and `value` could not be encoded with
        JIS X 0201.
    r   r/   r          ?      r   zillegal multibyte sequenceNz	ISO IR 14z	ISO IR 13Given character is out of )	codecsgetincrementalencoderencodeUnicodeEncodeErrorlenord	enumeratestartend)r0   r1   encoder_classencoderencodedcbmsbiecharacter_setmsgr)   r)   r-   _encode_to_jis_x_0201g   sF   


 

rK   c                 C      t | d|dS )z7Convert a unicode string into JIS X 0208 encoded bytes.r   r1   _encode_to_given_charsetr0   r1   r)   r)   r-   _encode_to_jis_x_0208      rQ   c                 C   rL   )z7Convert a unicode string into JIS X 0212 encoded bytes.r   rM   rN   rP   r)   r)   r-   _encode_to_jis_x_0212   rR   rS   rI   c                 C   s   t | }|dkr| j||dS t|}| }|| d }|t| s2t|| dt| d| t| dd dD ];\}}z||}	W n ty\ }
 z
||
_	t| |
_
|
d}
~
ww |	dd tkrrt|| |t| d| ||	7 }q;|S )a  Encode a unicode string using the given character set.

    The escape sequence which is located at the end of the encoded value has
    to vary depending on the value 1 of SpecificCharacterSet. So we have to
    trim it and append the correct escape sequence manually.

    Parameters
    ----------
    value : text type
        The unicode string as presented to the user.
    character_set: str:
        Character set for result.
    errors : str
        The behavior of a character which could not be encoded. This value
        is passed to errors argument of str.encode().

    Returns
    -------
    bytes
        The encoded string. If some characters in value could not be encoded to
        given character_set, it depends on the behavior of corresponding python
        encoder.

    Raises
    ------
    UnicodeEncodeError
        If errors is set to 'strict' and `value` could not be encoded with
        given character_set.
    r/   rM   r   r7   r5   N)python_encodingr:   r8   r9   
startswithENCODINGS_TO_CODESr;   r<   r>   r?   r@   ESC)r0   rI   r1   encodingrA   rB   rC   rG   rD   rE   rH   r)   r)   r-   rO      s:   !


rO   rX   rC   c                 C   sH   t d }t d }| dkr|du r|S |d }d|kr|S |S t| dS )a  Return an escape sequence corresponding to the given encoding. If
    encoding is 'shift_jis', return 'ESC)I' or 'ESC(J' depending on the first
    byte of encoded.

    Parameters
    ----------
    encoding : str
        An encoding is used to specify  an escape sequence.
    encoded : bytes
        The encoded value is used to choose an escape sequence if encoding is
        'shift_jis'.

    Returns
    -------
    bytes
        Escape sequence for encoded value.
    r(   r'   r   Nr   r6   r3   )rW   rV   get)rX   rC   ESC_ISO_IR_14ESC_ISO_IR_13
first_byter)   r)   r-   !_get_escape_sequence_for_encoding   s   r]   )r   r   )r   r   r   	encodings
delimitersc                    s   t | vrNd }z| |W S  ty.   tjjtjkr td| d t}| | Y S  t	yM   tjjtjkr< td| d | j|dd Y S w d}t
|| }d	 fd
d|D S )a  Decode an encoded byte `value` into a unicode string using `encodings`.

    Parameters
    ----------
    value : bytes
        The encoded byte string in the DICOM element value.
    encodings : list of str
        The encodings needed to decode the string as a list of Python
        encodings, converted from the encodings in (0008,0005) *Specific
        Character Set*.
    delimiters : set of int
        A set of characters or character codes, each of which resets the
        encoding in `value`.

    Returns
    -------
    str
        The decoded unicode string. If the value could not be decoded,
        and :attr:`~pydicom.config.settings.reading_validation_mode`
        is not ``RAISE``, a warning is issued, and `value` is
        decoded using the first encoding with replacement characters,
        resulting in data loss.

    Raises
    ------
    UnicodeDecodeError
        If :attr:`~pydicom.config.settings.reading_validation_mode`
        is ``RAISE`` and `value` could not be decoded with the given
        encodings.
    LookupError
        If :attr:`~pydicom.config.settings.reading_validation_mode`
        is ``RAISE`` and the given encodings are invalid.
    r   Unknown encoding '"' - using default encoding insteadz,Failed to decode byte string with encoding 'z2' - using replacement characters in decoded stringreplacerM   s   (^[^]+|[][^]*)r   c                    s   g | ]}t | qS r)   )_decode_fragment)r*   fragmentr_   r^   r)   r-   
<listcomp>u  s    z decode_bytes.<locals>.<listcomp>)rW   decodeLookupErrorr   settingsreading_validation_modeRAISEr   default_encodingUnicodeErrorrefindalljoin)r0   r^   r_   first_encodingregex	fragmentsr)   re   r-   decode_bytes*  s6   #
rt   byte_strc                 C   st   z|  trt| ||W S | |d W S  ty9   tjjtjkr# t	dd
| d | j|d dd Y S w )a,  Decode a byte string encoded with a single encoding.

    If `byte_str` starts with an escape sequence, the encoding corresponding
    to this sequence is used for decoding if present in `encodings`,
    otherwise the first value in encodings.
    If a delimiter occurs inside the string, it resets the encoding to the
    first encoding in case of single-byte encodings.

    Parameters
    ----------
    byte_str : bytes
        The encoded string to be decoded.
    encodings: list of str
        The list of Python encodings as converted from the values in the
        Specific Character Set tag.
    delimiters: set of int
        A set of characters or character codes, each of which resets the
        encoding in `byte_str`.

    Returns
    -------
    str
        The decoded unicode string. If the value could not be decoded,
        and :attr:`~pydicom.config.settings.reading_validation_mode` is not
        set to ``RAISE``, a warning is issued, and the value is
        decoded using the first encoding with replacement characters,
        resulting in data loss.

    Raises
    ------
    UnicodeDecodeError
        If :attr:`~pydicom.config.settings.reading_validation_mode` is set
        to ``RAISE`` and `value` could not be decoded with the given
        encodings.

    References
    ----------
    * DICOM Standard, Part 5,
      :dcm:`Sections 6.1.2.4<part05/chapter_6.html#sect_6.1.2.4>` and
      :dcm:`6.1.2.5<part05/chapter_6.html#sect_6.1.2.5>`
    * DICOM Standard, Part 3,
      :dcm:`Annex C.12.1.1.2<part03/sect_C.12.html#sect_C.12.1.1.2>`
    r   z-Failed to decode byte string with encodings: , z1 - using replacement characters in decoded stringrb   rM   )rU   rW   _decode_escaped_fragmentrg   rm   r   ri   rj   rk   r   rp   )ru   r^   r_   r)   r)   r-   rc   |  s   .
rc   c                    s   |  drdnd}t| d| d}||v s|tkrU|tv r$| |S | |d } t fddt| D d}|durP| d| || |d |d  S | |S d	}tj	j
tjkrbt|t| d
|d   | j|d ddS )zDecodes a byte string starting with an escape sequence.

    See `_decode_fragment` for parameter description and more information.
    )s   $(s   $)      Nr   c                 3   s     | ]\}}| v r|V  qd S Nr)   )r*   idxchr_   r)   r-   	<genexpr>  s    z+_decode_escaped_fragment.<locals>.<genexpr>r   z5Found unknown escape sequence in encoded string valuez - using encoding rb   rM   )rU   CODES_TO_ENCODINGSrY   rl   handled_encodingsrg   nextr>   r   ri   rj   rk   
ValueErrorr   )ru   r^   r_   
seq_lengthrX   indexrJ   r)   r}   r-   rw     s"   

rw   c              	   C   s   t |D ]5\}}z(t| |}|dkr |tvr t||d}|| }|tv r,|t|d 7 }|W   S  ty9   Y qw t|dkrPzt| |W S  tyO   Y nw t	j
jt	jkr^| |d  tdd| d t| |d ddS )	a  Encode a unicode string `value` into :class:`bytes` using `encodings`.

    Parameters
    ----------
    value : str
        The unicode string as presented to the user.
    encodings : list of str
        The encodings needed to encode the string as a list of Python
        encodings, converted from the encodings in (0008,0005) *Specific
        Character Set*.

    Returns
    -------
    bytes
        The encoded string. If `value` could not be encoded with any of
        the given encodings, and
        :attr:`~pydicom.config.settings.reading_validation_mode` is not
        ``RAISE``, a warning is issued, and `value` is encoded using
        the first encoding with replacement characters, resulting in data loss.

    Raises
    ------
    UnicodeEncodeError
        If  :attr:`~pydicom.config.settings.writing_validation_mode`
        is set to ``RAISE`` and `value` could not be encoded with the
        supplied encodings.
    r   rC   r5   z'Failed to encode value with encodings: rv   z1 - using replacement characters in encoded stringrb   rM   )r>   _encode_string_implr   r]   #need_tail_escape_sequence_encodingsrm   r<   _encode_string_partsr   r   ri   writing_validation_moderk   r:   r   rp   )r0   r^   rG   rX   rC   escape_sequencer)   r)   r-   encode_string  s4   

r   c           	      C   s   t  }| }t}|red}|D ].}zt|| |}t|}W  n ttfy; } z|j|kr1|j}|}W Y d}~qd}~ww |dkrDtdt|d| |}|tvrY|t	||d7 }||7 }||d }|s	|t
v rq|t	|d 7 }t|S )at  Convert a unicode string into a byte string using the given
    list of encodings.
    This is invoked if `encode_string` failed to encode `value` with a single
    encoding. We try instead to use different encodings for different parts
    of the string, using the encoding that can encode the longest part of
    the rest of the string as we go along.

    Parameters
    ----------
    value : str
        The unicode string as presented to the user.
    encodings : list of str
        The encodings needed to encode the string as a list of Python
        encodings, converted from the encodings in Specific Character Set.

    Returns
    -------
    bytes
        The encoded string, including the escape sequences needed to switch
        between different encodings.

    Raises
    ------
    ValueError
        If `value` could not be encoded with the given encodings.

    r   Nz:None of the given encodings can encode the first characterr   )	bytearrayrl   r   r<   UnicodeDecodeErrorr;   r?   r   r   r]   r   bytes)	r0   r^   rC   unencoded_partbest_encoding	max_indexrX   errencoded_partr)   r)   r-   r   $  s@   

!r   c                 C   s&   |t v rt | | |dS | j||dS )zConvert a unicode string into a byte string.

    If given encoding is in `custom_encoders`, use a corresponding
    `custom_encoder`. If given encoding is not in `custom_encoders`, use a
    corresponding python handled encoder.
    rM   )custom_encodersr:   )r0   rX   r1   r)   r)   r-   r   j  s   r   c              	   C   s   | pdg} t | tr| g} n| dd } | d sd| d< g }| D ]}z	|t|  W q  ty;   |t| Y q w t| dkrGt| |}|S )a}  Convert DICOM `encodings` into corresponding Python encodings.

    Handles some common spelling mistakes and issues a warning in this case.

    Handles stand-alone encodings: if they are the first encodings,
    additional encodings are ignored, if they are not the first encoding,
    they are ignored. In both cases, a warning is issued.

    Invalid encodings are replaced with the default encoding with a
    respective warning issued, if
    :attr:`~pydicom.config.settings.reading_validation_mode` is
    ``WARN``, or an exception is raised if it is set to
    ``RAISE``.

    Parameters
    ----------
    encodings : str or list of str
        The encoding or list of encodings as read from (0008,0005)
        *Specific Character Set*.

    Returns
    -------
    list of str
        A :class:`list` of Python encodings corresponding to the DICOM
        encodings. If an encoding is already a Python encoding, it is returned
        unchanged. Encodings with common spelling errors are replaced by the
        correct encoding, and invalid encodings are replaced with the default
        encoding if :attr:`~pydicom.config.settings.reading_validation_mode`
        is not set to ``RAISE``.

    Raises
    ------
    LookupError
        If `encodings` contains a value that could not be converted and
        :attr:`~pydicom.config.settings.reading_validation_mode` is
        ``RAISE``.
    r   Nr   r   r5   )
isinstancestrappendrT   KeyError'_python_encoding_for_corrected_encodingr<   $_handle_illegal_standalone_encodings)r^   py_encodingsrX   r)   r)   r-   convert_encodings  s    
'

r   c                 C   s   d}t d| durd| dd  }nt d| dur#d| dd  }|rAzt| }t| | |W S  ty@   t|  t Y S w zt|  | W S  tyX   t|  t Y S w )a  Try to replace the given invalid encoding with a valid encoding by
    checking for common spelling errors, and return the correct Python
    encoding for that encoding. Otherwise check if the
    encoding is already a valid Python encoding, and return that. If both
    attempts fail, return the default encoding.
    Issue a warning for the invalid encoding except for the case where it is
    already converted.
    Nz
^ISO[^_]IRISO_IR   z!^(?=ISO.2022.IR.)(?!ISO 2022 IR )zISO 2022 IR    )	rn   matchrT   _warn_about_invalid_encodingr   rl   r8   lookuprh   )rX   patchedpy_encodingr)   r)   r-   r     s*   


r   patched_encodingc                 C   sV   |du rt jjt jkrtd|  dd|  d}n	d|  d| d}t|dd dS )	a?  Issue a warning for the given invalid encoding.
    If patched_encoding is given, it is mentioned as the
    replacement encoding, other the default encoding.
    If no replacement encoding is given, and
    :attr:`~pydicom.config.settings.reading_validation_mode` is set to
    ``RAISE``, `LookupError` is raised.
    Nr`   'ra   z,Incorrect value for Specific Character Set 'z' - assuming '   
stacklevel)r   ri   rj   rk   rh   r   )rX   r   rJ   r)   r)   r-   r     s   
r   r   c              	   C   s   | d t v r!td| d  dd| dd  dd |dd S ttt| dd D ]\}}|t v rDtd| d	dd ||d = q-|S )
zCheck for stand-alone encodings in multi-valued encodings.
    If the first encoding is a stand-alone encoding, the rest of the
    encodings is removed. If any other encoding is a stand-alone encoding,
    it is removed from the encodings.
    r   zValue 'zG' for Specific Character Set does not allow code extensions, ignoring: rv   r5   Nr   r   z/' cannot be used as code extension, ignoring it)STAND_ALONE_ENCODINGSr   rp   reversedlistr>   )r^   r   rG   rX   r)   r)   r-   r     s"    

r   elemr   dicom_character_setc                    s   | j rdS |s
dg}t| | jtjkr2| jdkr%tt| j | _dS  fdd| jD | _dS | jt	v rp| jdkrNt
| jtrDdS t| j t| _dS t }| jD ]}t
|tra|| qT|t| t qT|| _dS dS )a	  Apply the DICOM character encoding to a data element

    Parameters
    ----------
    elem : dataelem.DataElement
        The :class:`DataElement<pydicom.dataelem.DataElement>` instance
        containing an encoded byte string value to decode.
    dicom_character_set : str or list of str or None
        The value of (0008,0005) *Specific Character Set*, which may be a
        single value, a multiple value (code extension), or may also be ``''``
        or ``None``, in which case ``'ISO_IR 6'`` will be used.
    Nr   r5   c                    s   g | ]
}t t| qS r)   )r   r	   rg   )r*   vvr^   r)   r-   rf   9  s    z"decode_element.<locals>.<listcomp>)is_emptyr   r
   PNVMr   r	   r0   rg   r   r   r   rt   r   r   r   )r   r   outputr0   r)   r   r-   decode_element  s*   





r   )r/   rz   )4__doc__r8   rn   typingr   r   collections.abcr   r   pydicomr   pydicom.miscr   pydicom.valuerepr   r	   r
   r   pydicom.dataelemr   rl   rT   r   rW   r   itemsrV   r   r   r   rK   rQ   rS   rO   r]   r   r   setintrt   decode_stringrc   rw   r   r   r   r   r   r   r   r   r   r)   r)   r)   r-   <module>   s  	
 !"#$*	
G
D
%"O
>
+?F">%

