U
    ߦwh'                     @   s\   d Z ddlZddlmZmZ ddlmZmZ ddlm	Z	 G dd deZ
G dd	 d	e
ZdS )
z
This module is for codecs only.

While the codec implementation can contain details of the PDF specification,
the module should not do any PDF parsing.
    N)ABCabstractmethod)DictList)logger_warningc                   @   s8   e Zd ZdZeeedddZeeedddZdS )Codecz#Abstract base class for all codecs.datareturnc                 C   s   dS )z
        Encode the input data.

        Args:
            data: Data to encode.

        Returns:
            Encoded data.

        N selfr	   r   r   9/tmp/pip-unpacked-wheel-r8zeli8p/pypdf/_codecs/_codecs.pyencode   s    zCodec.encodec                 C   s   dS )z
        Decode the input data.

        Args:
            data: Data to decode.

        Returns:
            Decoded data.

        Nr   r   r   r   r   decode   s    zCodec.decodeN)__name__
__module____qualname____doc__r   bytesr   r   r   r   r   r   r      s
   r   c                   @   s   e Zd ZdZdZdZdZdZdddd	Zddd
dZ	e
e
dddZee e
dddZddddZe
edddZe
e
dddZe
eddddZdS )LzwCodecz2Lempel-Ziv-Welch (LZW) adaptive compression codec.   i  	      N)r
   c                 C   s<   dd t dD | _| jd | _| j| _d| j> d | _dS )z>Initialize the encoding table and state to initial conditions.c                 S   s   i | ]}t |g|qS r   r   .0ir   r   r   
<dictcomp>7   s      z7LzwCodec._initialize_encoding_table.<locals>.<dictcomp>r      N)rangeencoding_table
EOD_MARKER	next_codeINITIAL_BITS_PER_CODEbits_per_codemax_code_valuer   r   r   r   _initialize_encoding_table5   s    z#LzwCodec._initialize_encoding_tablec                 C   sH   |  j d7  _ | j | jkrD| j| jk rD|  jd7  _d| j> d | _dS )z5Update bits_per_code and max_code_value if necessary.r   N)r#   r&   r%   MAX_BITS_PER_CODEr'   r   r   r   _increase_next_code<   s    

zLzwCodec._increase_next_coder   c                 C   s   g }| | j |   d}|D ]z}|t|g }|| jkrB|}q | | j|  | jd| j> d kr|| j| j|< |   n| | j |   t|g}q |r| | j|  | | j | 	|S )z
        Encode data using the LZW compression algorithm.

        Taken from PDF 1.7 specs, "7.4.4.2 Details of LZW Encoding".
            r   )
appendCLEAR_TABLE_MARKERr(   r   r!   r#   r)   r*   r"   _pack_codes_into_bytes)r   r	   Zresult_codesZcurrent_sequencebyteZnext_sequencer   r   r   r   F   s&    

zLzwCodec.encode)codesr
   c                 C   s   |    d}d}t }|D ]j}|| j> |B }|| j7 }|dkrZ|d8 }|||? d@  q6|| jkrn|    q|| jkr|qq|   q|dkr||d| > d@  t|S )z
        Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.
        The bit-width starts at 9 bits and expands as needed.
        r         )r(   	bytearrayr%   r,   r-   r"   r*   r   )r   r0   bufferZbits_in_bufferoutputcoder   r   r   r.   p   s$    




zLzwCodec._pack_codes_into_bytesc                 C   sR   d| j > d | _dd t| jD dg| j| j d   | _| jd | _d| _d S )Nr   c                 S   s   g | ]}t |gqS r   r   r   r   r   r   
<listcomp>   s     z7LzwCodec._initialize_decoding_table.<locals>.<listcomp>r+   r   )r)   r&   r    r-   decoding_tabler"   _table_index_bits_to_getr'   r   r   r   _initialize_decoding_table   s    z#LzwCodec._initialize_decoding_tablec                 C   s   |  z| j | jk rF| jd> || j B | _|  jd7  _|  j d7  _ q| j| j | j ? | j| jd  @ }|  j | j8  _ | jd@ | _|W S  tk
r   | j Y S X d S )Nr1   r   r   i )
_next_bitsr:   
_next_data_byte_pointer
_and_table
IndexErrorr"   )r   r	   r6   r   r   r   _next_code_decode   s     zLzwCodec._next_code_decodec                 C   s:  ddddg| _ d| _d| _d| _d| _d| _t }|   d| _d| _d| _| j	}| 
|}|| jkrlq2|| j	kr|   | 
|}|| jkrq2|| j|  |}qT|| jk r| j| }|| || j	kr| | j| |d  |}qT| j| | j| dd  }|| | | j| |d  |}qT| S )	z
        The following code was converted to Python from the following code:
        https://github.com/empira/PDFsharp/blob/master/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/LzwDecode.cs
              i  r   r   Nr   )r?   r9   r:   r>   r=   r<   ioBytesIOr;   r-   rA   r"   writer8   _add_entry_decodegetvalue)r   r	   Zoutput_streamZold_coder6   stringr   r   r   r      sD    









zLzwCodec.decode)
old_stringnew_charr
   c                 C   sz   |t |g }| j| jkr(tdt d S || j| j< |  jd7  _| jdkrTd| _n"| jdkrfd| _n| jdkrvd| _d S )	Nz#Ignoring too large LZW table index.r   rB   
   rC      rD   r   )r   r9   r&   r   r   r8   r:   )r   rK   rL   Z
new_stringr   r   r   rH     s    



zLzwCodec._add_entry_decode)r   r   r   r   r-   r"   r$   r)   r(   r*   r   r   r   intr.   r;   rA   r   rH   r   r   r   r   r   -   s   
*"61r   )r   rE   abcr   r   typingr   r   Zpypdf._utilsr   r   r   r   r   r   r   <module>   s   