U
    ߦwhI                     @   s  U d dl Z d dl mZ d dl mZ d dlmZ d dlmZmZm	Z	m
Z
mZmZ ddlmZmZ ddlmZmZ dd	lmZmZmZmZmZmZ eeee
eeeeeeef f eeef ef d
ddZeee
eeeeeeef f eeef f dddZdde e!ddi fZ"e
eeeeeeef f eeef f e#d< dddddddddddddddddddZ$eeef e#d< ddddddddddddddddddd Z%eeef e#d!< ee
eeeeef f eeef f d"d#d$Z&eeeeeef f d"d%d&Z'ee
eeef e	e f d"d'd(Z(eeeeeef f eeef ed)d*d+Z)ee*d"d,d-Z+e*e,e,ede
eef f eeef e	e e
e,e,ede
eef f f d.d/d0Z-e*eeef e	e ede
eef f ede
eef f d1d2d3Z.e*eeef e	e dd4d5d6Z/eeeeef d7d8d9Z0eeef eed:d;d<Z1eeef eed=d>d?Z2eeeef e	e e
eeef e	e f d@dAdBZ3dS )C    N)Error)	unhexlify)ceil)AnyDictListTupleUnioncast   )adobe_glyphscharset_encoding)logger_errorlogger_warning)ArrayObjectDecodedStreamObjectDictionaryObject
NullObjectStreamObjectis_null_or_none)	font_namespace_widthobjreturnc                 C   s0   |d d |  }t ||\}}}}|||||fS )a  
    Determine information about a font.

    Args:
        font_name: font name as a string
        space_width: default space width if no data is found.
        obj: XObject or Page where you can find a /Resource dictionary

    Returns:
        Font sub-type, space_width criteria (50% of width), encoding, map character-map, font-dictionary.
        The font-dictionary itself is suitable for the curious.

    z
/Resourcesz/Font)build_char_map_from_dict)r   r   r   ftZfont_subtypeZfont_halfspaceZfont_encodingZfont_map r   //tmp/pip-unpacked-wheel-r8zeli8p/pypdf/_cmap.pybuild_char_map   s     r   )r   r   r   c                 C   sR   t t|d  }t|\}}td||}t|| d }t||d }||||fS )ak  
    Determine information about a font.

    Args:
        space_width: default space with if no data found
             (normally half the width of a character).
        ft: Font Dictionary

    Returns:
        Font sub-type, space_width criteria(50% of width), encoding, map character-map.
        The font-dictionary itself is suitable for the curious.

    /Subtype        @)r
   str
get_objectget_encodingget_actual_str_keybuild_font_width_mapcompute_space_width)r   r   Z	font_typeencodingmap_dictZspace_key_charfont_width_mapZhalf_space_widthr   r   r   r   +   s    r   Unknowni'     u   �unknown_char_map	utf-16-begbkgb2312gb18030cp950)z/Identity-Hz/Identity-Vz	/GB-EUC-Hz	/GB-EUC-Vz/GBpc-EUC-Hz/GBpc-EUC-Vz
/GBK-EUC-Hz
/GBK-EUC-Vz/GBK2K-Hz/GBK2K-Vz
/ETen-B5-Hz
/ETen-B5-Vz/ETenms-B5-Hz/ETenms-B5-Vz/UniCNS-UTF16-Hz/UniCNS-UTF16-Vz/UniGB-UTF16-Hz/UniGB-UTF16-V_predefined_cmapiX  i        )z/Courierz/Courier-Boldz/Courier-BoldObliquez/Courier-Obliquez
/Helveticaz/Helvetica-Boldz/Helvetica-BoldObliquez/Helvetica-Obliquez/Helvetica-Narrowz/Helvetica-NarrowBoldz/Helvetica-NarrowBoldObliquez/Helvetica-NarrowObliquez/Times-Romanz/Times-Boldz/Times-BoldItalicz/Times-Italicz/Symbolz/ZapfDingbats_default_fonts_space_width)r   r   c                 C   sD   t | }t| \}}t|tr<|D ]}|dkr"t|||< q"||fS )N   )_parse_encoding_parse_to_unicode
isinstancedictchr)r   r(   r)   	int_entryxr   r   r   r$      s    
r$   c              	   C   s  g }d| krPd| krHt t| d tkrHtttdtt t| d  }nd}|S t ttttf | d 	 }t
|trzB|tkrt|  }n(|tkrt| }nd|krd}ntdW n* tk
r   td| d	t |}Y nX ntt
|trNd
|krNztt t|d
   }W n4 tk
rJ   td| d	t td  }Y nX ntd  }t
|trd|krd}t t|d D ]^}t
|tr|}nFz|t|k rt| ||< W n tk
r   |||< Y nX |d7 }qt
|tr tttd|}|S )Nz	/Encoding	/BaseFontr,   charmapz-UCS2-r.   z	not foundzAdvanced encoding z not implemented yetz/BaseEncodingz/StandardEncodingz/Differencesr   r   )r
   r"   r   r;   zipranger	   r   r   r#   r:   copyr3   	Exceptionr   __name__intlenr   list)r   r(   encr>   or   r   r   r8      s`     



r8   c                 C   s~   i }g }d| kr4|  dddkr,t| ||S i g fS d}d}d }t| }|dD ]"}t|d|||||\}}}qR||fS )N
/ToUnicoder    z/Type1F   
s    	)get_type1_alternative
prepare_cmsplitprocess_cm_linestrip)r   r)   r=   
process_rgprocess_charmultiline_rgcmliner   r   r   r9      s*    	r9   )
value_charr(   r)   r   c                    sH   i }t |tr& fdd| D }n fdd| D }|  S )Nc                    s"   i | ]\}}| kr|t |qS r   )r<   .0keyvaluerY   r   r   
<dictcomp>   s       z&get_actual_str_key.<locals>.<dictcomp>c                    s   i | ]\}}| kr||qS r   r   rZ   r^   r   r   r_      s       )r:   r;   itemsrN   )rY   r(   r)   Zkey_dictr   r^   r   r%      s
    
r%   c                 C   s  | d }t |tr&tt| d  }nd}t |tr<| }| ddddddd	d
dddd}|	d}t
t|D ]`}|| d}|dkr|dkrd}n|| d | dd}|d || |d d   ||< qd|dddddd}|S )NrK   s,   beginbfrange
<0000> <0001> <0000>
endbfrange   beginbfchars   
beginbfchar
	   endbfchars   
endbfchar
   beginbfranges   
beginbfrange

   endbfranges   
endbfrange
s   <<s   
{
s   >>s   
}
   <   >r      .        r      [s    [    ]s    ]
    rM   )r:   r   r
   r   get_datar"   encoderS   replacerQ   rB   rG   findjoin)r   ZturW   Zllijcontentr   r   r   rP      s\    

      	
"
   rP   )rX   rT   rU   rV   r)   r=   r   c              
   C   s   | dks| d dkr|||fS |  dd} d| kr8d}nd| krFd	}nd
| krTd}nrd| krbd	}nd|rzt| |||}W q tjk
r } ztd| d| t W 5 d }~X Y qX n|rt| || |||fS )Nri   r   %      	rh   rc   Trd   Fra   rb   zSkipping broken line z: )ro   parse_bfrangebinasciir   r   rE   parse_bfchar)rX   rT   rU   rV   r)   r=   errorr   r   r   rR   #  s&    
*rR   )rX   r)   r=   rV   r   c                 C   s  dd |  dD }d}|d k	rd|d d  }|d }|d	 }|D ]Z}	|	d
krXd} qt|	dd|t|| |d d	krdndd< || |d	7 }qDn`t|d d}t|d	 d}tt|d t|d	 }
t|
d |d< d|d d  }|d dkrz|dd  D ]b}	|	d
kr,d} qt|	dd|t|| |d d	krZdndd< || |d	7 }qnt|d d}dtdt|d  }d}||krt|| dd|t|| |d d	krdndd< || |d	7 }|d	7 }q|rd S ||fS )Nc                 S   s   g | ]}|r|qS r   r   r[   r>   r   r   r   
<listcomp>F  s      z!parse_bfrange.<locals>.<listcomp>rh   Fs   %%0%dX   r   r   rk   Tr.   surrogatepassr@      rj         )rQ   r   decodeappendrF   maxrG   r   )rX   r)   r=   rV   lstZclosure_foundfmtabsqZnbicZfmt2r   r   r   rw   @  sj    




rw   )rX   r)   r=   r   c              
   C   s   dd |  dD }t|d d |d< t|dkrd}|d d	krz,t|d t|d d
k rfdndd}W n> tk
r } z td|d|d dt W 5 d }~X Y nX ||t|d |d dkrdndd< |t|d d |dd  }q(d S )Nc                 S   s   g | ]}|r|qS r   r   r{   r   r   r   r|   }  s      z parse_bfchar.<locals>.<listcomp>rh   r   r~   r}   r   rL   rg   r   r@   r.   r   zGot invalid hex string: z ()r   )	rQ   rG   r   r   BinasciiErrorr   rE   r   rF   )rX   r)   r=   r   Zmap_to	exceptionr   r   r   ry   |  s*     . ry   )r   default_font_widthr   c              
   C   s  i }d}d}zt tt| d   d }W n tk
r>   Y nX d| kr| d d  }d|krztt|d  |d< n||d< d|kr|d  }ng }t|dkrt|d tr|d n
|d  }|d  }t|trT|}|d	  }t|ttfs$t	d
| dt
 |dd  }qt||d D ]}	||t|	< q2|dd  }qt|tr|}	|D ]"}
|
 }||t|	< |	d7 }	qh|d	d  }qt	d|d   t
 qqnd| krtt| d  }d| kr
dtt| d kr
| d d  |d< nHd}d}|D ](}| }|dkr||7 }|d7 }q|td| |d< tt| d }tt| d }t||d D ]B}	z ||	|   }||t|	< W n ttfk
r   Y nX q|t|dr|r|nd|d< |S )Nr   r?   r!   z/DescendantFontsz/DWdefaultz/Wr   r~   z&Expected numeric value for width, got z. Ignoring it.r   zunknown widths : 
z/Widths/FontDescriptorz/MissingWidthz
/FirstCharz	/LastChar        )r6   r
   r"   r#   KeyErrorfloatrG   r:   rF   r   rE   rB   r<   rH   __repr__r   r   r   
IndexErrorr   rN   )r   r   r*   stenZft1wsecondwidthZc_codeZwwmZcptZxxr   r   r   r&     s    
"

 

r&   )r*   
space_charr   c              	   C   sF   z| | }|dkrt dW n$ tt fk
r@   | d d }Y nX |S )Nr   z
Zero widthr   r!   )
ValueErrorr   )r*   r   Zsp_widthr   r   r   r'     s    
r'   )r*   charr   c                 C   s2   d}z| | }W n t k
r,   | d }Y nX |S )Nr   r   )r   )r*   r   Z
char_widthr   r   r   compute_font_width  s    r   )r   r)   r=   r   c           
      C   s  d| kr||fS t t| d d}t|r4||fS |d k	sDtd|  }|dd }|dd }|dd	d	}|D ]}|	d
rdd |dD }t
|dkr|d dkrqzt|d }W n tk
r   Y qY nX zt|d   }	W nh tk
rd   |d 	dr\ztt|d dd  d}	W n tk
rX   Y Y qY nX nY qY nX |	|t|< || q||fS )Nr   z	/FontFileZmypys   eexec
r   s	   /Encodingr   rl   rM   s   dupc                 S   s   g | ]}|d kr|qS )ri   r   )r[   _wr   r   r   r|     s      z&_type1_alternative.<locals>.<listcomp>rh   r   s   putr~   s   /unir   r   )r
   r   rN   r   AssertionErrorr#   rm   rQ   ro   
startswithrG   rF   r   r   r   r   r<   r   )
r   r)   r=   Zft_desctxtlinesliwordsrr   vr   r   r   rO     s>    


rO   )4rx   r   r   r   mathr   typingr   r   r   r   r	   r
   _codecsr   r   _utilsr   r   Zgenericr   r   r   r   r   r   r"   r   rF   r   r   r;   fromkeysrB   r-   __annotations__r3   r6   r$   r8   r9   r%   bytesrP   boolrR   rw   ry   r&   r'   r   rO   r   r   r   r   <module>   s        ( &"0	"<"  
*

<  
S
 

