21
21
v1.1, 1993. ISBN 0-201-57044-0.
22
22
"""
23
23
24
+ import base64
24
25
import binascii
25
26
import functools
27
+ import itertools
26
28
import logging
27
29
import re
28
30
import string
36
38
_log = logging .getLogger (__name__ )
37
39
38
40
41
+ def _make_tag (set ):
42
+ """
43
+ Hash set into a six-character tag made of uppercase letters
44
+
45
+ Useful for adding a tag into subsetted fonts while keeping the code
46
+ reproducible. The function always returns the same value for the
47
+ same set on the same exact Python version but is not guaranteed to
48
+ not have collisions.
49
+
50
+ Parameters
51
+ ----------
52
+ set : iterable
53
+ The set of glyphs present in a font subset
54
+
55
+ Returns
56
+ -------
57
+ str
58
+ Six uppercase ASCII letters and a plus sign
59
+ """
60
+
61
+ # freeze the set to make it hashable, interpret the hash as bytes
62
+ array = struct .pack ("@q" , hash (frozenset (set )))
63
+ # turn the bytes into characters with b32encode, which uses uppercase
64
+ # letters and numbers from 2 to 7 - remap those arbitrarily
65
+ trans = str .maketrans ('234567' , 'MTPLIB' , '=' )
66
+ return (base64 .b32encode (array ).decode ('ascii' )
67
+ .translate (trans )[:6 ] + '+' )
68
+
69
+
39
70
class _Token :
40
71
"""
41
72
A token in a PostScript stream
@@ -627,8 +658,7 @@ def _parse_subrs(self, tokens, _data):
627
658
628
659
return array , next (tokens ).endpos ()
629
660
630
- @staticmethod
631
- def _parse_charstrings (tokens , _data ):
661
+ def _parse_charstrings (self , tokens , _data ):
632
662
count_token = next (tokens )
633
663
if not count_token .is_number ():
634
664
raise RuntimeError (
@@ -650,7 +680,12 @@ def _parse_charstrings(tokens, _data):
650
680
f"Token following /{ glyphname } in CharStrings definition "
651
681
f"must be a number, was { nbytes_token } "
652
682
)
653
- next (tokens ) # usually RD or |-
683
+ token = next (tokens )
684
+ if not token .is_keyword (self ._abbr ['RD' ]):
685
+ raise RuntimeError (
686
+ "Token preceding charstring must be {self._abbr['RD']}, "
687
+ f"was { token } "
688
+ )
654
689
binary_token = tokens .send (1 + nbytes_token .value ())
655
690
charstrings [glyphname ] = binary_token .value ()
656
691
@@ -681,16 +716,15 @@ def _parse_encoding(tokens, _data):
681
716
continue
682
717
encoding [index_token .value ()] = name_token .value ()
683
718
684
- @staticmethod
685
- def _parse_othersubrs (tokens , data ):
719
+ def _parse_othersubrs (self , tokens , data ):
686
720
init_pos = None
687
721
while True :
688
722
token = next (tokens )
689
723
if init_pos is None :
690
724
init_pos = token .pos
691
725
if token .is_delim ():
692
726
_expression (token , tokens , data )
693
- elif token .is_keyword ('def' , 'ND' , '|-' ):
727
+ elif token .is_keyword ('def' , self . _abbr [ 'ND' ] ):
694
728
return data [init_pos :token .endpos ()], token .endpos ()
695
729
696
730
def transform (self , effects ):
@@ -745,7 +779,7 @@ def transform(self, effects):
745
779
fontmatrix = (
746
780
'[%s]' % ' ' .join (_format_approx (x , 6 ) for x in array )
747
781
)
748
- replacements = (
782
+ newparts = self . _replace (
749
783
[(x , '/FontName/%s def' % fontname )
750
784
for x in self ._pos ['FontName' ]]
751
785
+ [(x , '/ItalicAngle %a def' % italicangle )
@@ -755,11 +789,40 @@ def transform(self, effects):
755
789
+ [(x , '' ) for x in self ._pos .get ('UniqueID' , [])]
756
790
)
757
791
792
+ return Type1Font ((
793
+ newparts [0 ],
794
+ self ._encrypt (newparts [1 ], 'eexec' ),
795
+ self .parts [2 ]
796
+ ))
797
+
798
+ def _replace (self , replacements ):
799
+ """
800
+ Change the font according to `replacements`
801
+
802
+ Parameters
803
+ ----------
804
+ replacements : list of ((int, int), str)
805
+ Each element is ((pos0, pos1), replacement) where pos0 and
806
+ pos1 are indices to the original font data (parts[0] and the
807
+ decrypted part concatenated). The data in the interval
808
+ pos0:pos1 will be replaced by the replacement text. To
809
+ accommodate binary data, the replacement is taken to be in
810
+ Latin-1 encoding.
811
+
812
+ The case where pos0 is inside parts[0] and pos1 inside
813
+ the decrypted part is not supported.
814
+
815
+ Returns
816
+ -------
817
+ (bytes, bytes)
818
+ The new parts[0] and decrypted part (which needs to be
819
+ encrypted in the transformed font).
820
+ """
758
821
data = bytearray (self .parts [0 ])
759
822
data .extend (self .decrypted )
760
823
len0 = len (self .parts [0 ])
761
824
for (pos0 , pos1 ), value in sorted (replacements , reverse = True ):
762
- data [pos0 :pos1 ] = value .encode ('ascii' , 'replace ' )
825
+ data [pos0 :pos1 ] = value .encode ('latin-1 ' )
763
826
if pos0 < len (self .parts [0 ]):
764
827
if pos1 >= len (self .parts [0 ]):
765
828
raise RuntimeError (
@@ -769,12 +832,211 @@ def transform(self, effects):
769
832
len0 += len (value ) - pos1 + pos0
770
833
771
834
data = bytes (data )
835
+ return data [:len0 ], data [len0 :]
836
+
837
def subset(self, characters):
    """
    Return a new font that only defines the given characters.

    The glyphs for the requested codes (plus ``.notdef`` at code 0) are
    simulated to discover every glyph and subroutine they depend on;
    the FontName, CharStrings, Subrs and Encoding definitions are then
    rewritten and any UniqueID definitions are removed.

    Parameters
    ----------
    characters : sequence of bytes
        The subset of characters to include; each element is matched
        against the keys of ``self.prop['Encoding']``.

    Returns
    -------
    `Type1Font`
    """
    wanted = set(characters)
    encoding = {}
    for code, glyph in self.prop['Encoding'].items():
        if code in wanted:
            encoding[code] = glyph
    encoding[0] = '.notdef'

    # glyph_names and simulated hold strings (glyph names)
    glyph_names = set(encoding.values())
    simulated = set()
    # subroutines 0-3 are always kept
    subr_indices = {0, 1, 2, 3}
    pending = glyph_names - simulated
    while pending:
        name = next(iter(pending))
        called_glyphs, called_subrs, _, _ = self._simulate(name, [], [])
        glyph_names.update(called_glyphs)
        subr_indices.update(called_subrs)
        simulated.add(name)
        pending = glyph_names - simulated

    fontname = _make_tag(glyph_names) + self.prop['FontName']
    charstrings = self._subset_charstrings(glyph_names)
    subrs = self._subset_subrs(subr_indices)

    replacements = [
        (pos, '/FontName/%s def' % fontname)
        for pos in self._pos['FontName']
    ]
    replacements.append((self._pos['CharStrings'][0], charstrings))
    replacements.append((self._pos['Subrs'][0], subrs))
    replacements.append(
        (self._pos['Encoding'][0], self._subset_encoding(encoding)))
    replacements.extend((pos, '') for pos in self._pos.get('UniqueID', []))

    plain, decrypted = self._replace(replacements)
    return Type1Font((
        plain,
        self._encrypt(decrypted, 'eexec'),
        self.parts[2],
    ))
777
883
884
+ @staticmethod
885
+ def _charstring_tokens (data ):
886
+ data = iter (data )
887
+ for byte in data :
888
+ if 32 <= byte <= 246 :
889
+ yield byte - 139
890
+ elif 247 <= byte <= 250 :
891
+ byte2 = next (data )
892
+ yield (byte - 247 ) * 256 + byte2 + 108
893
+ elif 251 <= byte <= 254 :
894
+ byte2 = next (data )
895
+ yield - (byte - 251 )* 256 - byte2 - 108
896
+ elif byte == 255 :
897
+ bs = itertools .islice (data , 4 )
898
+ yield struct .unpack ('>i' , bs )[0 ]
899
+ elif byte == 12 :
900
+ byte1 = next (data )
901
+ yield {
902
+ 0 : 'dotsection' ,
903
+ 1 : 'vstem3' ,
904
+ 2 : 'hstem3' ,
905
+ 6 : 'seac' ,
906
+ 7 : 'sbw' ,
907
+ 12 : 'div' ,
908
+ 16 : 'callothersubr' ,
909
+ 17 : 'pop' ,
910
+ 33 : 'setcurrentpoint'
911
+ }[byte1 ]
912
+ else :
913
+ yield {
914
+ 1 : 'hstem' ,
915
+ 3 : 'vstem' ,
916
+ 4 : 'vmoveto' ,
917
+ 5 : 'rlineto' ,
918
+ 6 : 'hlineto' ,
919
+ 7 : 'vlineto' ,
920
+ 8 : 'rrcurveto' ,
921
+ 9 : 'closepath' ,
922
+ 10 : 'callsubr' ,
923
+ 11 : 'return' ,
924
+ 13 : 'hsbw' ,
925
+ 14 : 'endchar' ,
926
+ 21 : 'rmoveto' ,
927
+ 22 : 'hmoveto' ,
928
+ 30 : 'vhcurveto' ,
929
+ 31 : 'hvcurveto'
930
+ }[byte ]
931
+
932
+ def _step (self , buildchar_stack , postscript_stack , opcode ):
933
+ if isinstance (opcode , int ):
934
+ return set (), set (), buildchar_stack + [opcode ], postscript_stack
935
+ elif opcode in {
936
+ 'hsbw' , 'sbw' , 'closepath' , 'hlineto' , 'hmoveto' , 'hcurveto' ,
937
+ 'hvcurveto' , 'rlineto' , 'rmoveto' , 'rrcurveto' , 'vhcurveto' ,
938
+ 'vlineto' , 'vmoveto' , 'dotsection' , 'hstem' , 'hstem3' , 'vstem' ,
939
+ 'vstem3' , 'setcurrentpoint'
940
+ }:
941
+ return set (), set (), [], postscript_stack
942
+ elif opcode == 'seac' :
943
+ codes = buildchar_stack [3 :5 ]
944
+ glyphs = [self .prop ['Encoding' ][x ] for x in codes ]
945
+ return set (glyphs ), set (), [], postscript_stack
946
+ elif opcode == 'div' :
947
+ num1 , num2 = buildchar_stack [- 2 :]
948
+ return (
949
+ set (),
950
+ set (),
951
+ buildchar_stack [- 2 :] + [num1 / num2 ], postscript_stack
952
+ )
953
+ elif opcode == 'callothersubr' :
954
+ othersubr = buildchar_stack [- 1 ]
955
+ n = buildchar_stack [- 2 ]
956
+ args = buildchar_stack [- 2 - n :- 2 ]
957
+ if othersubr == 3 : # Section 8.1 in Type-1 spec
958
+ postscript_stack .append (args [0 ])
959
+ else :
960
+ postscript_stack .extend (args [::- 1 ])
961
+ return set (), set (), buildchar_stack [:- n - 2 ], postscript_stack
962
+ elif opcode == 'callsubr' :
963
+ subr = buildchar_stack [- 1 ]
964
+ glyphs , subrs , new_bc_stack , new_ps_stack = \
965
+ self ._simulate (subr , buildchar_stack [:- 1 ], postscript_stack )
966
+ return set (), subrs | {subr }, new_bc_stack , new_ps_stack
967
+ elif opcode == 'pop' :
968
+ return (
969
+ set (),
970
+ set (),
971
+ buildchar_stack + [postscript_stack [- 1 ]], postscript_stack [:- 1 ]
972
+ )
973
+ else :
974
+ raise RuntimeError (f'opcode { opcode } ' )
975
+
976
+ def _simulate (self , glyph_or_subr , buildchar_stack , postscript_stack ):
977
+ if isinstance (glyph_or_subr , str ):
978
+ program = self .prop ['CharStrings' ][glyph_or_subr ]
979
+ glyphs = {glyph_or_subr }
980
+ subrs = set ()
981
+ else :
982
+ program = self .prop ['Subrs' ][glyph_or_subr ]
983
+ glyphs = set ()
984
+ subrs = {glyph_or_subr }
985
+ for opcode in self ._charstring_tokens (program ):
986
+ if opcode in ('return' , 'endchar' ):
987
+ return glyphs , subrs , buildchar_stack , postscript_stack
988
+ newglyphs , newsubrs , buildchar_stack , postscript_stack = \
989
+ self ._step (buildchar_stack , postscript_stack , opcode )
990
+ glyphs .update (newglyphs )
991
+ subrs .update (newsubrs )
992
+
993
+ def _subset_encoding (self , encoding ):
994
+ result = [
995
+ '/Encoding 256 array\n 0 1 255 { 1 index exch /.notdef put } for'
996
+ ]
997
+ result .extend (
998
+ f'dup { i } /{ glyph } put'
999
+ for i , glyph in sorted (encoding .items ())
1000
+ if glyph != '.notdef'
1001
+ )
1002
+ result .extend ('readonly def\n ' )
1003
+ return '\n ' .join (result )
1004
+
1005
+ def _subset_charstrings (self , glyphs ):
1006
+ result = [f'/CharStrings { len (glyphs )} dict dup begin' ]
1007
+ encrypted = [self ._encrypt (self .prop ['CharStrings' ][glyph ],
1008
+ 'charstring' ,
1009
+ self .prop .get ('lenIV' , 4 )
1010
+ ).decode ('latin-1' )
1011
+ for glyph in glyphs ]
1012
+ RD , ND = self ._abbr ['RD' ], self ._abbr ['ND' ]
1013
+ result .extend (
1014
+ f'/{ glyph } { len (enc )} { RD } { enc } { ND } '
1015
+ for glyph , enc in zip (glyphs , encrypted )
1016
+ )
1017
+ result .append ('end\n ' )
1018
+ return '\n ' .join (result )
1019
+
1020
+ def _subset_subrs (self , indices ):
1021
+ # we can't remove subroutines, we just replace unused ones with a stub
1022
+ n_subrs = len (self .prop ['Subrs' ])
1023
+ result = [f'/Subrs { n_subrs } array' ]
1024
+ lenIV = self .prop .get ('lenIV' , 4 )
1025
+ stub = self ._encrypt (b'\x0b ' , 'charstring' , lenIV ).decode ('latin-1' )
1026
+ encrypted = [
1027
+ self ._encrypt (self .prop ['Subrs' ][i ], 'charstring' , lenIV
1028
+ ).decode ('latin-1' )
1029
+ if i in indices else stub
1030
+ for i in range (n_subrs )
1031
+ ]
1032
+ RD , ND , NP = self ._abbr ['RD' ], self ._abbr ['ND' ], self ._abbr ['NP' ]
1033
+ result .extend (
1034
+ f'dup { i } { len (enc )} { RD } { enc } { NP } '
1035
+ for i , enc in enumerate (encrypted )
1036
+ )
1037
+ result .extend ((ND , '' ))
1038
+ return '\n ' .join (result )
1039
+
778
1040
779
1041
_StandardEncoding = {
780
1042
** {ord (letter ): letter for letter in string .ascii_letters },
0 commit comments