@@ -870,7 +870,7 @@ def subset(self, characters: Iterable[int], name_prefix: str) -> T.Self:
870
870
seen_subrs = {0 , 1 , 2 , 3 }
871
871
while todo :
872
872
glyph = todo .pop ()
873
- called_glyphs , called_subrs , _ , _ = self . _simulate (glyph , [], [] )
873
+ called_glyphs , called_subrs = _CharstringSimulator ( self ). run (glyph )
874
874
todo .update (called_glyphs - done )
875
875
seen_subrs .update (called_subrs )
876
876
done .add (glyph )
@@ -943,116 +943,6 @@ def _charstring_tokens(data: T.Iterable[int]) -> T.Generator[int | str, None, No
943
943
31 : 'hvcurveto'
944
944
}[byte ]
945
945
946
- def _simulate (
947
- self , glyph_or_subr : str | int , buildchar_stack : list [float ],
948
- postscript_stack : list [float ]
949
- ) -> tuple [set [str ], set [int ], list [float ], list [float ]]:
950
- """Run the charstring interpreter on a glyph or subroutine.
951
-
952
- This does not actually execute the code but simulates it to find out
953
- which subroutines get called when executing the glyph or subroutine.
954
-
955
- Parameters
956
- ----------
957
- glyph_or_subr : str or int
958
- The name of the glyph or the index of the subroutine to simulate.
959
- buildchar_stack : list[float]
960
- The buildchar stack at the start of the simulation.
961
- postscript_stack : list[float]
962
- The PostScript stack at the start of the simulation.
963
-
964
- Returns
965
- -------
966
- glyphs : set[str]
967
- The set of glyph names called by the glyph or subroutine.
968
- subrs : set[int]
969
- The set of subroutines called by the glyph or subroutine.
970
- buildchar_stack : list[float]
971
- The buildchar stack at the end of the simulation.
972
- postscript_stack : list[float]
973
- The PostScript stack at the end of the simulation.
974
- """
975
- if isinstance (glyph_or_subr , str ):
976
- program = self .prop ['CharStrings' ][glyph_or_subr ]
977
- glyphs = {glyph_or_subr }
978
- subrs = set ()
979
- else :
980
- program = self .prop ['Subrs' ][glyph_or_subr ]
981
- glyphs = set ()
982
- subrs = {glyph_or_subr }
983
- for opcode in self ._charstring_tokens (program ):
984
- if opcode in ('return' , 'endchar' ):
985
- return glyphs , subrs , buildchar_stack , postscript_stack
986
- newglyphs , newsubrs , buildchar_stack , postscript_stack = \
987
- self ._step (buildchar_stack , postscript_stack , opcode )
988
- glyphs .update (newglyphs )
989
- subrs .update (newsubrs )
990
- else :
991
- font_name = self .prop .get ('FontName' , '(unknown)' )
992
- _log .info (
993
- f"Glyph or subr { glyph_or_subr } in font { font_name } does not end "
994
- "with return or endchar"
995
- )
996
- return glyphs , subrs , buildchar_stack , postscript_stack
997
-
998
- def _step (
999
- self ,
1000
- buildchar_stack : list [float ],
1001
- postscript_stack : list [float ],
1002
- opcode : int | str ,
1003
- ) -> tuple [set , set , list [float ], list [float ]]:
1004
- """Run one step in the charstring interpreter."""
1005
- if isinstance (opcode , int ):
1006
- return set (), set (), buildchar_stack + [opcode ], postscript_stack
1007
- elif opcode in {
1008
- 'hsbw' , 'sbw' , 'closepath' , 'hlineto' , 'hmoveto' , 'hcurveto' ,
1009
- 'hvcurveto' , 'rlineto' , 'rmoveto' , 'rrcurveto' , 'vhcurveto' ,
1010
- 'vlineto' , 'vmoveto' , 'dotsection' , 'hstem' , 'hstem3' , 'vstem' ,
1011
- 'vstem3' , 'setcurrentpoint'
1012
- }:
1013
- return set (), set (), [], postscript_stack
1014
- elif opcode == 'seac' : # Standard Encoding Accented Character
1015
- codes = buildchar_stack [3 :5 ]
1016
- glyphs : set [str ] = {_StandardEncoding [int (x )] for x in codes }
1017
- return glyphs , set (), [], postscript_stack
1018
- elif opcode == 'div' :
1019
- num1 , num2 = buildchar_stack [- 2 :]
1020
- return set (), set (), buildchar_stack [- 2 :] + [num1 / num2 ], postscript_stack
1021
- elif opcode == 'callothersubr' :
1022
- othersubr = buildchar_stack [- 1 ]
1023
- n = buildchar_stack [- 2 ]
1024
- if not isinstance (n , int ):
1025
- _log .warning (
1026
- f"callothersubr { othersubr } with non-integer argument count in "
1027
- f"font { self .prop ['FontName' ]} "
1028
- )
1029
- n = int (n )
1030
- args = buildchar_stack [- 2 - n :- 2 ]
1031
- if othersubr == 3 : # Section 8.1 in Type-1 spec
1032
- postscript_stack .append (args [0 ])
1033
- else :
1034
- postscript_stack .extend (args [::- 1 ])
1035
- return set (), set (), buildchar_stack [:- n - 2 ], postscript_stack
1036
- elif opcode == 'callsubr' :
1037
- subr = buildchar_stack [- 1 ]
1038
- if not isinstance (subr , int ):
1039
- _log .warning (
1040
- f"callsubr with non-integer argument { subr } in font "
1041
- f"{ self .prop ['FontName' ]} "
1042
- )
1043
- subr = int (subr )
1044
- glyphs , subrs , new_bc_stack , new_ps_stack = \
1045
- self ._simulate (subr , buildchar_stack [:- 1 ], postscript_stack )
1046
- return set (), subrs | {subr }, new_bc_stack , new_ps_stack
1047
- elif opcode == 'pop' :
1048
- return (
1049
- set (),
1050
- set (),
1051
- buildchar_stack + [postscript_stack [- 1 ]], postscript_stack [:- 1 ]
1052
- )
1053
- else :
1054
- raise RuntimeError (f'opcode { opcode } ' )
1055
-
1056
946
def _postscript_encoding (self , encoding : dict [int , str ]) -> str :
1057
947
"""Return a PostScript encoding array for the encoding."""
1058
948
return '\n ' .join ([
@@ -1106,6 +996,116 @@ def _subset_subrs(self, indices: set[int]) -> str:
1106
996
])
1107
997
1108
998
999
+ class _CharstringSimulator :
1000
+ __slots__ = ('font' , 'buildchar_stack' , 'postscript_stack' , 'glyphs' , 'subrs' )
1001
+ font : Type1Font
1002
+ buildchar_stack : list [float ]
1003
+ postscript_stack : list [float ]
1004
+ glyphs : set [str ]
1005
+ subrs : set [int ]
1006
+
1007
+ def __init__ (self , font : Type1Font ):
1008
+ self .font = font
1009
+ self .buildchar_stack = []
1010
+ self .postscript_stack = []
1011
+ self .glyphs = set ()
1012
+ self .subrs = set ()
1013
+
1014
+ def run (self , glyph_or_subr : str | int ) -> tuple [set [str ], set [int ]]:
1015
+ """Run the charstring interpreter on a glyph or subroutine.
1016
+
1017
+ This does not actually execute the code but simulates it to find out
1018
+ which subroutines get called when executing the glyph or subroutine.
1019
+
1020
+ Parameters
1021
+ ----------
1022
+ glyph_or_subr : str or int
1023
+ The name of the glyph or the index of the subroutine to simulate.
1024
+
1025
+ Returns
1026
+ -------
1027
+ glyphs : set[str]
1028
+ The set of glyph names called by the glyph or subroutine.
1029
+ subrs : set[int]
1030
+ The set of subroutines called by the glyph or subroutine.
1031
+ """
1032
+ if isinstance (glyph_or_subr , str ):
1033
+ program = self .font .prop ['CharStrings' ][glyph_or_subr ]
1034
+ self .glyphs .add (glyph_or_subr )
1035
+ else :
1036
+ program = self .font .prop ['Subrs' ][glyph_or_subr ]
1037
+ self .subrs .add (glyph_or_subr )
1038
+ for opcode in self .font ._charstring_tokens (program ):
1039
+ if opcode in ('return' , 'endchar' ):
1040
+ return self .glyphs , self .subrs
1041
+ self ._step (opcode )
1042
+ else :
1043
+ font_name = self .font .prop .get ('FontName' , '(unknown)' )
1044
+ _log .info (
1045
+ f"Glyph or subr { glyph_or_subr } in font { font_name } does not end "
1046
+ "with return or endchar"
1047
+ )
1048
+ return self .glyphs , self .subrs
1049
+
1050
+ def _step (self , opcode : int | str ) -> None :
1051
+ """Run one step in the charstring interpreter."""
1052
+ match opcode :
1053
+ case _ if isinstance (opcode , int ):
1054
+ self .buildchar_stack .append (opcode )
1055
+ case (
1056
+ 'hsbw' | 'sbw' | 'closepath' | 'hlineto' | 'hmoveto' | 'hcurveto' |
1057
+ 'hvcurveto' | 'rlineto' | 'rmoveto' | 'rrcurveto' | 'vhcurveto' |
1058
+ 'vlineto' | 'vmoveto' | 'dotsection' | 'hstem' | 'hstem3' |
1059
+ 'vstem' | 'vstem3' | 'setcurrentpoint'
1060
+ ):
1061
+ self .buildchar_stack .clear ()
1062
+ case 'seac' : # Standard Encoding Accented Character
1063
+ codes = self .buildchar_stack [3 :5 ]
1064
+ self .glyphs .update (_StandardEncoding [int (x )] for x in codes )
1065
+ self .buildchar_stack .clear ()
1066
+ case 'div' :
1067
+ num1 , num2 = self .buildchar_stack [- 2 :]
1068
+ if num2 == 0 :
1069
+ _log .warning (
1070
+ f"Division by zero in font { self .font .prop ['FontName' ]} "
1071
+ )
1072
+ self .buildchar_stack [- 2 :] = [0 ]
1073
+ else :
1074
+ self .buildchar_stack [- 2 :] = [num1 / num2 ]
1075
+ case 'callothersubr' :
1076
+ n , othersubr = self .buildchar_stack [- 2 :]
1077
+ if not isinstance (n , int ):
1078
+ _log .warning (
1079
+ f"callothersubr { othersubr } with non-integer argument "
1080
+ f"count in font { self .font .prop ['FontName' ]} "
1081
+ )
1082
+ n = int (n )
1083
+ args = self .buildchar_stack [- 2 - n :- 2 ]
1084
+ if othersubr == 3 :
1085
+ self .postscript_stack .append (args [0 ])
1086
+ else :
1087
+ self .postscript_stack .extend (args [::- 1 ])
1088
+ self .buildchar_stack [- 2 - n :] = []
1089
+ case 'callsubr' :
1090
+ subr = self .buildchar_stack .pop ()
1091
+ if not isinstance (subr , int ):
1092
+ _log .warning (
1093
+ f"callsubr with non-integer argument { subr } in font "
1094
+ f"{ self .font .prop ['FontName' ]} "
1095
+ )
1096
+ subr = int (subr )
1097
+ self .run (subr )
1098
+ case 'pop' :
1099
+ if not self .postscript_stack :
1100
+ _log .warning (
1101
+ f"pop with empty stack in font { self .font .prop ['FontName' ]} "
1102
+ )
1103
+ self .postscript_stack .append (0 )
1104
+ self .buildchar_stack .append (self .postscript_stack .pop ())
1105
+ case _:
1106
+ raise RuntimeError (f'opcode { opcode } ' )
1107
+
1108
+
1109
1109
_StandardEncoding = {
1110
1110
** {ord (letter ): letter for letter in string .ascii_letters },
1111
1111
0 : '.notdef' ,
0 commit comments