4
4
"""
5
5
from __future__ import (absolute_import , division , print_function ,
6
6
unicode_literals )
7
-
8
7
import six
9
8
10
9
import numpy as np
11
10
11
+ import matplotlib .cbook as cbook
12
+ import matplotlib .colors as mcolors
12
13
import matplotlib .units as units
13
14
import matplotlib .ticker as ticker
14
15
22
23
def to_array(data, maxlen=100):
    """Convert *data* to a numpy array of strings.

    Parameters
    ----------
    data : scalar, string, or iterable
        Values to convert.
    maxlen : int, optional
        Width of the fixed-size byte-string dtype used on the fallback
        (non-``NP_NEW``) path.

    Returns
    -------
    numpy.ndarray
        Unicode array when ``NP_NEW`` is set, otherwise a ``|S<maxlen>``
        array (or whatever ``convert_to_string`` yields on encode errors).
    """
    if NP_NEW:
        # six.text_type is ``unicode`` on Python 2 and ``str`` on Python 3,
        # which is the builtin the ``np.unicode`` alias stood for; the alias
        # itself is not available in recent numpy releases.
        return np.array(data, dtype=six.text_type)
    # Only the fallback path wraps a lone value into a list.
    if cbook.is_scalar_or_string(data):
        data = [data]
    try:
        vals = np.array(data, dtype=('|S', maxlen))
    except UnicodeEncodeError:
        # this yields gibberish
        vals = np.array([convert_to_string(d) for d in data])
    return vals
31
34
@@ -36,49 +39,83 @@ def convert(value, unit, axis):
36
39
"""Uses axis.unit_data map to encode
37
40
data as floats
38
41
"""
39
- vmap = dict (axis .unit_data )
42
+ vmap = dict (zip ( axis .unit_data . seq , axis . unit_data . locs ) )
40
43
41
44
if isinstance (value , six .string_types ):
42
45
return vmap [value ]
43
46
44
47
vals = to_array (value )
45
- for lab , loc in axis . unit_data :
48
+ for lab , loc in vmap . items () :
46
49
vals [vals == lab ] = loc
47
50
48
51
return vals .astype ('float' )
49
52
50
53
@staticmethod
def axisinfo(unit, axis):
    """Build the AxisInfo for a categorical axis.

    The major locator is created from ``axis.unit_data.locs`` and the
    major formatter from ``axis.unit_data.seq``. *unit* is not used.
    """
    locator = StrCategoryLocator(axis.unit_data.locs)
    formatter = StrCategoryFormatter(axis.unit_data.seq)
    info = units.AxisInfo(majloc=locator, majfmt=formatter)
    return info
56
58
57
59
@staticmethod
def default_units(data, axis):
    """Record the categories found in *data* on *axis* and return None.

    A new ``UnitData`` is attached to the axis when it has none yet;
    otherwise the existing one is updated with *data*.
    """
    # the conversion call stack is:
    # default_units->axis_info->convert
    if axis.unit_data is not None:
        axis.unit_data.update(data)
    else:
        axis.unit_data = UnitData(data)
    return None
63
68
64
69
65
70
class StrCategoryLocator(ticker.FixedLocator):
    """Locator whose tick positions are the given category locations."""

    def __init__(self, locs):
        # The attributes are assigned directly and the parent initializer is
        # not called.
        # NOTE(review): presumably this keeps a reference to the caller's
        # ``locs`` object instead of a copy -- confirm against
        # FixedLocator.__init__.
        self.nbins = None
        self.locs = locs
68
74
69
75
70
76
class StrCategoryFormatter(ticker.FixedFormatter):
    """Formatter whose tick labels are the given category names."""

    def __init__(self, seq):
        # The attributes are assigned directly and the parent initializer is
        # not called.
        # NOTE(review): presumably this keeps a reference to the caller's
        # ``seq`` object instead of a copy -- confirm against
        # FixedFormatter.__init__.
        self.offset_string = ''
        self.seq = seq
80
+
81
+
82
def colors_from_categories(codings):
    """
    A helper routine to generate a cmap and a norm instance where
    a given key in codings is associated with a color value in codings

    Parameters
    ----------
    codings : dict, or iterable of (key, value) pairs, where key is the
              categorical variable and value is its associated color.
              The input is not modified.

    Returns
    -------
    (cmap, norm) : tuple containing a :class:`Colormap` and a
                   :class:`Normalize` instance

    Raises
    ------
    ValueError
        If *codings* is empty.
    """
    pairs = codings.items() if isinstance(codings, dict) else codings
    # sorted() accepts any iterable (lists, tuples, dict views, generators)
    # and returns a new list, so the caller's sequence is left untouched.
    pairs = sorted(pairs)
    if not pairs:
        raise ValueError(
            "codings must contain at least one (key, color) pair")

    cats, cols = zip(*pairs)
    cmap = mcolors.ListedColormap(cols)
    # +inf is appended as the final boundary after the sorted keys
    cats = list(cats) + [np.inf]
    norm = mcolors.BoundaryNorm(cats, cmap.N)
    return cmap, norm
73
110
74
111
75
112
def convert_to_string (value ):
76
113
"""Helper function for numpy 1.6, can be replaced with
77
114
np.array(...,dtype=unicode) for all later versions of numpy"""
78
115
79
116
if isinstance (value , six .string_types ):
80
- return value
81
- if np .isfinite (value ):
117
+ pass
118
+ elif np .isfinite (value ):
82
119
value = np .asarray (value , dtype = str )[np .newaxis ][0 ]
83
120
elif np .isnan (value ):
84
121
value = 'nan'
@@ -91,59 +128,38 @@ def convert_to_string(value):
91
128
return value
92
129
93
130
94
- def map_categories (data , old_map = None ):
95
- """Create mapping between unique categorical
96
- values and numerical identifier.
97
-
98
- Paramters
99
- ---------
100
- data: iterable
101
- sequence of values
102
- old_map: list of tuple, optional
103
- if not `None`, than old_mapping will be updated with new values and
104
- previous mappings will remain unchanged)
105
- sort: bool, optional
106
- sort keys by ASCII value
107
-
108
- Returns
109
- -------
110
- list of tuple
111
- [(label, ticklocation),...]
112
-
113
- """
114
-
115
- # code typical missing data in the negative range because
116
- # everything else will always have positive encoding
117
- # question able if it even makes sense
131
class UnitData(object):
    """Mapping between unique categorical values and numeric locations.

    ``seq`` holds the category labels and ``locs`` the location assigned
    to each label, index for index.
    """

    # debatable whether it makes sense to special-code missing values
    spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0}

    def __init__(self, data):
        """Create mapping between unique categorical values
        and numerical identifier

        Parameters
        ----------
        data: iterable
            sequence of values
        """
        self.seq, self.locs = [], []
        self._set_seq_locs(data, 0)

    def update(self, new_data):
        """Add the categories in *new_data* that are not yet known.

        Existing labels keep their locations; new labels are numbered
        starting after the largest location recorded so far.
        """
        if self.locs:
            # floor at 0 so as not to conflict with the negative codes
            # in spdict
            value = max(max(self.locs) + 1, 0)
        else:
            # nothing recorded yet: max() of an empty list would raise
            value = 0
        self._set_seq_locs(new_data, value)

    def _set_seq_locs(self, data, value):
        """Append unseen labels from *data*, numbering them from *value*.

        Labels found in ``spdict`` get their fixed code instead and do
        not advance the counter.
        """
        # magic to make it work under np1.6
        strdata = to_array(data)
        # np.unique makes dataframes work
        new_s = [d for d in np.unique(strdata) if d not in self.seq]
        for ns in new_s:
            self.seq.append(convert_to_string(ns))
            if ns in UnitData.spdict:
                self.locs.append(UnitData.spdict[ns])
            else:
                self.locs.append(value)
                value += 1
147
163
148
164
# Connects the converter to matplotlib: str data is handled by StrCategoryConverter
149
165
units .registry [str ] = StrCategoryConverter ()
0 commit comments