53
53
Apply a window along a given axis
54
54
"""
55
55
56
- import csv
57
56
import functools
58
57
from numbers import Number
59
58
@@ -985,286 +984,6 @@ def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning,
985
984
return Cxy , f
986
985
987
986
988
def _csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
             converterd=None, names=None, missing='', missingd=None,
             use_mrecords=False, dayfirst=False, yearfirst=False):
    """
    Load data from comma/space/tab delimited file in *fname* into a
    numpy record array and return the record array.

    If *names* is *None*, a header row is required to automatically
    assign the recarray names.  The headers will be lower cased,
    spaces will be converted to underscores, and illegal attribute
    name characters removed.  If *names* is not *None*, it is a
    sequence of names to use for the column names.  In this case, it
    is assumed there is no header row.

    - *fname*: can be a filename or a file handle.  Support for gzipped
      files is automatic, if the filename ends in '.gz'

    - *comments*: the character used to indicate the start of a comment
      in the file, or *None* to switch off the removal of comments

    - *skiprows*: is the number of rows from the top to skip

    - *checkrows*: is the number of rows to check to validate the column
      data type.  When set to zero all rows are validated.

    - *converterd*: if not *None*, is a dictionary mapping column number or
      munged column name to a converter function.

    - *names*: if not None, is a list of header names (or a single
      comma-separated string of names).  In this case, no header will be
      read from the file

    - *missingd* is a dictionary mapping munged column names to field values
      which signify that the field does not contain actual data and should
      be masked, e.g., '0000-00-00' or 'unused'

    - *missing*: a string whose value signals a missing field regardless of
      the column it appears in

    - *use_mrecords*: if True, return an mrecords.fromrecords record array if
      any of the data are missing

    - *dayfirst*: default is False so that MM-DD-YY has precedence over
      DD-MM-YY.  Passed through to ``dateutil.parser.parse``.

    - *yearfirst*: default is False so that MM-DD-YY has precedence over
      YY-MM-DD.  Passed through to ``dateutil.parser.parse``.

    Returns the record array, or *None* if no data rows are found after
    blank and comment rows have been skipped.

    Raises ``ValueError`` if the file contains no usable header or data
    row, or if no conversion function works for some field.

    The file handle is always closed before returning, including when an
    exception is raised.
    """

    if converterd is None:
        converterd = dict()

    if missingd is None:
        missingd = {}

    import dateutil.parser
    import datetime

    fh = cbook.to_filehandle(fname)

    delimiter = str(delimiter)

    class FH:
        """
        For space-delimited files, we want different behavior than
        comma or tab.  Generally, we want multiple spaces to be
        treated as a single separator, whereas with comma and tab we
        want multiple commas to return multiple (empty) fields.  The
        join/strip trick below effects this.
        """
        def __init__(self, fh):
            self.fh = fh

        def close(self):
            self.fh.close()

        def seek(self, arg):
            self.fh.seek(arg)

        def fix(self, s):
            return ' '.join(s.split())

        def __next__(self):
            return self.fix(next(self.fh))

        def __iter__(self):
            for line in self.fh:
                yield self.fix(line)

    if delimiter == ' ':
        fh = FH(fh)

    def process_skiprows(reader):
        """Consume the first *skiprows* rows of *reader*, if any."""
        if skiprows:
            for i, row in enumerate(reader):
                if i >= (skiprows - 1):
                    break

    def ismissing(name, val):
        """Return whether the value val in column name should be masked."""
        return val == missing or val == missingd.get(name) or val == ''

    def with_default_value(func, default):
        # Adapt a one-argument converter to the (name, val) signature used
        # throughout, returning *default* for fields flagged as missing.
        def newfunc(name, val):
            if ismissing(name, val):
                return default
            else:
                return func(val)
        return newfunc

    def mybool(x):
        if x == 'True':
            return True
        elif x == 'False':
            return False
        else:
            raise ValueError('invalid bool')

    dateparser = dateutil.parser.parse

    def mydateparser(x):
        # try and return a datetime object
        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)
        return d

    mydateparser = with_default_value(mydateparser,
                                      datetime.datetime(1, 1, 1))

    myfloat = with_default_value(float, np.nan)
    myint = with_default_value(int, -1)
    mystr = with_default_value(str, '')
    mybool = with_default_value(mybool, None)

    def mydate(x):
        # try and return a date object
        d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)

        if d.hour > 0 or d.minute > 0 or d.second > 0:
            raise ValueError('not a date')
        return d.date()
    mydate = with_default_value(mydate, datetime.date(1, 1, 1))

    def get_func(name, item, func):
        # promote functions in this order
        funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr]
        for func in funcs[funcs.index(func):]:
            try:
                func(name, item)
            except Exception:
                # Any failure simply means "this converter does not fit";
                # move on to the next, more permissive one.
                continue
            return func
        raise ValueError('Could not find a working conversion function')

    # map column names that clash with builtins -- TODO - extend this list
    itemd = {
        'return': 'return_',
        'file': 'file_',
        'print': 'print_',
    }

    def get_converters(reader, comments):
        """
        Infer one converter per column by inspecting up to *checkrows*
        non-comment rows (all rows when *checkrows* is zero).  Returns
        *None* if the reader yields no non-comment row.
        """
        converters = None
        i = 0
        for row in reader:
            if (len(row) and comments is not None and
                    row[0].startswith(comments)):
                continue
            if i == 0:
                converters = [mybool] * len(row)
            # ``>=`` so that exactly *checkrows* rows are inspected
            # (``>`` would inspect one row too many).
            if checkrows and i >= checkrows:
                break
            i += 1

            for j, (name, item) in enumerate(zip(names, row)):
                func = converterd.get(j)
                if func is None:
                    func = converterd.get(name)
                if func is None:
                    func = converters[j]
                    if len(item.strip()):
                        func = get_func(name, item, func)
                else:
                    # how should we handle custom converters and defaults?
                    func = with_default_value(func, None)
                converters[j] = func
        return converters

    # Everything that reads from the handle lives inside try/finally so the
    # handle is released even if parsing or type inference fails.
    try:
        reader = csv.reader(fh, delimiter=delimiter)
        process_skiprows(reader)

        # Get header and remove invalid characters
        needheader = names is None

        if needheader:
            headers = None
            for row in reader:
                if (len(row) and comments is not None and
                        row[0].startswith(comments)):
                    continue
                headers = row
                break

            if headers is None:
                # Nothing but comments (or nothing at all) in the file.
                raise ValueError('Could not find any valid data in CSV file')

            # remove these chars
            delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""")
            delete.add('"')

            names = []
            seen = dict()
            for i, item in enumerate(headers):
                item = item.strip().lower().replace(' ', '_')
                item = ''.join([c for c in item if c not in delete])
                if not len(item):
                    item = 'column%d' % i

                item = itemd.get(item, item)
                # Disambiguate repeated header names with a numeric suffix.
                cnt = seen.get(item, 0)
                if cnt > 0:
                    names.append(item + '_%d' % cnt)
                else:
                    names.append(item)
                seen[item] = cnt + 1

        else:
            if isinstance(names, str):
                names = [n.strip() for n in names.split(',')]

        # get the converter functions by inspecting checkrows
        converters = get_converters(reader, comments)
        if converters is None:
            raise ValueError('Could not find any valid data in CSV file')

        # reset the reader and start over
        fh.seek(0)
        reader = csv.reader(fh, delimiter=delimiter)
        process_skiprows(reader)

        if needheader:
            while True:
                # skip past any comments and consume one line of column
                # header
                row = next(reader)
                if (len(row) and comments is not None and
                        row[0].startswith(comments)):
                    continue
                break

        # iterate over the remaining rows and convert the data to date
        # objects, ints, or floats as appropriate
        rows = []
        rowmasks = []
        for row in reader:
            if not len(row):
                continue
            if comments is not None and row[0].startswith(comments):
                continue
            # Ensure that the row returned always has the same nr of
            # elements
            row.extend([''] * (len(converters) - len(row)))
            rows.append([func(name, val)
                         for func, name, val in zip(converters, names, row)])
            rowmasks.append([ismissing(name, val)
                             for name, val in zip(names, row)])
    finally:
        fh.close()

    if not len(rows):
        return None

    if use_mrecords and np.any(rowmasks):
        r = np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks)
    else:
        r = np.rec.fromrecords(rows, names=names)
    return r
1266
-
1267
-
1268
987
class GaussianKDE :
1269
988
"""
1270
989
Representation of a kernel-density estimate using Gaussian kernels.
0 commit comments