@@ -1275,7 +1275,7 @@ cdef class Tree:
1275
1275
1276
1276
return np.asarray(importances)
1277
1277
1278
- cdef void _compute_oob_node_values_and_predictions(self , object X_test, intp_t[:, ::1 ] y_test, float64_t[:, :, ::1 ] oob_pred, int32_t[::1 ] has_oob_sample, float64_t[:, :, ::1 ] oob_node_values, str method):
1278
+ cdef void _compute_oob_node_values_and_predictions(self , object X_test, intp_t[:, ::1 ] y_test, float64_t[:: 1 ] sample_weight, float64_t[: , :, ::1 ] oob_pred, int32_t[::1 ] has_oob_sample, float64_t[:, :, ::1 ] oob_node_values, str method):
1279
1279
if issparse(X_test):
1280
1280
raise (NotImplementedError (" does not support sparse X yet" ))
1281
1281
if not isinstance (X_test, np.ndarray):
@@ -1290,7 +1290,7 @@ cdef class Tree:
1290
1290
cdef intp_t n_outputs = self .n_outputs
1291
1291
cdef intp_t max_n_classes = self .max_n_classes
1292
1292
cdef int k, c, node_idx, sample_idx = 0
1293
- cdef int32_t [:, ::1 ] count_oob_values = np.zeros((node_count, n_outputs), dtype = np.int32 )
1293
+ cdef float64_t [:, ::1 ] total_oob_weight = np.zeros((node_count, n_outputs), dtype = np.float64 )
1294
1294
cdef int node_value_idx = - 1
1295
1295
1296
1296
cdef Node* node
@@ -1308,17 +1308,15 @@ cdef class Tree:
1308
1308
if n_classes[k] > 1 :
1309
1309
for c in range (n_classes[k]):
1310
1310
if y_test[k, sample_idx] == c:
1311
- oob_node_values[node_idx, c, k] += 1.0
1312
- # TODO use sample weight instead of 1
1313
- count_oob_values[node_idx, k] += 1
1311
+ oob_node_values[node_idx, c, k] += sample_weight[sample_idx]
1314
1312
else :
1315
1313
if method == " ufi" :
1316
1314
node_value_idx = node_idx * self .value_stride + k * max_n_classes
1317
- oob_node_values[node_idx, 0 , k] += (y_test[k, sample_idx] - self .value[node_value_idx]) ** 2.0
1315
+ oob_node_values[node_idx, 0 , k] += sample_weight[sample_idx] * (y_test[k, sample_idx] - self .value[node_value_idx]) ** 2.0
1318
1316
else :
1319
- oob_node_values[node_idx, 0 , k] += y_test[k, sample_idx]
1320
- count_oob_values [node_idx, k] += 1
1321
- # TODO use sample weight instead of 1
1317
+ oob_node_values[node_idx, 0 , k] += sample_weight[sample_idx] * y_test[k, sample_idx]
1318
+ total_oob_weight [node_idx, k] += sample_weight[sample_idx]
1319
+
1322
1320
# child nodes
1323
1321
while node.left_child != _TREE_LEAF and node.right_child != _TREE_LEAF:
1324
1322
if X_ndarray[sample_idx, node.feature] <= node.threshold:
@@ -1331,26 +1329,26 @@ cdef class Tree:
1331
1329
if n_classes[k] > 1 :
1332
1330
for c in range (n_classes[k]):
1333
1331
if y_test[k, sample_idx] == c:
1334
- oob_node_values[node_idx, c, k] += 1.0
1332
+ oob_node_values[node_idx, c, k] += sample_weight[sample_idx]
1335
1333
# accumulate the sample's weight so this node's values can be normalized later
1336
- count_oob_values [node_idx, k] += 1
1334
+ total_oob_weight [node_idx, k] += sample_weight[sample_idx]
1337
1335
else :
1338
1336
if method == " ufi" :
1339
1337
node_value_idx = node_idx * self .value_stride + k * max_n_classes
1340
- oob_node_values[node_idx, 0 , k] += (y_test[k, sample_idx] - self .value[node_value_idx]) ** 2.0
1338
+ oob_node_values[node_idx, 0 , k] += sample_weight[sample_idx] * (y_test[k, sample_idx] - self .value[node_value_idx]) ** 2.0
1341
1339
else :
1342
- oob_node_values[node_idx, 0 , k] += y_test[k, sample_idx]
1343
- count_oob_values [node_idx, k] += 1
1340
+ oob_node_values[node_idx, 0 , k] += sample_weight[sample_idx] * y_test[k, sample_idx]
1341
+ total_oob_weight [node_idx, k] += sample_weight[sample_idx]
1344
1342
# accumulate the sample's weight so this node's values can be normalized later
1345
1343
# store the id of the leaf where each sample ends up
1346
1344
y_leafs[sample_idx] = node_idx
1347
1345
1348
1346
# normalize the accumulated node values by the total OOB sample weight of each node
1349
1347
for node_idx in range (node_count):
1350
1348
for k in range (n_outputs):
1351
- if count_oob_values [node_idx, k] > 0 :
1349
+ if total_oob_weight [node_idx, k] > 0. 0 :
1352
1350
for c in range (n_classes[k]):
1353
- oob_node_values[node_idx, c, k] /= count_oob_values [node_idx, k]
1351
+ oob_node_values[node_idx, c, k] /= total_oob_weight [node_idx, k]
1354
1352
# if leaf store the predictive proba
1355
1353
if self .nodes[node_idx].left_child == _TREE_LEAF and self .nodes[node_idx].right_child == _TREE_LEAF:
1356
1354
for sample_idx in range (n_samples):
@@ -1360,7 +1358,7 @@ cdef class Tree:
1360
1358
node_value_idx = node_idx * self .value_stride + k * max_n_classes + c
1361
1359
oob_pred[sample_idx, c, k] = self .value[node_value_idx]
1362
1360
1363
- cpdef compute_unbiased_feature_importance_and_oob_predictions(self , object X_test, object y_test, criterion, method = " ufi" ):
1361
+ cpdef compute_unbiased_feature_importance_and_oob_predictions(self , object X_test, object y_test, object sample_weight, criterion, method = " ufi" ):
1364
1362
cdef intp_t n_samples = X_test.shape[0 ]
1365
1363
cdef intp_t n_features = X_test.shape[1 ]
1366
1364
cdef intp_t n_outputs = self .n_outputs
@@ -1378,7 +1376,8 @@ cdef class Tree:
1378
1376
cdef int left_idx, right_idx = - 1
1379
1377
1380
1378
cdef intp_t[:, ::1 ] y_view = np.ascontiguousarray(y_test, dtype = np.intp)
1381
- self ._compute_oob_node_values_and_predictions(X_test, y_view, oob_pred, has_oob_sample, oob_node_values, method)
1379
+ cdef float64_t[::1 ] sample_weight_view = np.ascontiguousarray(sample_weight, dtype = np.float64)
1380
+ self ._compute_oob_node_values_and_predictions(X_test, y_view, sample_weight_view, oob_pred, has_oob_sample, oob_node_values, method)
1382
1381
1383
1382
for node_idx in range (self .node_count):
1384
1383
node = nodes[node_idx]
0 commit comments