Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
73 changes: 39 additions & 34 deletions 73 bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,14 +389,13 @@ def convert_data(data, dtype, data_order, data_format):
# Secondly, change format of data
if data_format == 'numpy':
return data
elif data_format == 'pandas':
if data_format == 'pandas':
import pandas as pd

if data.ndim == 1:
return pd.Series(data)
else:
return pd.DataFrame(data)
elif data_format == 'cudf':
return pd.DataFrame(data)
if data_format == 'cudf':
import cudf
import pandas as pd

Expand Down Expand Up @@ -512,36 +511,42 @@ def gen_basic_dict(library, algorithm, stage, params, data, alg_instance=None,
def print_output(library, algorithm, stages, params, functions,
times, metric_type, metrics, data, alg_instance=None,
alg_params=None):
if params.output_format == 'json':
output = []
for i, stage in enumerate(stages):
result = gen_basic_dict(library, algorithm, stage, params,
data[i], alg_instance, alg_params)
result.update({'time[s]': times[i]})
if metric_type is not None:
if isinstance(metric_type, str):
result.update({f'{metric_type}': metrics[i]})
elif isinstance(metric_type, list):
for ind, val in enumerate(metric_type):
if metrics[ind][i] is not None:
result.update({f'{val}': metrics[ind][i]})
if hasattr(params, 'n_classes'):
result['input_data'].update({'classes': params.n_classes})
if hasattr(params, 'n_clusters'):
if algorithm == 'kmeans':
result['input_data'].update(
{'n_clusters': params.n_clusters})
elif algorithm == 'dbscan':
result.update({'n_clusters': params.n_clusters})
# replace non-string init with string for kmeans benchmarks
if alg_instance is not None:
if 'init' in result['algorithm_parameters'].keys():
if not isinstance(result['algorithm_parameters']['init'], str):
result['algorithm_parameters']['init'] = 'random'
if 'handle' in result['algorithm_parameters'].keys():
del result['algorithm_parameters']['handle']
output.append(result)
print(json.dumps(output, indent=4))
if params.output_format != 'json':
return

output = []
for i, stage in enumerate(stages):
result = gen_basic_dict(library, algorithm, stage, params,
data[i], alg_instance, alg_params)
result.update({'time[s]': times[i]})

if metric_type is not None:
if isinstance(metric_type, str):
result.update({f'{metric_type}': metrics[i]})
elif isinstance(metric_type, list):
for ind, val in enumerate(metric_type):
if metrics[ind][i] is not None:
result.update({f'{val}': metrics[ind][i]})

if hasattr(params, 'n_classes'):
result['input_data'].update({'classes': params.n_classes})
if hasattr(params, 'n_clusters'):
if algorithm == 'kmeans':
result['input_data'].update(
{'n_clusters': params.n_clusters})
elif algorithm == 'dbscan':
result.update({'n_clusters': params.n_clusters})

# replace non-string init with string for kmeans benchmarks
if alg_instance is not None:
if 'init' in result['algorithm_parameters'].keys():
if not isinstance(result['algorithm_parameters']['init'], str):
result['algorithm_parameters']['init'] = 'random'
if 'handle' in result['algorithm_parameters'].keys():
del result['algorithm_parameters']['handle']
output.append(result)

print(json.dumps(output, indent=4))


def run_with_context(params, function):
Expand Down
9 changes: 7 additions & 2 deletions 9 configs/xgboost/xgb_gpu_additional_config.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
{
"common": {
"lib": "xgboost",
"data-format": "cudf",
"data-order": "F",
"dtype": "float32",
"algorithm": "gbt",
"tree-method": "gpu_hist",
"count-dmatrix": "",
"max-depth": 8,
"learning-rate": 0.1,
"reg-lambda": 1,
Expand All @@ -15,6 +13,7 @@
"cases": [
{
"objective": "binary:logistic",
"data-format": "pandas",
"scale-pos-weight": 2.1067817411664587,
"dataset": [
{
Expand All @@ -33,6 +32,7 @@
},
{
"objective": "binary:logistic",
"data-format": "cudf",
"scale-pos-weight": 173.63348001466812,
"dataset": [
{
Expand All @@ -51,6 +51,7 @@
},
{
"objective": "multi:softmax",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -68,6 +69,7 @@
},
{
"objective": "binary:logistic",
"data-format": "pandas",
"scale-pos-weight": 2.0017715678375363,
"dataset": [
{
Expand All @@ -86,6 +88,7 @@
},
{
"objective": "binary:logistic",
"data-format": "cudf",
"scale-pos-weight": 578.2868020304569,
"dataset": [
{
Expand All @@ -104,6 +107,7 @@
},
{
"objective": "binary:logistic",
"data-format": "cudf",
"scale-pos-weight": 1.8872389605086624,
"dataset": [
{
Expand All @@ -122,6 +126,7 @@
},
{
"objective": "reg:squarederror",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand Down
36 changes: 21 additions & 15 deletions 36 configs/xgboost/xgb_gpu_main_config.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"common": {
"lib": "xgboost",
"data-format": "cudf",
"data-order": "F",
"dtype": "float32",
"algorithm": "gbt",
"tree-method": "gpu_hist",
"count-dmatrix": ""
"tree-method": "gpu_hist"
},
"cases": [
{
"objective": "reg:squarederror",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -26,10 +26,11 @@
],
"learning-rate": 0.03,
"max-depth": 6,
"n-estimators": 1000,
"objective": "reg:squarederror"
"n-estimators": 1000
},
{
"objective": "binary:logistic",
"data-format": "pandas",
"dataset": [
{
"source": "npy",
Expand All @@ -53,10 +54,11 @@
"min-child-weight": 0,
"max-depth": 8,
"max-leaves": 256,
"n-estimators": 1000,
"objective": "binary:logistic"
"n-estimators": 1000
},
{
"objective": "binary:logistic",
"data-format": "pandas",
"dataset": [
{
"source": "npy",
Expand All @@ -81,10 +83,11 @@
"max-depth": 8,
"max-leaves": 256,
"n-estimators": 1000,
"objective": "binary:logistic",
"inplace-predict": ""
},
{
"objective": "multi:softprob",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -101,10 +104,11 @@
],
"learning-rate": 0.03,
"max-depth": 6,
"n-estimators": 1000,
"objective": "multi:softprob"
"n-estimators": 1000
},
{
"objective": "multi:softprob",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -122,10 +126,11 @@
"min-child-weight": 1,
"min-split-loss": 0.1,
"max-depth": 8,
"n-estimators": 200,
"objective": "multi:softprob"
"n-estimators": 200
},
{
"objective": "reg:squarederror",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -137,7 +142,6 @@
}
],
"n-estimators": 100,
"objective": "reg:squarederror",
"max-depth": 8,
"scale-pos-weight": 2,
"learning-rate": 0.1,
Expand All @@ -148,6 +152,8 @@
"max-leaves": 256
},
{
"objective": "multi:softprob",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -163,12 +169,13 @@
}
],
"n-estimators": 60,
"objective": "multi:softprob",
"max-depth": 7,
"subsample": 0.7,
"colsample-bytree": 0.7
},
{
"objective": "binary:logistic",
"data-format": "cudf",
"dataset": [
{
"source": "npy",
Expand All @@ -184,7 +191,6 @@
}
],
"n-estimators": 10000,
"objective": "binary:logistic",
"max-depth": 1,
"subsample": 0.5,
"eta": 0.1,
Expand Down
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.