forked from openml/openml-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathintroduction_tutorial.py
More file actions
75 lines (69 loc) · 3.33 KB
/
introduction_tutorial.py
File metadata and controls
75 lines (69 loc) · 3.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
Introduction
===================
An introduction to OpenML, followed up by a simple example.
"""
############################################################################
# OpenML is an online collaboration platform for machine learning which allows
# you to:
#
# * Find or share interesting, well-documented datasets
# * Define research / modelling goals (tasks)
# * Explore a large number of machine learning algorithms, with APIs in Java, R and Python
# * Log and share reproducible experiments, models, results
# * Work seamlessly with scikit-learn and other libraries
# * Run large-scale benchmarks and compare to the state of the art
#
# Installation
# ^^^^^^^^^^^^
# Installation is done via ``pip``:
#
# .. code:: bash
#
#     pip install openml
#
# For further information, please check out the installation guide at https://openml.github.io/openml-python/stable/contributing.html#installation
#
# Authentication
# ^^^^^^^^^^^^^^
#
# The OpenML server can only be accessed by users who have signed up on the OpenML platform. If you don’t have an account yet, sign up now.
# You will receive an API key, which will authenticate you to the server and allow you to download and upload datasets, tasks, runs and flows.
#
# * Create an OpenML account (free) on http://www.openml.org.
# * After logging in, open your account page (avatar on the top right)
# * Open 'Account Settings', then 'API authentication' to find your API key.
#
# There are two ways to authenticate:
#
# * Create a plain text file **~/.openml/config** with the line **'apikey=MYKEY'**, replacing **MYKEY** with your API key. The config file must be located at ~/.openml/config and must exist before the openml module is imported.
# * Run the code below, replacing 'YOURKEY' with your API key.
############################################################################
import openml
from sklearn import neighbors
# Uncomment and set your OpenML key. Don't share your key with others.
# openml.config.apikey = 'YOURKEY'
############################################################################
# Caching
# ^^^^^^^
# When downloading datasets, tasks, runs and flows, they will be cached locally so that they can be retrieved later without calling the server. As with the API key, the cache directory can be specified either through the config file or through the API:
#
# * Add the line **cachedir = 'MYDIR'** to the config file, replacing 'MYDIR' with the path to the cache directory. By default, OpenML will use **~/.openml/cache** as the cache directory.
# * Run the code below, replacing 'YOURDIR' with the path to the cache directory.
import os
# Uncomment and set your OpenML cache directory
# openml.config.cache_directory = os.path.expanduser('YOURDIR')
############################################################################
# Simple Example
# ^^^^^^^^^^^^^^
# Download the OpenML task for the eeg-eye-state dataset.
# Fetch task 403 and the dataset it refers to (looked up via task.dataset_id).
task = openml.tasks.get_task(403)
data = openml.datasets.get_dataset(task.dataset_id)
# Build a scikit-learn k-nearest-neighbours classifier with k=5 and convert it
# into an OpenML flow so that it can be run on the task.
clf = neighbors.KNeighborsClassifier(n_neighbors=5)
flow = openml.flows.sklearn_to_flow(clf)
# Run the flow on the task.
# NOTE(review): avoid_duplicate_runs=False presumably disables the check for an
# identical run already existing on the server -- confirm against the openml docs.
run = openml.runs.run_flow_on_task(flow, task, avoid_duplicate_runs=False)
# Publish the experiment on OpenML (optional, requires an API key).
# For this tutorial, our configuration publishes to the test server
# so as not to pollute the main server.
# NOTE(review): no statement visible in this file selects the test server; it is
# presumably configured externally (e.g. in the config file) -- confirm, since
# the URL printed below is hard-coded to test.openml.org.
myrun = run.publish()
# Print a link to the published run, built from the dataset name and the run id.
print("kNN on %s: http://test.openml.org/r/%d" % (data.name, myrun.run_id))