Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions 2 doc/progress.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ Changelog
OpenML.
* ADD #564: New helpers to access the structure of a flow (and find its
subflows).
* ADD #618: The software will from now on retry connecting to the server if a
  connection fails. The number of retries can be configured.
janvanrijn marked this conversation as resolved.
Show resolved Hide resolved
* FIX #538: Support loading clustering tasks.
* FIX #464: Fixes a bug related to listing functions (returns correct listing
size).
Expand Down
72 changes: 53 additions & 19 deletions 72 openml/_api_calls.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
import io
import os
import time
import requests
import warnings

import arff
import xmltodict

from . import config
from .exceptions import (OpenMLServerError, OpenMLServerException,
OpenMLServerNoResult)


def _perform_api_call(call, data=None, file_elements=None,
add_authentication=True):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't you need the authentication flag? GET requests don't require authentication, POST/DELETE requests do.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is handled internally and does not need to be an argument to the openml-api call API.

def _perform_api_call(call, data=None, file_elements=None):
"""
Perform an API call at the OpenML server.
return self._read_url(url, data=data, filePath=filePath,
def _read_url(self, url, add_authentication=False, data=None, filePath=None):

Parameters
----------
Expand All @@ -27,8 +22,6 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
file_elements : dict
Mapping of {filename: str} of strings which should be uploaded as
files to the server.
add_authentication : bool
Whether to add authentication (api key) to the request.

Returns
-------
Expand All @@ -50,12 +43,12 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):


def _file_id_to_url(file_id, filename=None):
    """
    Build the URL from which a given file id can be downloaded.

    Parameters
    ----------
    file_id : int or str
        Identifier of the file on the OpenML server.
    filename : str, optional
        If given, appended as the final path component of the URL.

    Returns
    -------
    str
        The download URL for the file.
    """
    # Everything in front of '/api/' in the configured server address is
    # the server's base URL.
    openml_url = config.server.split('/api/')
    url = openml_url[0] + '/data/download/%s' % file_id
    if filename is not None:
        url += '/' + filename
    return url
Expand All @@ -71,7 +64,12 @@ def _read_url_files(url, data=None, file_elements=None):
file_elements = {}
# Using requests.post sets header 'Accept-encoding' automatically to
# 'gzip,deflate'
response = requests.post(url, data=data, files=file_elements)
response = send_request(
request_method='post',
url=url,
data=data,
files=file_elements,
)
if response.status_code != 200:
raise _parse_server_exception(response, url=url)
if 'Content-Encoding' not in response.headers or \
Expand All @@ -87,12 +85,16 @@ def _read_url(url, data=None):
data['api_key'] = config.apikey

if len(data) == 0 or (len(data) == 1 and 'api_key' in data):
# do a GET
response = requests.get(url, params=data)
else: # an actual post request
response = send_request(
request_method='get', url=url, data=data,
)

else:
# Using requests.post sets header 'Accept-encoding' automatically to
# 'gzip,deflate'
response = requests.post(url, data=data)
response = send_request(
request_method='post', url=url, data=data,
)

if response.status_code != 200:
raise _parse_server_exception(response, url=url)
Expand All @@ -102,12 +104,44 @@ def _read_url(url, data=None):
return response.text


def send_request(
        request_method,
        url,
        data,
        files=None,
):
    """Send an HTTP request to the OpenML server, retrying on failure.

    Connection-level failures (``ConnectionError``, ``SSLError``) are
    retried up to ``config.connection_n_retries`` times with a linearly
    increasing sleep between attempts; the last failure is re-raised.

    Parameters
    ----------
    request_method : str
        Either ``'get'`` or ``'post'``; anything else raises
        ``NotImplementedError``.
    url : str
        Target URL.
    data : dict
        Sent as query parameters for GET requests and as the form body
        for POST requests.
    files : dict, optional
        File payloads, forwarded to ``requests`` for POST requests only.

    Returns
    -------
    requests.Response
        The server's response to the last (successful) attempt.
    """
    n_retries = config.connection_n_retries
    response = None
    with requests.Session() as session:
        # Start at one to have a non-zero multiplier for the sleep
        for i in range(1, n_retries + 1):
            try:
                if request_method == 'get':
                    response = session.get(url, params=data)
                elif request_method == 'post':
                    response = session.post(url, data=data, files=files)
                else:
                    raise NotImplementedError(
                        'Request method %s not supported.' % request_method
                    )
                break
            except (
                requests.exceptions.ConnectionError,
                requests.exceptions.SSLError,
            ):
                if i == n_retries:
                    # Out of retries: propagate the original exception
                    # with its full traceback.
                    raise
                # Back off a little longer after each failed attempt.
                time.sleep(0.1 * i)
    if response is None:
        # Defensive guard: the loop above either sets ``response`` or raises.
        raise ValueError('This should never happen!')
    return response


def _parse_server_exception(response, url=None):
    # OpenML has a sophisticated error system where information
    # about failures is provided. Try to parse this.
try:
server_exception = xmltodict.parse(response.text)
except:
except Exception:
raise OpenMLServerError(('Unexpected server error. Please '
'contact the developers!\nStatus code: '
'%d\n' % response.status_code) + response.text)
Expand All @@ -117,7 +151,7 @@ def _parse_server_exception(response, url=None):
additional = None
if 'oml:additional_information' in server_exception['oml:error']:
additional = server_exception['oml:error']['oml:additional_information']
if code in [372, 512, 500, 482, 542, 674]: # datasets,
if code in [372, 512, 500, 482, 542, 674]:
# 512 for runs, 372 for datasets, 500 for flows
# 482 for tasks, 542 for evaluations, 674 for setups
return OpenMLServerNoResult(code, message, additional)
Expand Down
11 changes: 11 additions & 0 deletions 11 openml/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
'verbosity': 0,
'cachedir': os.path.expanduser(os.path.join('~', '.openml', 'cache')),
'avoid_duplicate_runs': 'True',
'connection_n_retries': 2,
}

config_file = os.path.expanduser(os.path.join('~', '.openml' 'config'))
Expand All @@ -32,6 +33,9 @@
# The current cache directory (without the server name)
cache_directory = ""

# Number of retries if the connection breaks
connection_n_retries = 2


def _setup():
"""Setup openml package. Called on first import.
Expand All @@ -46,6 +50,7 @@ def _setup():
global server
global cache_directory
global avoid_duplicate_runs
global connection_n_retries
# read config file, create cache directory
try:
os.mkdir(os.path.expanduser(os.path.join('~', '.openml')))
Expand All @@ -57,6 +62,12 @@ def _setup():
server = config.get('FAKE_SECTION', 'server')
cache_directory = os.path.expanduser(config.get('FAKE_SECTION', 'cachedir'))
avoid_duplicate_runs = config.getboolean('FAKE_SECTION', 'avoid_duplicate_runs')
connection_n_retries = config.get('FAKE_SECTION', 'connection_n_retries')
if connection_n_retries > 20:
raise ValueError(
'A higher number of retries than 20 is not allowed to keep the '
'server load reasonable'
)


def _parse_config():
Expand Down
5 changes: 5 additions & 0 deletions 5 openml/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ def setUp(self):
with open(openml.config.config_file, 'w') as fh:
fh.write('apikey = %s' % openml.config.apikey)

        # Increase the number of retries to avoid spurious server failures
self.connection_n_retries = openml.config.connection_n_retries
openml.config.connection_n_retries = 10
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused about the number of retries being stored in the _default dict, the openml.config file and self.connection_n_retries. So, by default it is 2, then when that doesn't work, it is increased to 10 and stored in the openml.config, and then it stops (it doesn't get higher than 10)?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, it's increased to 10 for testing in every case. It is stored in the class to restore the original value afterwards.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that it is unfortunate that the value of 2 is twice in the original file, I just made a note to change it later.


def tearDown(self):
os.chdir(self.cwd)
try:
Expand All @@ -76,6 +80,7 @@ def tearDown(self):
else:
raise
openml.config.server = self.production_server
openml.config.connection_n_retries = self.connection_n_retries

def _get_sentinel(self, sentinel=None):
if sentinel is None:
Expand Down
25 changes: 14 additions & 11 deletions 25 tests/test_runs/test_run_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,17 +735,20 @@ def test_get_run_trace(self):
if 'Run already exists in server' not in e.message:
# in this case the error was not the one we expected
raise e
# run was already
flow = openml.flows.sklearn_to_flow(clf)
flow_exists = openml.flows.flow_exists(flow.name, flow.external_version)
self.assertIsInstance(flow_exists, int)
self.assertGreater(flow_exists, 0)
downloaded_flow = openml.flows.get_flow(flow_exists,
reinstantiate=True)
setup_exists = openml.setups.setup_exists(downloaded_flow)
self.assertIsInstance(setup_exists, int)
self.assertGreater(setup_exists, 0)
run_ids = _run_exists(task.task_id, setup_exists)
# run was already performed
message = e.message
if sys.version_info[0] == 2:
# Parse a string like:
# 'Run already exists in server. Run id(s): set([37501])'
run_ids = (
message.split('[')[1].replace(']', '').
replace(')', '').split(',')
)
else:
# Parse a string like:
# "Run already exists in server. Run id(s): {36980}"
run_ids = message.split('{')[1].replace('}', '').split(',')
run_ids = [int(run_id) for run_id in run_ids]
self.assertGreater(len(run_ids), 0)
run_id = random.choice(list(run_ids))

Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.