From cabdc89839724866a10d01ccd5a52a0f457f1f84 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 14 Feb 2019 09:23:02 +0100 Subject: [PATCH 1/6] TST add connection retries test-wise --- openml/_api_calls.py | 83 ++++++++++++++++++++++++++++++++++++++------ openml/config.py | 11 ++++++ openml/testing.py | 5 +++ 3 files changed, 88 insertions(+), 11 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 6a1086221..bc30fdc75 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -1,9 +1,7 @@ -import io -import os +import time import requests import warnings -import arff import xmltodict from . import config @@ -71,7 +69,12 @@ def _read_url_files(url, data=None, file_elements=None): file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to # 'gzip,deflate' - response = requests.post(url, data=data, files=file_elements) + #response = requests.post(url, data=data, files=file_elements) + response = send_request( + request_method='post', + url=url, + kwargs={'data': data, 'files': file_elements}, + ) if response.status_code != 200: raise _parse_server_exception(response, url=url) if 'Content-Encoding' not in response.headers or \ @@ -86,13 +89,43 @@ def _read_url(url, data=None): if config.apikey is not None: data['api_key'] = config.apikey - if len(data) == 0 or (len(data) == 1 and 'api_key' in data): - # do a GET - response = requests.get(url, params=data) - else: # an actual post request - # Using requests.post sets header 'Accept-encoding' automatically to - # 'gzip,deflate' - response = requests.post(url, data=data) + with requests.Session() as session: + if len(data) == 0 or (len(data) == 1 and 'api_key' in data): + response = send_request( + request_method='get', url=url, kwargs={'params': data}, + ) + # do a GET + # for i in range(n_retries): + # try: + # response = session.get(url, params=data) + # break + # except ( + # requests.exceptions.ConnectionError, + # requests.exceptions.SSLError, + # ) as e: + # if i == n_retries - 1: + # raise e + # else: + # time.sleep(0.1 * i) + + else: # an actual post request + # Using requests.post sets header 'Accept-encoding' automatically to + # 'gzip,deflate' + response = send_request( + request_method='post', url=url, kwargs={'data': data}, + ) + # for i in range(n_retries): + # try: + # response = session.post(url, data=data) + # break + # except ( + # requests.exceptions.ConnectionError, + # requests.exceptions.SSLError, + # ) as e: + # if i == n_retries - 1: + # raise e + # else: + # time.sleep(0.1 * i) if response.status_code != 200: raise _parse_server_exception(response, url=url) @@ -102,6 +135,34 @@ def _read_url(url, data=None): return response.text +def send_request(request_method, url, kwargs): + n_retries = config.connection_n_retries + response = None + with requests.Session() as session: + # Start at one to have a non-zero multiplier for the sleep + for i in range(1, n_retries + 1): + try: + if request_method == 'get': + response = session.get(url, **kwargs) + elif request_method == 'post': + response = session.post(url, **kwargs) + else: + raise NotImplementedError() + break + except ( + requests.exceptions.ConnectionError, + requests.exceptions.SSLError, + ) as e: + if i == n_retries: + raise e + else: + time.sleep(0.1 * i) + if response is None: + raise ValueError('This should never happen!') + return response + + + def _parse_server_exception(response, url=None): # OpenML has a sopisticated error system # where information about failures is provided. try to parse this diff --git a/openml/config.py b/openml/config.py index 897eadd2b..0ca5936a0 100644 --- a/openml/config.py +++ b/openml/config.py @@ -21,6 +21,7 @@ 'verbosity': 0, 'cachedir': os.path.expanduser(os.path.join('~', '.openml', 'cache')), 'avoid_duplicate_runs': 'True', + 'connection_n_retries': 2, } config_file = os.path.expanduser(os.path.join('~', '.openml' 'config')) @@ -32,6 +33,9 @@ # The current cache directory (without the server name) cache_directory = "" +# Number of retries if the connection breaks +connection_n_retries = 2 + def _setup(): """Setup openml package. Called on first import. @@ -46,6 +50,7 @@ def _setup(): global server global cache_directory global avoid_duplicate_runs + global connection_n_retries # read config file, create cache directory try: os.mkdir(os.path.expanduser(os.path.join('~', '.openml'))) @@ -57,6 +62,12 @@ def _setup(): server = config.get('FAKE_SECTION', 'server') cache_directory = os.path.expanduser(config.get('FAKE_SECTION', 'cachedir')) avoid_duplicate_runs = config.getboolean('FAKE_SECTION', 'avoid_duplicate_runs') + connection_n_retries = config.get('FAKE_SECTION', 'connection_n_retries') + if connection_n_retries > 20: + raise ValueError( + 'A higher number of retries than 20 is not allowed to keep the ' + 'server load reasonable' + ) def _parse_config(): diff --git a/openml/testing.py b/openml/testing.py index 80c4b3183..586345a9c 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -65,6 +65,10 @@ def setUp(self): with open(openml.config.config_file, 'w') as fh: fh.write('apikey = %s' % openml.config.apikey) + # Increase the number of retries to avoid spurios server failures + self.connection_n_retries = openml.config.connection_n_retries + openml.config.connection_n_retries = 10 + def tearDown(self): os.chdir(self.cwd) try: @@ -76,6 +80,7 @@ def tearDown(self): else: raise openml.config.server = self.production_server + openml.config.connection_n_retries = self.connection_n_retries def _get_sentinel(self, sentinel=None): if sentinel is None: From 681a7f6a244b10dcd64b8ffeebd3e3147198d2b4 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 14 Feb 2019 14:45:29 +0100 Subject: [PATCH 2/6] Improve file style --- openml/_api_calls.py | 67 +++++++++++--------------------------------- 1 file changed, 17 insertions(+), 50 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index bc30fdc75..7477e63b8 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -9,12 +9,9 @@ OpenMLServerNoResult) -def _perform_api_call(call, data=None, file_elements=None, - add_authentication=True): +def _perform_api_call(call, data=None, file_elements=None): """ Perform an API call at the OpenML server. - return self._read_url(url, data=data, filePath=filePath, - def _read_url(self, url, add_authentication=False, data=None, filePath=None): Parameters ---------- @@ -25,8 +22,6 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None): file_elements : dict Mapping of {filename: str} of strings which should be uploaded as files to the server. - add_authentication : bool - Whether to add authentication (api key) to the request. Returns ------- @@ -48,12 +43,12 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None): def _file_id_to_url(file_id, filename=None): - ''' + """ Presents the URL how to download a given file id filename is optional - ''' + """ openml_url = config.server.split('/api/') - url = openml_url[0] + '/data/download/%s' %file_id + url = openml_url[0] + '/data/download/%s' % file_id if filename is not None: url += '/' + filename return url @@ -69,7 +64,6 @@ def _read_url_files(url, data=None, file_elements=None): file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to # 'gzip,deflate' - #response = requests.post(url, data=data, files=file_elements) response = send_request( request_method='post', url=url, @@ -89,43 +83,17 @@ def _read_url(url, data=None): if config.apikey is not None: data['api_key'] = config.apikey - with requests.Session() as session: - if len(data) == 0 or (len(data) == 1 and 'api_key' in data): - response = send_request( - request_method='get', url=url, kwargs={'params': data}, - ) - # do a GET - # for i in range(n_retries): - # try: - # response = session.get(url, params=data) - # break - # except ( - # requests.exceptions.ConnectionError, - # requests.exceptions.SSLError, - # ) as e: - # if i == n_retries - 1: - # raise e - # else: - # time.sleep(0.1 * i) - - else: # an actual post request - # Using requests.post sets header 'Accept-encoding' automatically to - # 'gzip,deflate' - response = send_request( - request_method='post', url=url, kwargs={'data': data}, - ) - # for i in range(n_retries): - # try: - # response = session.post(url, data=data) - # break - # except ( - # requests.exceptions.ConnectionError, - # requests.exceptions.SSLError, - # ) as e: - # if i == n_retries - 1: - # raise e - # else: - # time.sleep(0.1 * i) + if len(data) == 0 or (len(data) == 1 and 'api_key' in data): + response = send_request( + request_method='get', url=url, kwargs={'params': data}, + ) + + else: + # Using requests.post sets header 'Accept-encoding' automatically to + # 'gzip,deflate' + response = send_request( + request_method='post', url=url, kwargs={'data': data}, + ) if response.status_code != 200: raise _parse_server_exception(response, url=url) @@ -162,13 +130,12 @@ def send_request(request_method, url, kwargs): return response - def _parse_server_exception(response, url=None): # OpenML has a sopisticated error system # where information about failures is provided. try to parse this try: server_exception = xmltodict.parse(response.text) - except: + except Exception: raise OpenMLServerError(('Unexpected server error. Please ' 'contact the developers!\nStatus code: ' '%d\n' % response.status_code) + response.text) @@ -178,7 +145,7 @@ def _parse_server_exception(response, url=None): additional = None if 'oml:additional_information' in server_exception['oml:error']: additional = server_exception['oml:error']['oml:additional_information'] - if code in [372, 512, 500, 482, 542, 674]: # datasets, + if code in [372, 512, 500, 482, 542, 674]: # 512 for runs, 372 for datasets, 500 for flows # 482 for tasks, 542 for evaluations, 674 for setups return OpenMLServerNoResult(code, message, additional) From 53f4e21edf602b9d3ba6c20e7d01dfc83b9bfa39 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 14 Feb 2019 14:47:54 +0100 Subject: [PATCH 3/6] MAINT update changelog --- doc/progress.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/progress.rst b/doc/progress.rst index dac22ff22..c6ce7f30e 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -20,6 +20,8 @@ Changelog OpenML. * ADD #564: New helpers to access the structure of a flow (and find its subflows). +* ADD #618: The software will from now on retry to connect to the server if a + connection failed. The number of retries can be configured. * FIX #538: Support loading clustering tasks. * FIX #464: Fixes a bug related to listing functions (returns correct listing size). From 17ea6cb747b0de45fe170a3208af75e300c616aa Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 14 Feb 2019 16:08:05 +0100 Subject: [PATCH 4/6] MAINT simplify unit test, change code as requested by Jan --- openml/_api_calls.py | 18 ++++++++++++------ tests/test_runs/test_run_functions.py | 17 ++++++----------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 7477e63b8..707516651 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -67,7 +67,8 @@ def _read_url_files(url, data=None, file_elements=None): response = send_request( request_method='post', url=url, - kwargs={'data': data, 'files': file_elements}, + data=data, + files=file_elements, ) if response.status_code != 200: raise _parse_server_exception(response, url=url) @@ -85,14 +86,14 @@ def _read_url(url, data=None): if len(data) == 0 or (len(data) == 1 and 'api_key' in data): response = send_request( - request_method='get', url=url, kwargs={'params': data}, + request_method='get', url=url, data=data, ) else: # Using requests.post sets header 'Accept-encoding' automatically to # 'gzip,deflate' response = send_request( - request_method='post', url=url, kwargs={'data': data}, + request_method='post', url=url, data=data, ) if response.status_code != 200: @@ -103,7 +104,12 @@ def _read_url(url, data=None): return response.text -def send_request(request_method, url, kwargs): +def send_request( + request_method, + url, + data, + files=None, +): n_retries = config.connection_n_retries response = None with requests.Session() as session: @@ -111,9 +117,9 @@ def send_request(request_method, url, kwargs): for i in range(1, n_retries + 1): try: if request_method == 'get': - response = session.get(url, **kwargs) + response = session.get(url, params=data) elif request_method == 'post': - response = session.post(url, **kwargs) + response = session.post(url, data=data, files=files) else: raise NotImplementedError() break diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 16e433979..95302485e 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -735,17 +735,12 @@ def test_get_run_trace(self): if 'Run already exists in server' not in e.message: # in this case the error was not the one we expected raise e - # run was already - flow = openml.flows.sklearn_to_flow(clf) - flow_exists = openml.flows.flow_exists(flow.name, flow.external_version) - self.assertIsInstance(flow_exists, int) - self.assertGreater(flow_exists, 0) - downloaded_flow = openml.flows.get_flow(flow_exists, - reinstantiate=True) - setup_exists = openml.setups.setup_exists(downloaded_flow) - self.assertIsInstance(setup_exists, int) - self.assertGreater(setup_exists, 0) - run_ids = _run_exists(task.task_id, setup_exists) + # run was already performed + message = e.message + # Parse a string like: + # "Run already exists in server. Run id(s): {36980}" + run_ids = message.split('{')[1].replace('}', '').split(',') + run_ids = [int(run_id) for run_id in run_ids] self.assertGreater(len(run_ids), 0) run_id = random.choice(list(run_ids)) From 6d94d63f6e4bdb66f0904c05ed03e71ecf77a132 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 14 Feb 2019 16:33:28 +0100 Subject: [PATCH 5/6] TST fix python2/3 bug --- tests/test_runs/test_run_functions.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 95302485e..ff1a61f4b 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -737,9 +737,17 @@ def test_get_run_trace(self): raise e # run was already performed message = e.message - # Parse a string like: - # "Run already exists in server. Run id(s): {36980}" - run_ids = message.split('{')[1].replace('}', '').split(',') + if sys.version_info[0] == 2: + # Parse a string like: + # 'Run already exists in server. Run id(s): set([37501])' + run_ids = ( + message.split('[')[1].replace(']', ''). + replace(')', '').split(',') + ) + else: + # Parse a string like: + # "Run already exists in server. Run id(s): {36980}" + run_ids = message.split('{')[1].replace('}', '').split(',') run_ids = [int(run_id) for run_id in run_ids] self.assertGreater(len(run_ids), 0) run_id = random.choice(list(run_ids)) From 7bb6d0d121c5165df7fd73c06c6f75e3428c4851 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 14 Feb 2019 16:45:33 +0100 Subject: [PATCH 6/6] please flake --- tests/test_runs/test_run_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index ff1a61f4b..8c542e39b 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -742,7 +742,7 @@ def test_get_run_trace(self): # 'Run already exists in server. Run id(s): set([37501])' run_ids = ( message.split('[')[1].replace(']', ''). - replace(')', '').split(',') + replace(')', '').split(',') ) else: # Parse a string like: