From cabdc89839724866a10d01ccd5a52a0f457f1f84 Mon Sep 17 00:00:00 2001
From: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
Date: Thu, 14 Feb 2019 09:23:02 +0100
Subject: [PATCH 1/6] TST add connection retries test-wise

---
 openml/_api_calls.py | 83 ++++++++++++++++++++++++++++++++++++++------
 openml/config.py     | 11 ++++++
 openml/testing.py    |  5 +++
 3 files changed, 88 insertions(+), 11 deletions(-)

diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index 6a1086221..bc30fdc75 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -1,9 +1,7 @@
-import io
-import os
+import time
 import requests
 import warnings
 
-import arff
 import xmltodict
 
 from . import config
@@ -71,7 +69,12 @@ def _read_url_files(url, data=None, file_elements=None):
         file_elements = {}
     # Using requests.post sets header 'Accept-encoding' automatically to
     # 'gzip,deflate'
-    response = requests.post(url, data=data, files=file_elements)
+    #response = requests.post(url, data=data, files=file_elements)
+    response = send_request(
+        request_method='post',
+        url=url,
+        kwargs={'data': data, 'files': file_elements},
+    )
     if response.status_code != 200:
         raise _parse_server_exception(response, url=url)
     if 'Content-Encoding' not in response.headers or \
@@ -86,13 +89,43 @@ def _read_url(url, data=None):
     if config.apikey is not None:
         data['api_key'] = config.apikey
 
-    if len(data) == 0 or (len(data) == 1 and 'api_key' in data):
-        # do a GET
-        response = requests.get(url, params=data)
-    else: # an actual post request
-        # Using requests.post sets header 'Accept-encoding' automatically to
-        #  'gzip,deflate'
-        response = requests.post(url, data=data)
+    with requests.Session() as session:
+        if len(data) == 0 or (len(data) == 1 and 'api_key' in data):
+            response = send_request(
+                request_method='get', url=url, kwargs={'params': data},
+            )
+            # do a GET
+            # for i in range(n_retries):
+            #     try:
+            #         response = session.get(url, params=data)
+            #         break
+            #     except (
+            #         requests.exceptions.ConnectionError,
+            #         requests.exceptions.SSLError,
+            #     ) as e:
+            #         if i == n_retries - 1:
+            #             raise e
+            #         else:
+            #             time.sleep(0.1 * i)
+
+        else: # an actual post request
+            # Using requests.post sets header 'Accept-encoding' automatically to
+            #  'gzip,deflate'
+            response = send_request(
+                request_method='post', url=url, kwargs={'data': data},
+            )
+            # for i in range(n_retries):
+            #     try:
+            #         response = session.post(url, data=data)
+            #         break
+            #     except (
+            #         requests.exceptions.ConnectionError,
+            #         requests.exceptions.SSLError,
+            #     ) as e:
+            #         if i == n_retries - 1:
+            #             raise e
+            #         else:
+            #             time.sleep(0.1 * i)
 
     if response.status_code != 200:
         raise _parse_server_exception(response, url=url)
@@ -102,6 +135,34 @@ def _read_url(url, data=None):
     return response.text
 
 
+def send_request(request_method, url, kwargs):
+    n_retries = config.connection_n_retries
+    response = None
+    with requests.Session() as session:
+        # Start at one to have a non-zero multiplier for the sleep
+        for i in range(1, n_retries + 1):
+            try:
+                if request_method == 'get':
+                    response = session.get(url, **kwargs)
+                elif request_method == 'post':
+                    response = session.post(url, **kwargs)
+                else:
+                    raise NotImplementedError()
+                break
+            except (
+                    requests.exceptions.ConnectionError,
+                    requests.exceptions.SSLError,
+            ) as e:
+                if i == n_retries:
+                    raise e
+                else:
+                    time.sleep(0.1 * i)
+    if response is None:
+        raise ValueError('This should never happen!')
+    return response
+
+
+
 def _parse_server_exception(response, url=None):
     # OpenML has a sopisticated error system
     # where information about failures is provided. try to parse this
diff --git a/openml/config.py b/openml/config.py
index 897eadd2b..0ca5936a0 100644
--- a/openml/config.py
+++ b/openml/config.py
@@ -21,6 +21,7 @@
     'verbosity': 0,
     'cachedir': os.path.expanduser(os.path.join('~', '.openml', 'cache')),
     'avoid_duplicate_runs': 'True',
+    'connection_n_retries': 2,
 }
 
 config_file = os.path.expanduser(os.path.join('~', '.openml' 'config'))
@@ -32,6 +33,9 @@
 # The current cache directory (without the server name)
 cache_directory = ""
 
+# Number of retries if the connection breaks
+connection_n_retries = 2
+
 
 def _setup():
     """Setup openml package. Called on first import.
@@ -46,6 +50,7 @@ def _setup():
     global server
     global cache_directory
     global avoid_duplicate_runs
+    global connection_n_retries
     # read config file, create cache directory
     try:
         os.mkdir(os.path.expanduser(os.path.join('~', '.openml')))
@@ -57,6 +62,12 @@ def _setup():
     server = config.get('FAKE_SECTION', 'server')
     cache_directory = os.path.expanduser(config.get('FAKE_SECTION', 'cachedir'))
     avoid_duplicate_runs = config.getboolean('FAKE_SECTION', 'avoid_duplicate_runs')
+    connection_n_retries = config.get('FAKE_SECTION', 'connection_n_retries')
+    if connection_n_retries > 20:
+        raise ValueError(
+            'A higher number of retries than 20 is not allowed to keep the '
+            'server load reasonable'
+        )
 
 
 def _parse_config():
diff --git a/openml/testing.py b/openml/testing.py
index 80c4b3183..586345a9c 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -65,6 +65,10 @@ def setUp(self):
                 with open(openml.config.config_file, 'w') as fh:
                     fh.write('apikey = %s' % openml.config.apikey)
 
+        # Increase the number of retries to avoid spurios server failures
+        self.connection_n_retries = openml.config.connection_n_retries
+        openml.config.connection_n_retries = 10
+
     def tearDown(self):
         os.chdir(self.cwd)
         try:
@@ -76,6 +80,7 @@ def tearDown(self):
             else:
                 raise
         openml.config.server = self.production_server
+        openml.config.connection_n_retries = self.connection_n_retries
 
     def _get_sentinel(self, sentinel=None):
         if sentinel is None:

From 681a7f6a244b10dcd64b8ffeebd3e3147198d2b4 Mon Sep 17 00:00:00 2001
From: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
Date: Thu, 14 Feb 2019 14:45:29 +0100
Subject: [PATCH 2/6] Improve file style

---
 openml/_api_calls.py | 67 +++++++++++---------------------------------
 1 file changed, 17 insertions(+), 50 deletions(-)

diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index bc30fdc75..7477e63b8 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -9,12 +9,9 @@
                          OpenMLServerNoResult)
 
 
-def _perform_api_call(call, data=None, file_elements=None,
-                      add_authentication=True):
+def _perform_api_call(call, data=None, file_elements=None):
     """
     Perform an API call at the OpenML server.
-    return self._read_url(url, data=data, filePath=filePath,
-    def _read_url(self, url, add_authentication=False, data=None, filePath=None):
 
     Parameters
     ----------
@@ -25,8 +22,6 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
     file_elements : dict
         Mapping of {filename: str} of strings which should be uploaded as
         files to the server.
-    add_authentication : bool
-        Whether to add authentication (api key) to the request.
 
     Returns
     -------
@@ -48,12 +43,12 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
 
 
 def _file_id_to_url(file_id, filename=None):
-    '''
+    """
      Presents the URL how to download a given file id
      filename is optional
-    '''
+    """
     openml_url = config.server.split('/api/')
-    url = openml_url[0] + '/data/download/%s' %file_id
+    url = openml_url[0] + '/data/download/%s' % file_id
     if filename is not None:
         url += '/' + filename
     return url
@@ -69,7 +64,6 @@ def _read_url_files(url, data=None, file_elements=None):
         file_elements = {}
     # Using requests.post sets header 'Accept-encoding' automatically to
     # 'gzip,deflate'
-    #response = requests.post(url, data=data, files=file_elements)
     response = send_request(
         request_method='post',
         url=url,
@@ -89,43 +83,17 @@ def _read_url(url, data=None):
     if config.apikey is not None:
         data['api_key'] = config.apikey
 
-    with requests.Session() as session:
-        if len(data) == 0 or (len(data) == 1 and 'api_key' in data):
-            response = send_request(
-                request_method='get', url=url, kwargs={'params': data},
-            )
-            # do a GET
-            # for i in range(n_retries):
-            #     try:
-            #         response = session.get(url, params=data)
-            #         break
-            #     except (
-            #         requests.exceptions.ConnectionError,
-            #         requests.exceptions.SSLError,
-            #     ) as e:
-            #         if i == n_retries - 1:
-            #             raise e
-            #         else:
-            #             time.sleep(0.1 * i)
-
-        else: # an actual post request
-            # Using requests.post sets header 'Accept-encoding' automatically to
-            #  'gzip,deflate'
-            response = send_request(
-                request_method='post', url=url, kwargs={'data': data},
-            )
-            # for i in range(n_retries):
-            #     try:
-            #         response = session.post(url, data=data)
-            #         break
-            #     except (
-            #         requests.exceptions.ConnectionError,
-            #         requests.exceptions.SSLError,
-            #     ) as e:
-            #         if i == n_retries - 1:
-            #             raise e
-            #         else:
-            #             time.sleep(0.1 * i)
+    if len(data) == 0 or (len(data) == 1 and 'api_key' in data):
+        response = send_request(
+            request_method='get', url=url, kwargs={'params': data},
+        )
+
+    else:
+        # Using requests.post sets header 'Accept-encoding' automatically to
+        #  'gzip,deflate'
+        response = send_request(
+            request_method='post', url=url, kwargs={'data': data},
+        )
 
     if response.status_code != 200:
         raise _parse_server_exception(response, url=url)
@@ -162,13 +130,12 @@ def send_request(request_method, url, kwargs):
     return response
 
 
-
 def _parse_server_exception(response, url=None):
     # OpenML has a sopisticated error system
     # where information about failures is provided. try to parse this
     try:
         server_exception = xmltodict.parse(response.text)
-    except:
+    except Exception:
         raise OpenMLServerError(('Unexpected server error. Please '
                                  'contact the developers!\nStatus code: '
                                  '%d\n' % response.status_code) + response.text)
@@ -178,7 +145,7 @@ def _parse_server_exception(response, url=None):
     additional = None
     if 'oml:additional_information' in server_exception['oml:error']:
         additional = server_exception['oml:error']['oml:additional_information']
-    if code in [372, 512, 500, 482, 542, 674]: # datasets,
+    if code in [372, 512, 500, 482, 542, 674]:
         # 512 for runs, 372 for datasets, 500 for flows
         # 482 for tasks, 542 for evaluations, 674 for setups
         return OpenMLServerNoResult(code, message, additional)

From 53f4e21edf602b9d3ba6c20e7d01dfc83b9bfa39 Mon Sep 17 00:00:00 2001
From: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
Date: Thu, 14 Feb 2019 14:47:54 +0100
Subject: [PATCH 3/6] MAINT update changelog

---
 doc/progress.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/progress.rst b/doc/progress.rst
index dac22ff22..c6ce7f30e 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -20,6 +20,8 @@ Changelog
   OpenML.
 * ADD #564: New helpers to access the structure of a flow (and find its
   subflows).
+* ADD #618: The software will from now on retry to connect to the server if a
+  connection failed. The number of retries can be configured.
 * FIX #538: Support loading clustering tasks.
 * FIX #464: Fixes a bug related to listing functions (returns correct listing
   size).

From 17ea6cb747b0de45fe170a3208af75e300c616aa Mon Sep 17 00:00:00 2001
From: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
Date: Thu, 14 Feb 2019 16:08:05 +0100
Subject: [PATCH 4/6] MAINT simplify unit test, change code as requested by Jan

---
 openml/_api_calls.py                  | 18 ++++++++++++------
 tests/test_runs/test_run_functions.py | 17 ++++++-----------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index 7477e63b8..707516651 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -67,7 +67,8 @@ def _read_url_files(url, data=None, file_elements=None):
     response = send_request(
         request_method='post',
         url=url,
-        kwargs={'data': data, 'files': file_elements},
+        data=data,
+        files=file_elements,
     )
     if response.status_code != 200:
         raise _parse_server_exception(response, url=url)
@@ -85,14 +86,14 @@ def _read_url(url, data=None):
 
     if len(data) == 0 or (len(data) == 1 and 'api_key' in data):
         response = send_request(
-            request_method='get', url=url, kwargs={'params': data},
+            request_method='get', url=url, data=data,
         )
 
     else:
         # Using requests.post sets header 'Accept-encoding' automatically to
         #  'gzip,deflate'
         response = send_request(
-            request_method='post', url=url, kwargs={'data': data},
+            request_method='post', url=url, data=data,
         )
 
     if response.status_code != 200:
@@ -103,7 +104,12 @@ def _read_url(url, data=None):
     return response.text
 
 
-def send_request(request_method, url, kwargs):
+def send_request(
+    request_method,
+    url,
+    data,
+    files=None,
+):
     n_retries = config.connection_n_retries
     response = None
     with requests.Session() as session:
@@ -111,9 +117,9 @@ def send_request(request_method, url, kwargs):
         for i in range(1, n_retries + 1):
             try:
                 if request_method == 'get':
-                    response = session.get(url, **kwargs)
+                    response = session.get(url, params=data)
                 elif request_method == 'post':
-                    response = session.post(url, **kwargs)
+                    response = session.post(url, data=data, files=files)
                 else:
                     raise NotImplementedError()
                 break
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 16e433979..95302485e 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -735,17 +735,12 @@ def test_get_run_trace(self):
             if 'Run already exists in server' not in e.message:
                 # in this case the error was not the one we expected
                 raise e
-            # run was already
-            flow = openml.flows.sklearn_to_flow(clf)
-            flow_exists = openml.flows.flow_exists(flow.name, flow.external_version)
-            self.assertIsInstance(flow_exists, int)
-            self.assertGreater(flow_exists, 0)
-            downloaded_flow = openml.flows.get_flow(flow_exists,
-                                                    reinstantiate=True)
-            setup_exists = openml.setups.setup_exists(downloaded_flow)
-            self.assertIsInstance(setup_exists, int)
-            self.assertGreater(setup_exists, 0)
-            run_ids = _run_exists(task.task_id, setup_exists)
+            # run was already performed
+            message = e.message
+            # Parse a string like:
+            # "Run already exists in server. Run id(s): {36980}"
+            run_ids = message.split('{')[1].replace('}', '').split(',')
+            run_ids = [int(run_id) for run_id in run_ids]
             self.assertGreater(len(run_ids), 0)
             run_id = random.choice(list(run_ids))
 

From 6d94d63f6e4bdb66f0904c05ed03e71ecf77a132 Mon Sep 17 00:00:00 2001
From: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
Date: Thu, 14 Feb 2019 16:33:28 +0100
Subject: [PATCH 5/6] TST fix python2/3 bug

---
 tests/test_runs/test_run_functions.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 95302485e..ff1a61f4b 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -737,9 +737,17 @@ def test_get_run_trace(self):
                 raise e
             # run was already performed
             message = e.message
-            # Parse a string like:
-            # "Run already exists in server. Run id(s): {36980}"
-            run_ids = message.split('{')[1].replace('}', '').split(',')
+            if sys.version_info[0] == 2:
+                # Parse a string like:
+                # 'Run already exists in server. Run id(s): set([37501])'
+                run_ids = (
+                    message.split('[')[1].replace(']', '').
+                        replace(')', '').split(',')
+                )
+            else:
+                # Parse a string like:
+                # "Run already exists in server. Run id(s): {36980}"
+                run_ids = message.split('{')[1].replace('}', '').split(',')
             run_ids = [int(run_id) for run_id in run_ids]
             self.assertGreater(len(run_ids), 0)
             run_id = random.choice(list(run_ids))

From 7bb6d0d121c5165df7fd73c06c6f75e3428c4851 Mon Sep 17 00:00:00 2001
From: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
Date: Thu, 14 Feb 2019 16:45:33 +0100
Subject: [PATCH 6/6] please flake

---
 tests/test_runs/test_run_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index ff1a61f4b..8c542e39b 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -742,7 +742,7 @@ def test_get_run_trace(self):
                 # 'Run already exists in server. Run id(s): set([37501])'
                 run_ids = (
                     message.split('[')[1].replace(']', '').
-                        replace(')', '').split(',')
+                    replace(')', '').split(',')
                 )
             else:
                 # Parse a string like: