Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions 2 doc/progress.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ Changelog
OpenML.
* ADD #564: New helpers to access the structure of a flow (and find its
subflows).
* ADD #618: The software will from now on retry connecting to the server if a
  connection fails. The number of retries can be configured.
janvanrijn marked this conversation as resolved.
Show resolved Hide resolved
* FIX #538: Support loading clustering tasks.
* FIX #464: Fixes a bug related to listing functions (returns correct listing
size).
Expand Down
72 changes: 53 additions & 19 deletions 72 openml/_api_calls.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
import io
import os
import time
import requests
import warnings

import arff
import xmltodict

from . import config
from .exceptions import (OpenMLServerError, OpenMLServerException,
OpenMLServerNoResult)


def _perform_api_call(call, data=None, file_elements=None,
add_authentication=True):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't you need the authentication flag? GET requests don't require authentication, POST/DELETE requests do.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is handled internally and does not need to be an argument to the openml-api call API.

def _perform_api_call(call, data=None, file_elements=None):
"""
Perform an API call at the OpenML server.
return self._read_url(url, data=data, filePath=filePath,
def _read_url(self, url, add_authentication=False, data=None, filePath=None):

Parameters
----------
Expand All @@ -27,8 +22,6 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
file_elements : dict
Mapping of {filename: str} of strings which should be uploaded as
files to the server.
add_authentication : bool
Whether to add authentication (api key) to the request.

Returns
-------
Expand All @@ -50,12 +43,12 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):


def _file_id_to_url(file_id, filename=None):
    """
    Build the URL from which a given file id can be downloaded.

    Parameters
    ----------
    file_id : int or str
        Identifier of the file on the OpenML server.
    filename : str, optional
        If given, appended as the final path component of the URL.

    Returns
    -------
    str
        The download URL for the file.
    """
    # Everything in front of '/api/' in the configured server address is
    # the server's base URL.
    openml_url = config.server.split('/api/')
    url = openml_url[0] + '/data/download/%s' % file_id
    if filename is not None:
        url += '/' + filename
    return url
Expand All @@ -71,7 +64,12 @@ def _read_url_files(url, data=None, file_elements=None):
file_elements = {}
# Using requests.post sets header 'Accept-encoding' automatically to
# 'gzip,deflate'
response = requests.post(url, data=data, files=file_elements)
response = send_request(
request_method='post',
url=url,
data=data,
files=file_elements,
)
if response.status_code != 200:
raise _parse_server_exception(response, url=url)
if 'Content-Encoding' not in response.headers or \
Expand All @@ -87,12 +85,16 @@ def _read_url(url, data=None):
data['api_key'] = config.apikey

if len(data) == 0 or (len(data) == 1 and 'api_key' in data):
# do a GET
response = requests.get(url, params=data)
else: # an actual post request
response = send_request(
request_method='get', url=url, data=data,
)

else:
# Using requests.post sets header 'Accept-encoding' automatically to
# 'gzip,deflate'
response = requests.post(url, data=data)
response = send_request(
request_method='post', url=url, data=data,
)

if response.status_code != 200:
raise _parse_server_exception(response, url=url)
Expand All @@ -102,12 +104,44 @@ def _read_url(url, data=None):
return response.text


def send_request(
        request_method,
        url,
        data,
        files=None,
):
    """Send an HTTP request to the OpenML server, retrying on failure.

    Connection-level failures (``ConnectionError``, ``SSLError``) are
    retried up to ``config.connection_n_retries`` times with a linearly
    increasing sleep between attempts; the last failure is re-raised.

    Parameters
    ----------
    request_method : str
        Either ``'get'`` or ``'post'``; anything else raises
        ``NotImplementedError``.
    url : str
        Target URL.
    data : dict
        Sent as query parameters for GET requests and as the form body
        for POST requests.
    files : dict, optional
        File payloads, forwarded to ``requests`` for POST requests only.

    Returns
    -------
    requests.Response
        The server's response to the last (successful) attempt.
    """
    n_retries = config.connection_n_retries
    response = None
    with requests.Session() as session:
        # Start at one to have a non-zero multiplier for the sleep
        for i in range(1, n_retries + 1):
            try:
                if request_method == 'get':
                    response = session.get(url, params=data)
                elif request_method == 'post':
                    response = session.post(url, data=data, files=files)
                else:
                    raise NotImplementedError(
                        'Request method %s not supported.' % request_method
                    )
                break
            except (
                requests.exceptions.ConnectionError,
                requests.exceptions.SSLError,
            ):
                if i == n_retries:
                    # Out of retries: propagate the original exception
                    # with its full traceback.
                    raise
                # Back off a little longer after each failed attempt.
                time.sleep(0.1 * i)
    if response is None:
        # Defensive guard: the loop above either sets ``response`` or raises.
        raise ValueError('This should never happen!')
    return response


def _parse_server_exception(response, url=None):
    # OpenML has a sophisticated error system where information
    # about failures is provided. Try to parse this.
try:
server_exception = xmltodict.parse(response.text)
except:
except Exception:
raise OpenMLServerError(('Unexpected server error. Please '
'contact the developers!\nStatus code: '
'%d\n' % response.status_code) + response.text)
Expand All @@ -117,7 +151,7 @@ def _parse_server_exception(response, url=None):
additional = None
if 'oml:additional_information' in server_exception['oml:error']:
additional = server_exception['oml:error']['oml:additional_information']
if code in [372, 512, 500, 482, 542, 674]: # datasets,
if code in [372, 512, 500, 482, 542, 674]:
# 512 for runs, 372 for datasets, 500 for flows
# 482 for tasks, 542 for evaluations, 674 for setups
return OpenMLServerNoResult(code, message, additional)
Expand Down
11 changes: 11 additions & 0 deletions 11 openml/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
'verbosity': 0,
'cachedir': os.path.expanduser(os.path.join('~', '.openml', 'cache')),
'avoid_duplicate_runs': 'True',
'connection_n_retries': 2,
}

config_file = os.path.expanduser(os.path.join('~', '.openml' 'config'))
Expand All @@ -32,6 +33,9 @@
# The current cache directory (without the server name)
cache_directory = ""

# Number of retries if the connection breaks
connection_n_retries = 2


def _setup():
"""Setup openml package. Called on first import.
Expand All @@ -46,6 +50,7 @@ def _setup():
global server
global cache_directory
global avoid_duplicate_runs
global connection_n_retries
# read config file, create cache directory
try:
os.mkdir(os.path.expanduser(os.path.join('~', '.openml')))
Expand All @@ -57,6 +62,12 @@ def _setup():
server = config.get('FAKE_SECTION', 'server')
cache_directory = os.path.expanduser(config.get('FAKE_SECTION', 'cachedir'))
avoid_duplicate_runs = config.getboolean('FAKE_SECTION', 'avoid_duplicate_runs')
connection_n_retries = config.get('FAKE_SECTION', 'connection_n_retries')
if connection_n_retries > 20:
raise ValueError(
'A higher number of retries than 20 is not allowed to keep the '
'server load reasonable'
)


def _parse_config():
Expand Down
5 changes: 5 additions & 0 deletions 5 openml/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ def setUp(self):
with open(openml.config.config_file, 'w') as fh:
fh.write('apikey = %s' % openml.config.apikey)

        # Increase the number of retries to avoid spurious server failures
self.connection_n_retries = openml.config.connection_n_retries
openml.config.connection_n_retries = 10
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused about the number of retries being stored in the _default dict, the openml.config file and self.connection_n_retries. So, by default it is 2, then when that doesn't work, it is increased to 10 and stored in the openml.config, and then it stops (it doesn't get higher than 10)?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, it's increased to 10 for testing in every case. It is stored in the class to restore the original value afterwards.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that it is unfortunate that the value of 2 is twice in the original file, I just made a note to change it later.


def tearDown(self):
os.chdir(self.cwd)
try:
Expand All @@ -76,6 +80,7 @@ def tearDown(self):
else:
raise
openml.config.server = self.production_server
openml.config.connection_n_retries = self.connection_n_retries

def _get_sentinel(self, sentinel=None):
if sentinel is None:
Expand Down
25 changes: 14 additions & 11 deletions 25 tests/test_runs/test_run_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,17 +735,20 @@ def test_get_run_trace(self):
if 'Run already exists in server' not in e.message:
# in this case the error was not the one we expected
raise e
# run was already
flow = openml.flows.sklearn_to_flow(clf)
flow_exists = openml.flows.flow_exists(flow.name, flow.external_version)
self.assertIsInstance(flow_exists, int)
self.assertGreater(flow_exists, 0)
downloaded_flow = openml.flows.get_flow(flow_exists,
reinstantiate=True)
setup_exists = openml.setups.setup_exists(downloaded_flow)
self.assertIsInstance(setup_exists, int)
self.assertGreater(setup_exists, 0)
run_ids = _run_exists(task.task_id, setup_exists)
# run was already performed
message = e.message
if sys.version_info[0] == 2:
# Parse a string like:
# 'Run already exists in server. Run id(s): set([37501])'
run_ids = (
message.split('[')[1].replace(']', '').
replace(')', '').split(',')
)
else:
# Parse a string like:
# "Run already exists in server. Run id(s): {36980}"
run_ids = message.split('{')[1].replace('}', '').split(',')
run_ids = [int(run_id) for run_id in run_ids]
self.assertGreater(len(run_ids), 0)
run_id = random.choice(list(run_ids))

Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.