From e7801268be189c7d09eb642c37c3be6eb06ed9fb Mon Sep 17 00:00:00 2001 From: "Samir J. Patel" Date: Tue, 5 Jul 2016 11:32:13 -0400 Subject: [PATCH 1/7] [document-conversion] Adds index document API This commit adds the index_document API for the document conversion service for the python SDK. It also includes updated examples for using the index_document API. --- examples/document_conversion_v1.py | 76 ++++++++++++++++++- .../document_conversion_v1.py | 13 ++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/examples/document_conversion_v1.py b/examples/document_conversion_v1.py index 536ac0db0..8c7c17836 100644 --- a/examples/document_conversion_v1.py +++ b/examples/document_conversion_v1.py @@ -13,9 +13,83 @@ with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: config = {'conversion_target': DocumentConversionV1.NORMALIZED_HTML} print(document_conversion.convert_document(document=document, config=config, media_type='text/html') - .content.decode('utf-8')) + .content) # Example with JSON with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: config['conversion_target'] = DocumentConversionV1.ANSWER_UNITS print(json.dumps(document_conversion.convert_document(document=document, config=config), indent=2)) + +# Examples of index_document API +print("########## Example of a dry run of index_document with only a document ##########") +with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: + config = { + 'retrieve_and_rank': { + 'dry_run':'true' + } + } + print(json.dumps(document_conversion.index_document(config=config, document=document), indent=2)) + +print("########## Example of a dry run of index_document with only metadata ##########") +config = { + 'retrieve_and_rank': { + 'dry_run':'true' + } +} +metadata = { + 'metadata': [ + {'name':'id', 'value':'12345'} + ] +} +print(json.dumps(document_conversion.index_document(config=config, metadata=metadata), indent=2)) + 
+print("########## Example of a dry run of index_document with document and metadata ##########") +with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: + config = { + 'retrieve_and_rank': { + 'dry_run':'true' + } + } + metadata = { + 'metadata': [ + {'name':'id', 'value':'12345'} + ] + } + print(json.dumps(document_conversion.index_document(config=config, document=document, metadata=metadata), indent=2)) + +print("########## Example of a dry run of index_document with document, metadata, and additional config for conversion ##########") +with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: + config = { + 'convert_document': { + 'normalized_html': { + 'exclude_content': {"xpaths":["//body/div"]} + } + }, + 'retrieve_and_rank': { + 'dry_run':'true' + } + } + metadata = { + 'metadata': [ + {'name':'id', 'value':'12345'} + ] + } + print(json.dumps(document_conversion.index_document(config=config, document=document, metadata=metadata), indent=2)) + +print("########## Example of index_document with document, metadata (A service instance id, SOLR cluster id, and " + "a SOLR collection name must be provided from the Retrieve and Rank service in order to index) ##########") +with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: + config = { + 'retrieve_and_rank': { + 'dry_run':'false', + 'service_instance_id':'YOUR RETRIEVE AND RANK SERVICE INSTANCE ID', + 'cluster_id':'YOUR RETRIEVE AND RANK SERVICE SOLR CLUSTER ID', + 'search_collection':'YOUR RETRIEVE AND RANK SERVICE SOLR SEARCH COLLECTION NAME' + } + } + metadata = { + 'metadata': [ + {'name':'id', 'value':'12345'} + ] + } + print(json.dumps(document_conversion.index_document(config=config, document=document, metadata=metadata), indent=2)) diff --git a/watson_developer_cloud/document_conversion_v1.py b/watson_developer_cloud/document_conversion_v1.py index 31430053e..9ad5e29b3 100644 --- 
a/watson_developer_cloud/document_conversion_v1.py +++ b/watson_developer_cloud/document_conversion_v1.py @@ -37,3 +37,16 @@ def convert_document(self, document, config, media_type=None): accept_json = config['conversion_target'] == DocumentConversionV1.ANSWER_UNITS return self.request(method='POST', url='/v1/convert_document', files=files, params=params, accept_json=accept_json) + + def index_document(self, config, document=None, metadata=None, media_type=None): + if document == None and metadata == None: + raise AssertionError('Missing required parameters: document or metadata. At least one of those is required.') + params = {'version': self.version} + files = [('config', ('config.json', json.dumps(config), 'application/json'))] + if document != None: + filename = os.path.basename(document.name) + file_tuple = (filename, document, media_type) if media_type else (filename, document) + files.append(('file', file_tuple)) + if metadata != None: + files.append(('metadata', ('metadata.json', json.dumps(metadata), 'application/json'))) + return self.request(method='POST', url='/v1/index_document', files=files, params=params, accept_json=True) From f7c791c7de880d4767f70e5ada43f7621b9f6d4a Mon Sep 17 00:00:00 2001 From: "Samir J. Patel" Date: Tue, 5 Jul 2016 13:44:40 -0400 Subject: [PATCH 2/7] [document-conversion] Fixes conditional statement Fixes a conditional statement to resolve Travis CI failure. 
--- watson_developer_cloud/document_conversion_v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/watson_developer_cloud/document_conversion_v1.py b/watson_developer_cloud/document_conversion_v1.py index 9ad5e29b3..ac1b1bba2 100644 --- a/watson_developer_cloud/document_conversion_v1.py +++ b/watson_developer_cloud/document_conversion_v1.py @@ -39,7 +39,7 @@ def convert_document(self, document, config, media_type=None): accept_json=accept_json) def index_document(self, config, document=None, metadata=None, media_type=None): - if document == None and metadata == None: + if document is None and metadata is None: raise AssertionError('Missing required parameters: document or metadata. At least one of those is required.') params = {'version': self.version} files = [('config', ('config.json', json.dumps(config), 'application/json'))] From 0f41152a9f762bb531816b0d836ca453776ddb59 Mon Sep 17 00:00:00 2001 From: "Samir J. Patel" Date: Tue, 5 Jul 2016 16:50:11 -0400 Subject: [PATCH 3/7] [document-conversion] Adds unit tests This commit adds convert_document and index_document unit tests for the python sdk project. --- resources/simple.html | 9 +++++ test/test_document_conversion_v1.py | 54 +++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 resources/simple.html create mode 100644 test/test_document_conversion_v1.py diff --git a/resources/simple.html b/resources/simple.html new file mode 100644 index 000000000..1ad0599fc --- /dev/null +++ b/resources/simple.html @@ -0,0 +1,9 @@ + + + Simple HTML Page + + +

Chapter 1

+

The content of the first chapter.

+ + \ No newline at end of file diff --git a/test/test_document_conversion_v1.py b/test/test_document_conversion_v1.py new file mode 100644 index 000000000..30475e3f5 --- /dev/null +++ b/test/test_document_conversion_v1.py @@ -0,0 +1,54 @@ +# coding=utf-8 +import os +import responses +import watson_developer_cloud + + +@responses.activate +def test_success(): + convert_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/convert_document' + convert_response = '' \ + 'Simple HTML Page' \ + '

Chapter 1

The content of the first chapter.

' + document_conversion = watson_developer_cloud.DocumentConversionV1( + username="username", password="password") + + responses.add(responses.POST, convert_url, + body=convert_response, status=200, + content_type='application/json') + + with open(os.path.join(os.path.dirname(__file__), '../resources/simple.html'), 'r') as document: + config = {'conversion_target': watson_developer_cloud.DocumentConversionV1.NORMALIZED_HTML} + document_conversion.convert_document( + document=document, config=config, media_type='text/html') + + assert responses.calls[ + 1].request.url == convert_url + assert responses.calls[1].response.text == convert_response + + assert len(responses.calls) == 2 + + index_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/index_document' + index_response = '{"status": "success"}' + + responses.add(responses.POST, index_url, + body=index_response, status=200, + content_type='application/json') + + with open(os.path.join(os.path.dirname(__file__), '../resources/simple.html'), 'r') as document: + config = { + 'retrieve_and_rank': { + 'dry_run':'false', + 'service_instance_id':'serviceInstanceId', + 'cluster_id':'clusterId', + 'search_collection':'searchCollectionName' + } + } + document_conversion.index_document( + config=config, document=document) + + assert responses.calls[ + 1].request.url == index_url + assert responses.calls[1].response.text == index_response + + assert len(responses.calls) == 2 From 0de97a50dfb2aaf4984b141b4fd9be76d4fa721f Mon Sep 17 00:00:00 2001 From: "Samir J. Patel" Date: Tue, 5 Jul 2016 17:01:46 -0400 Subject: [PATCH 4/7] [document-conversion] Adds version param Adds version parameter to the DocumentConversionV1 object in the unit test. 
--- test/test_document_conversion_v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_document_conversion_v1.py b/test/test_document_conversion_v1.py index 30475e3f5..8f9c6de85 100644 --- a/test/test_document_conversion_v1.py +++ b/test/test_document_conversion_v1.py @@ -11,7 +11,7 @@ def test_success(): 'Simple HTML Page' \ '

Chapter 1

The content of the first chapter.

' document_conversion = watson_developer_cloud.DocumentConversionV1( - username="username", password="password") + username="username", password="password", version='2015-12-15') responses.add(responses.POST, convert_url, body=convert_response, status=200, From c5f74f03ea75793dcc8c93c6519f7c52db85ef3b Mon Sep 17 00:00:00 2001 From: "Samir J. Patel" Date: Wed, 6 Jul 2016 11:59:25 -0400 Subject: [PATCH 5/7] [document-conversion] Fixes unit tests This commit includes updated fixes for the document conversion unit tests. --- test/test_document_conversion_v1.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/test/test_document_conversion_v1.py b/test/test_document_conversion_v1.py index 8f9c6de85..7cd856e2a 100644 --- a/test/test_document_conversion_v1.py +++ b/test/test_document_conversion_v1.py @@ -18,15 +18,11 @@ def test_success(): content_type='application/json') with open(os.path.join(os.path.dirname(__file__), '../resources/simple.html'), 'r') as document: - config = {'conversion_target': watson_developer_cloud.DocumentConversionV1.NORMALIZED_HTML} - document_conversion.convert_document( - document=document, config=config, media_type='text/html') + convertConfig = {'conversion_target': watson_developer_cloud.DocumentConversionV1.NORMALIZED_HTML} + document_conversion.convert_document(document=document, config=convertConfig, media_type='text/html') - assert responses.calls[ - 1].request.url == convert_url - assert responses.calls[1].response.text == convert_response - - assert len(responses.calls) == 2 + assert responses.calls[0].request.url == convert_url + assert responses.calls[0].response.text == convert_response index_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/index_document' index_response = '{"status": "success"}' @@ -35,8 +31,8 @@ def test_success(): body=index_response, status=200, content_type='application/json') - with open(os.path.join(os.path.dirname(__file__), 
'../resources/simple.html'), 'r') as document: - config = { + with open(os.path.join(os.path.dirname(__file__), '../resources/example.html'), 'r') as document: + indexConfig = { 'retrieve_and_rank': { 'dry_run':'false', 'service_instance_id':'serviceInstanceId', @@ -44,11 +40,9 @@ def test_success(): 'search_collection':'searchCollectionName' } } - document_conversion.index_document( - config=config, document=document) + document_conversion.index_document(config=indexConfig, document=document) - assert responses.calls[ - 1].request.url == index_url + assert responses.calls[1].request.url == index_url assert responses.calls[1].response.text == index_response assert len(responses.calls) == 2 From 85fa70d7bb70b41a9125401ca9df9c720507fd05 Mon Sep 17 00:00:00 2001 From: "Samir J. Patel" Date: Wed, 6 Jul 2016 12:13:19 -0400 Subject: [PATCH 6/7] [document-conversion] Add version param to URLs Adding the version parameter to the convert and index URLs for the document conversion unit tests. --- test/test_document_conversion_v1.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_document_conversion_v1.py b/test/test_document_conversion_v1.py index 7cd856e2a..fa7bdb6f4 100644 --- a/test/test_document_conversion_v1.py +++ b/test/test_document_conversion_v1.py @@ -6,7 +6,7 @@ @responses.activate def test_success(): - convert_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/convert_document' + convert_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/convert_document?version=2015-12-15' convert_response = '' \ 'Simple HTML Page' \ '

Chapter 1

The content of the first chapter.

' @@ -24,7 +24,7 @@ def test_success(): assert responses.calls[0].request.url == convert_url assert responses.calls[0].response.text == convert_response - index_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/index_document' + index_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/index_document?version=2015-12-15' index_response = '{"status": "success"}' responses.add(responses.POST, index_url, From b1660570895e1e63844856a5cefcf54f5e37ef1a Mon Sep 17 00:00:00 2001 From: "Samir J. Patel" Date: Wed, 6 Jul 2016 12:28:58 -0400 Subject: [PATCH 7/7] [document-conversion] Fixes unit tests Fix for document conversion unit test failure - fixes assertion of convert & index URLs --- test/test_document_conversion_v1.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_document_conversion_v1.py b/test/test_document_conversion_v1.py index fa7bdb6f4..8740cd38e 100644 --- a/test/test_document_conversion_v1.py +++ b/test/test_document_conversion_v1.py @@ -6,7 +6,7 @@ @responses.activate def test_success(): - convert_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/convert_document?version=2015-12-15' + convert_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/convert_document' convert_response = '' \ 'Simple HTML Page' \ '

Chapter 1

The content of the first chapter.

' @@ -21,10 +21,10 @@ def test_success(): convertConfig = {'conversion_target': watson_developer_cloud.DocumentConversionV1.NORMALIZED_HTML} document_conversion.convert_document(document=document, config=convertConfig, media_type='text/html') - assert responses.calls[0].request.url == convert_url + assert responses.calls[0].request.url == convert_url + '?version=2015-12-15' assert responses.calls[0].response.text == convert_response - index_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/index_document?version=2015-12-15' + index_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/index_document' index_response = '{"status": "success"}' responses.add(responses.POST, index_url, @@ -42,7 +42,7 @@ def test_success(): } document_conversion.index_document(config=indexConfig, document=document) - assert responses.calls[1].request.url == index_url + assert responses.calls[1].request.url == index_url + '?version=2015-12-15' assert responses.calls[1].response.text == index_response assert len(responses.calls) == 2