diff --git a/examples/document_conversion_v1.py b/examples/document_conversion_v1.py index 536ac0db0..8c7c17836 100644 --- a/examples/document_conversion_v1.py +++ b/examples/document_conversion_v1.py @@ -13,9 +13,83 @@ with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: config = {'conversion_target': DocumentConversionV1.NORMALIZED_HTML} print(document_conversion.convert_document(document=document, config=config, media_type='text/html') - .content.decode('utf-8')) + .content) # Example with JSON with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: config['conversion_target'] = DocumentConversionV1.ANSWER_UNITS print(json.dumps(document_conversion.convert_document(document=document, config=config), indent=2)) + +# Examples of index_document API +print("########## Example of a dry run of index_document with only a document ##########") +with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: + config = { + 'retrieve_and_rank': { + 'dry_run':'true' + } + } + print(json.dumps(document_conversion.index_document(config=config, document=document), indent=2)) + +print("########## Example of a dry run of index_document with only metadata ##########") +config = { + 'retrieve_and_rank': { + 'dry_run':'true' + } +} +metadata = { + 'metadata': [ + {'name':'id', 'value':'12345'} + ] +} +print(json.dumps(document_conversion.index_document(config=config, metadata=metadata), indent=2)) + +print("########## Example of a dry run of index_document with document and metadata ##########") +with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: + config = { + 'retrieve_and_rank': { + 'dry_run':'true' + } + } + metadata = { + 'metadata': [ + {'name':'id', 'value':'12345'} + ] + } + print(json.dumps(document_conversion.index_document(config=config, document=document, metadata=metadata), indent=2)) + +print("########## Example of a dry run of index_document with document, 
metadata, and additional config for conversion ##########") +with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: + config = { + 'convert_document': { + 'normalized_html': { + 'exclude_content': {"xpaths":["//body/div"]} + } + }, + 'retrieve_and_rank': { + 'dry_run':'true' + } + } + metadata = { + 'metadata': [ + {'name':'id', 'value':'12345'} + ] + } + print(json.dumps(document_conversion.index_document(config=config, document=document, metadata=metadata), indent=2)) + +print("########## Example of index_document with document, metadata (A service instance id, SOLR cluster id, and " + "a SOLR collection name must be provided from the Retrieve and Rank service in order to index) ##########") +with open(join(dirname(__file__), '../resources/example.html'), 'r') as document: + config = { + 'retrieve_and_rank': { + 'dry_run':'false', + 'service_instance_id':'YOUR RETRIEVE AND RANK SERVICE INSTANCE ID', + 'cluster_id':'YOUR RETRIEVE AND RANK SERVICE SOLR CLUSTER ID', + 'search_collection':'YOUR RETRIEVE AND RANK SERVICE SOLR SEARCH COLLECTION NAME' + } + } + metadata = { + 'metadata': [ + {'name':'id', 'value':'12345'} + ] + } + print(json.dumps(document_conversion.index_document(config=config, document=document, metadata=metadata), indent=2)) diff --git a/resources/simple.html b/resources/simple.html new file mode 100644 index 000000000..1ad0599fc --- /dev/null +++ b/resources/simple.html @@ -0,0 +1,9 @@ + +
+The content of the first chapter.
+ + \ No newline at end of file diff --git a/test/test_document_conversion_v1.py b/test/test_document_conversion_v1.py new file mode 100644 index 000000000..8740cd38e --- /dev/null +++ b/test/test_document_conversion_v1.py @@ -0,0 +1,48 @@ +# coding=utf-8 +import os +import responses +import watson_developer_cloud + + +@responses.activate +def test_success(): + convert_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/convert_document' + convert_response = '' \ + 'The content of the first chapter.
' + document_conversion = watson_developer_cloud.DocumentConversionV1( + username="username", password="password", version='2015-12-15') + + responses.add(responses.POST, convert_url, + body=convert_response, status=200, + content_type='application/json') + + with open(os.path.join(os.path.dirname(__file__), '../resources/simple.html'), 'r') as document: + convertConfig = {'conversion_target': watson_developer_cloud.DocumentConversionV1.NORMALIZED_HTML} + document_conversion.convert_document(document=document, config=convertConfig, media_type='text/html') + + assert responses.calls[0].request.url == convert_url + '?version=2015-12-15' + assert responses.calls[0].response.text == convert_response + + index_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/index_document' + index_response = '{"status": "success"}' + + responses.add(responses.POST, index_url, + body=index_response, status=200, + content_type='application/json') + + with open(os.path.join(os.path.dirname(__file__), '../resources/example.html'), 'r') as document: + indexConfig = { + 'retrieve_and_rank': { + 'dry_run':'false', + 'service_instance_id':'serviceInstanceId', + 'cluster_id':'clusterId', + 'search_collection':'searchCollectionName' + } + } + document_conversion.index_document(config=indexConfig, document=document) + + assert responses.calls[1].request.url == index_url + '?version=2015-12-15' + assert responses.calls[1].response.text == index_response + + assert len(responses.calls) == 2 diff --git a/watson_developer_cloud/document_conversion_v1.py b/watson_developer_cloud/document_conversion_v1.py index 31430053e..ac1b1bba2 100644 --- a/watson_developer_cloud/document_conversion_v1.py +++ b/watson_developer_cloud/document_conversion_v1.py @@ -37,3 +37,16 @@ def convert_document(self, document, config, media_type=None): accept_json = config['conversion_target'] == DocumentConversionV1.ANSWER_UNITS return self.request(method='POST', url='/v1/convert_document', files=files, 
def index_document(self, config, document=None, metadata=None, media_type=None):
    """Submit a document and/or metadata to the ``/v1/index_document`` endpoint.

    The request is a multipart POST built from up to three parts: a
    ``config`` part (always sent), a ``file`` part (only when ``document``
    is given) and a ``metadata`` part (only when ``metadata`` is given).

    :param config: JSON-serializable configuration; sent as ``config.json``.
    :param document: optional file-like object exposing ``.name``; only the
        base name of ``document.name`` is sent as the part's filename.
    :param metadata: optional JSON-serializable metadata; sent as
        ``metadata.json``.
    :param media_type: optional content type for the ``file`` part; when
        falsy, the part is sent without an explicit content type.
    :return: whatever ``self.request`` returns for the call, which is made
        with ``accept_json=True``.
    :raises AssertionError: if both ``document`` and ``metadata`` are
        ``None``.
    """
    if document is None and metadata is None:
        # AssertionError is kept (rather than ValueError) so that existing
        # callers catching it keep working.
        raise AssertionError('Missing required parameters: document or metadata. At least one of those is required.')

    params = {'version': self.version}
    files = [('config', ('config.json', json.dumps(config), 'application/json'))]

    # Identity checks against None: ``!= None`` would dispatch to the
    # object's own ``__ne__`` and could silently skip a valid argument.
    if document is not None:
        filename = os.path.basename(document.name)
        file_tuple = (filename, document, media_type) if media_type else (filename, document)
        files.append(('file', file_tuple))
    if metadata is not None:
        files.append(('metadata', ('metadata.json', json.dumps(metadata), 'application/json')))

    return self.request(method='POST', url='/v1/index_document', files=files, params=params,
                        accept_json=True)