Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 75 additions & 1 deletion 76 examples/document_conversion_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,83 @@
with open(join(dirname(__file__), '../resources/example.html'), 'r') as document:
config = {'conversion_target': DocumentConversionV1.NORMALIZED_HTML}
print(document_conversion.convert_document(document=document, config=config, media_type='text/html')
.content.decode('utf-8'))
.content)

# Example with JSON: retarget the existing config at ANSWER_UNITS and
# pretty-print what convert_document returns.
with open(join(dirname(__file__), '../resources/example.html'), 'r') as html_file:
    config['conversion_target'] = DocumentConversionV1.ANSWER_UNITS
    answer_units = document_conversion.convert_document(document=html_file, config=config)
    print(json.dumps(answer_units, indent=2))

# Examples of index_document API
print("########## Example of a dry run of index_document with only a document ##########")
example_path = join(dirname(__file__), '../resources/example.html')
with open(example_path, 'r') as html_file:
    # Only a document is supplied here; no metadata is passed.
    config = {'retrieve_and_rank': {'dry_run': 'true'}}
    dry_run_result = document_conversion.index_document(config=config, document=html_file)
    print(json.dumps(dry_run_result, indent=2))

print("########## Example of a dry run of index_document with only metadata ##########")
# No document is opened for this example; only metadata is passed.
metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
config = {'retrieve_and_rank': {'dry_run': 'true'}}
metadata_only_result = document_conversion.index_document(config=config, metadata=metadata)
print(json.dumps(metadata_only_result, indent=2))

print("########## Example of a dry run of index_document with document and metadata ##########")
example_path = join(dirname(__file__), '../resources/example.html')
with open(example_path, 'r') as html_file:
    # Both a document and metadata are passed in this dry run.
    config = {'retrieve_and_rank': {'dry_run': 'true'}}
    metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
    dry_run_result = document_conversion.index_document(
        config=config, document=html_file, metadata=metadata)
    print(json.dumps(dry_run_result, indent=2))

print("########## Example of a dry run of index_document with document, metadata, and additional config for conversion ##########")
example_path = join(dirname(__file__), '../resources/example.html')
with open(example_path, 'r') as html_file:
    # The config carries a 'convert_document' section alongside the
    # 'retrieve_and_rank' section used by the other dry-run examples.
    conversion_options = {
        'normalized_html': {
            'exclude_content': {"xpaths": ["//body/div"]},
        },
    }
    config = {
        'convert_document': conversion_options,
        'retrieve_and_rank': {'dry_run': 'true'},
    }
    metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
    dry_run_result = document_conversion.index_document(
        config=config, document=html_file, metadata=metadata)
    print(json.dumps(dry_run_result, indent=2))

print("########## Example of index_document with document, metadata (A service instance id, SOLR cluster id, and "
      "a SOLR collection name must be provided from the Retrieve and Rank service in order to index) ##########")
example_path = join(dirname(__file__), '../resources/example.html')
with open(example_path, 'r') as html_file:
    # 'dry_run' is 'false' here; the three placeholder values below must be
    # replaced with real Retrieve and Rank identifiers before running.
    retrieve_and_rank_settings = {
        'dry_run': 'false',
        'service_instance_id': 'YOUR RETRIEVE AND RANK SERVICE INSTANCE ID',
        'cluster_id': 'YOUR RETRIEVE AND RANK SERVICE SOLR CLUSTER ID',
        'search_collection': 'YOUR RETRIEVE AND RANK SERVICE SOLR SEARCH COLLECTION NAME',
    }
    config = {'retrieve_and_rank': retrieve_and_rank_settings}
    metadata = {'metadata': [{'name': 'id', 'value': '12345'}]}
    index_result = document_conversion.index_document(
        config=config, document=html_file, metadata=metadata)
    print(json.dumps(index_result, indent=2))
9 changes: 9 additions & 0 deletions 9 resources/simple.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<html>
<head>
<title>Simple HTML Page</title>
</head>
<body>
<h1>Chapter 1</h1>
<p>The content of the first chapter.</p>
</body>
</html>
48 changes: 48 additions & 0 deletions 48 test/test_document_conversion_v1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# coding=utf-8
import os
import responses
import watson_developer_cloud


@responses.activate
def test_success():
    """Call convert_document and index_document against stubbed endpoints.

    Each endpoint is registered with ``responses`` just before it is used;
    the test then checks the recorded request URL (including the version
    query parameter) and the response text, and finally that exactly two
    calls were recorded.
    """
    here = os.path.dirname(__file__)
    versioned_suffix = '?version=2015-12-15'

    service = watson_developer_cloud.DocumentConversionV1(
        username="username", password="password", version='2015-12-15')

    # --- convert_document ---
    convert_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/convert_document'
    convert_response = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><html>'
        '<head><title>Simple HTML Page</title></head>'
        '<body><h1>Chapter 1</h1><p>The content of the first chapter.</p></body></html>'
    )
    responses.add(
        responses.POST,
        convert_url,
        body=convert_response,
        status=200,
        content_type='application/json',
    )

    with open(os.path.join(here, '../resources/simple.html'), 'r') as html_file:
        convert_config = {
            'conversion_target': watson_developer_cloud.DocumentConversionV1.NORMALIZED_HTML,
        }
        service.convert_document(document=html_file, config=convert_config, media_type='text/html')

    first_call = responses.calls[0]
    assert first_call.request.url == convert_url + versioned_suffix
    assert first_call.response.text == convert_response

    # --- index_document ---
    index_url = 'https://gateway.watsonplatform.net/document-conversion/api/v1/index_document'
    index_response = '{"status": "success"}'
    responses.add(
        responses.POST,
        index_url,
        body=index_response,
        status=200,
        content_type='application/json',
    )

    with open(os.path.join(here, '../resources/example.html'), 'r') as html_file:
        index_config = {
            'retrieve_and_rank': {
                'dry_run': 'false',
                'service_instance_id': 'serviceInstanceId',
                'cluster_id': 'clusterId',
                'search_collection': 'searchCollectionName',
            },
        }
        service.index_document(config=index_config, document=html_file)

    second_call = responses.calls[1]
    assert second_call.request.url == index_url + versioned_suffix
    assert second_call.response.text == index_response

    assert len(responses.calls) == 2
13 changes: 13 additions & 0 deletions 13 watson_developer_cloud/document_conversion_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,16 @@ def convert_document(self, document, config, media_type=None):
accept_json = config['conversion_target'] == DocumentConversionV1.ANSWER_UNITS
return self.request(method='POST', url='/v1/convert_document', files=files, params=params,
accept_json=accept_json)

def index_document(self, config, document=None, metadata=None, media_type=None):
    """
    POST a document and/or metadata to the ``/v1/index_document`` endpoint.

    The request is built as a list of multipart parts: a ``config`` part is
    always present, followed by a ``file`` part when a document is given and
    a ``metadata`` part when metadata is given.

    :param config: JSON-serializable configuration, sent as ``config.json``.
    :param document: optional open file object; the base name of its ``name``
        attribute is used as the filename of the ``file`` part.
    :param metadata: optional JSON-serializable metadata, sent as
        ``metadata.json``.
    :param media_type: optional content type for the ``file`` part; left out
        of the part tuple when falsy.
    :return: the result of ``self.request`` (called with ``accept_json=True``).
    :raises AssertionError: if both ``document`` and ``metadata`` are ``None``.
    """
    if document is None and metadata is None:
        raise AssertionError('Missing required parameters: document or metadata. At least one of those is required.')

    params = {'version': self.version}
    files = [('config', ('config.json', json.dumps(config), 'application/json'))]

    # Identity checks keep these branches consistent with the guard above;
    # `!= None` would defer to the argument's own (overridable) __ne__.
    if document is not None:
        filename = os.path.basename(document.name)
        file_tuple = (filename, document, media_type) if media_type else (filename, document)
        files.append(('file', file_tuple))
    if metadata is not None:
        files.append(('metadata', ('metadata.json', json.dumps(metadata), 'application/json')))

    return self.request(method='POST', url='/v1/index_document', files=files, params=params, accept_json=True)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.