Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit b91f9af

Browse files
dizcology and Jon Wayne Parrott
authored and committed
Language classify (GoogleCloudPlatform#1095)
* add classify text samples and tests
* use longer text
* move entity sentiment to v1
* flake
* year when first written
* year first written
1 parent b5d5cad commit b91f9af
Copy full SHA for b91f9af

File tree

Expand file treeCollapse file tree

5 files changed

+141
-69
lines changed
Filter options
Expand file treeCollapse file tree

5 files changed

+141
-69
lines changed

‎language/cloud-client/v1/snippets.py

Copy file name to clipboardExpand all lines: language/cloud-client/v1/snippets.py
+74Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@
2222
"""
2323

2424
import argparse
25+
import sys
2526

2627
from google.cloud import language
2728
from google.cloud.language import enums
2829
from google.cloud.language import types
30+
2931
import six
3032

3133

@@ -192,12 +194,80 @@ def syntax_file(gcs_uri):
192194
# [END def_syntax_file]
193195

194196

197+
# [START def_entity_sentiment_text]
198+
def entity_sentiment_text(text):
    """Detects entity sentiment in the provided text."""
    # NOTE: the one-line docstring above is also used verbatim as the help
    # text of the 'sentiment-entities-text' sub-command, so keep it short.
    client = language.LanguageServiceClient()

    # Accept byte strings as well as text; normalise to text first.
    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')

    document = types.Document(
        content=text.encode('utf-8'),
        type=enums.Document.Type.PLAIN_TEXT)

    # Send the interpreter's native encoding so the word offsets in the
    # response are correct: UTF-16 when sys.maxunicode is 65535, UTF-32
    # otherwise.
    if sys.maxunicode == 65535:
        encoding = enums.EncodingType.UTF16
    else:
        encoding = enums.EncodingType.UTF32

    response = client.analyze_entity_sentiment(document, encoding)

    for entity in response.entities:
        print('Mentions: ')
        print(u'Name: "{}"'.format(entity.name))
        for mention in entity.mentions:
            # One (template, value) pair per printed line, in output order.
            mention_rows = (
                (u' Begin Offset : {}', mention.text.begin_offset),
                (u' Content : {}', mention.text.content),
                (u' Magnitude : {}', mention.sentiment.magnitude),
                (u' Sentiment : {}', mention.sentiment.score),
                (u' Type : {}', mention.type),
            )
            for template, value in mention_rows:
                print(template.format(value))
        print(u'Salience: {}'.format(entity.salience))
        # The trailing '\n' leaves a blank line between entities.
        print(u'Sentiment: {}\n'.format(entity.sentiment))
227+
# [END def_entity_sentiment_text]
228+
229+
230+
def entity_sentiment_file(gcs_uri):
    """Detects entity sentiment in a Google Cloud Storage file."""
    # NOTE: the one-line docstring above is also used verbatim as the help
    # text of the 'sentiment-entities-file' sub-command, so keep it short.
    client = language.LanguageServiceClient()

    # The document is referenced by its Cloud Storage URI instead of being
    # sent inline.
    document = types.Document(
        gcs_content_uri=gcs_uri,
        type=enums.Document.Type.PLAIN_TEXT)

    # Send the interpreter's native encoding so the word offsets in the
    # response are correct: UTF-16 when sys.maxunicode is 65535, UTF-32
    # otherwise.
    if sys.maxunicode == 65535:
        encoding = enums.EncodingType.UTF16
    else:
        encoding = enums.EncodingType.UTF32

    response = client.analyze_entity_sentiment(document, encoding)

    for entity in response.entities:
        print(u'Name: "{}"'.format(entity.name))
        for mention in entity.mentions:
            # One (template, value) pair per printed line, in output order.
            mention_rows = (
                (u' Begin Offset : {}', mention.text.begin_offset),
                (u' Content : {}', mention.text.content),
                (u' Magnitude : {}', mention.sentiment.magnitude),
                (u' Sentiment : {}', mention.sentiment.score),
                (u' Type : {}', mention.type),
            )
            for template, value in mention_rows:
                print(template.format(value))
        print(u'Salience: {}'.format(entity.salience))
        # The trailing '\n' leaves a blank line between entities.
        print(u'Sentiment: {}\n'.format(entity.sentiment))
255+
256+
195257
if __name__ == '__main__':
196258
parser = argparse.ArgumentParser(
197259
description=__doc__,
198260
formatter_class=argparse.RawDescriptionHelpFormatter)
199261
subparsers = parser.add_subparsers(dest='command')
200262

263+
sentiment_entities_text_parser = subparsers.add_parser(
264+
'sentiment-entities-text', help=entity_sentiment_text.__doc__)
265+
sentiment_entities_text_parser.add_argument('text')
266+
267+
sentiment_entities_file_parser = subparsers.add_parser(
268+
'sentiment-entities-file', help=entity_sentiment_file.__doc__)
269+
sentiment_entities_file_parser.add_argument('gcs_uri')
270+
201271
sentiment_text_parser = subparsers.add_parser(
202272
'sentiment-text', help=sentiment_text.__doc__)
203273
sentiment_text_parser.add_argument('text')
@@ -236,3 +306,7 @@ def syntax_file(gcs_uri):
236306
syntax_text(args.text)
237307
elif args.command == 'syntax-file':
238308
syntax_file(args.gcs_uri)
309+
elif args.command == 'sentiment-entities-text':
310+
entity_sentiment_text(args.text)
311+
elif args.command == 'sentiment-entities-file':
312+
entity_sentiment_file(args.gcs_uri)

‎language/cloud-client/v1/snippets_test.py

Copy file name to clipboardExpand all lines: language/cloud-client/v1/snippets_test.py
+22-1Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# Copyright 2016 Google, Inc.
1+
# -*- coding: utf-8 -*-
2+
# Copyright 2017 Google, Inc.
23
#
34
# Licensed under the Apache License, Version 2.0 (the "License");
45
# you may not use this file except in compliance with the License.
@@ -56,3 +57,23 @@ def test_syntax_file(capsys):
5657
snippets.syntax_file(TEST_FILE_URL)
5758
out, _ = capsys.readouterr()
5859
assert 'NOUN: President' in out
60+
61+
62+
def test_sentiment_entities_text(capsys):
    """Inline text: the printed mentions include 'White House'."""
    sample = 'President Obama is speaking at the White House.'
    snippets.entity_sentiment_text(sample)
    # readouterr() yields (stdout, stderr); only stdout is inspected.
    stdout = capsys.readouterr()[0]
    assert 'Content : White House' in stdout
67+
68+
69+
def test_sentiment_entities_file(capsys):
    """Cloud Storage file: the printed mentions include 'White House'."""
    snippets.entity_sentiment_file(TEST_FILE_URL)
    # readouterr() yields (stdout, stderr); only stdout is inspected.
    stdout = capsys.readouterr()[0]
    assert 'Content : White House' in stdout
73+
74+
75+
def test_sentiment_entities_utf(capsys):
    """Non-ASCII text: a begin offset of 4 is reported for 'foo→bar'."""
    sample = 'foo→bar'
    snippets.entity_sentiment_text(sample)
    # readouterr() yields (stdout, stderr); only stdout is inspected.
    stdout = capsys.readouterr()[0]
    assert 'Begin Offset : 4' in stdout
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Android is a mobile operating system developed by Google, based on the Linux kernel and designed primarily for touchscreen mobile devices such as smartphones and tablets.

‎language/cloud-client/v1beta2/snippets.py

Copy file name to clipboardExpand all lines: language/cloud-client/v1beta2/snippets.py
+32-54Lines changed: 32 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python
22

3-
# Copyright 2017 Google, Inc.
3+
# Copyright 2016 Google, Inc.
44
#
55
# Licensed under the Apache License, Version 2.0 (the "License");
66
# you may not use this file except in compliance with the License.
@@ -22,7 +22,6 @@
2222
"""
2323

2424
import argparse
25-
import sys
2625

2726
# [START beta_import]
2827
from google.cloud import language_v1beta2
@@ -173,9 +172,9 @@ def syntax_file(gcs_uri):
173172
token.text.content))
174173

175174

176-
# [START def_entity_sentiment_text]
177-
def entity_sentiment_text(text):
178-
"""Detects entity sentiment in the provided text."""
175+
# [START def_classify_text]
176+
def classify_text(text):
177+
"""Classifies the provided text."""
179178
# [START beta_client]
180179
client = language_v1beta2.LanguageServiceClient()
181180
# [END beta_client]
@@ -187,52 +186,31 @@ def entity_sentiment_text(text):
187186
content=text.encode('utf-8'),
188187
type=enums.Document.Type.PLAIN_TEXT)
189188

190-
# Pass in encoding type to get useful offsets in the response.
191-
encoding = enums.EncodingType.UTF32
192-
if sys.maxunicode == 65535:
193-
encoding = enums.EncodingType.UTF16
194-
195-
result = client.analyze_entity_sentiment(document, encoding)
196-
197-
for entity in result.entities:
198-
print('Mentions: ')
199-
print(u'Name: "{}"'.format(entity.name))
200-
for mention in entity.mentions:
201-
print(u' Begin Offset : {}'.format(mention.text.begin_offset))
202-
print(u' Content : {}'.format(mention.text.content))
203-
print(u' Magnitude : {}'.format(mention.sentiment.magnitude))
204-
print(u' Sentiment : {}'.format(mention.sentiment.score))
205-
print(u' Type : {}'.format(mention.type))
206-
print(u'Salience: {}'.format(entity.salience))
207-
print(u'Sentiment: {}\n'.format(entity.sentiment))
208-
# [END def_entity_sentiment_text]
209-
210-
211-
def entity_sentiment_file(gcs_uri):
212-
"""Detects entity sentiment in a Google Cloud Storage file."""
189+
categories = client.classify_text(document).categories
190+
191+
for category in categories:
192+
print(u'=' * 20)
193+
print(u'{:<16}: {}'.format('name', category.name))
194+
print(u'{:<16}: {}'.format('confidence', category.confidence))
195+
# [END def_classify_text]
196+
197+
198+
# [START def_classify_file]
199+
def classify_file(gcs_uri):
200+
"""Classifies the text in a Google Cloud Storage file."""
213201
client = language_v1beta2.LanguageServiceClient()
214202

215203
document = types.Document(
216204
gcs_content_uri=gcs_uri,
217205
type=enums.Document.Type.PLAIN_TEXT)
218206

219-
# Pass in encoding type to get useful offsets in the response.
220-
encoding = enums.EncodingType.UTF32
221-
if sys.maxunicode == 65535:
222-
encoding = enums.EncodingType.UTF16
223-
224-
result = client.analyze_entity_sentiment(document, encoding)
207+
categories = client.classify_text(document).categories
225208

226-
for entity in result.entities:
227-
print(u'Name: "{}"'.format(entity.name))
228-
for mention in entity.mentions:
229-
print(u' Begin Offset : {}'.format(mention.text.begin_offset))
230-
print(u' Content : {}'.format(mention.text.content))
231-
print(u' Magnitude : {}'.format(mention.sentiment.magnitude))
232-
print(u' Sentiment : {}'.format(mention.sentiment.score))
233-
print(u' Type : {}'.format(mention.type))
234-
print(u'Salience: {}'.format(entity.salience))
235-
print(u'Sentiment: {}\n'.format(entity.sentiment))
209+
for category in categories:
210+
print(u'=' * 20)
211+
print(u'{:<16}: {}'.format('name', category.name))
212+
print(u'{:<16}: {}'.format('confidence', category.confidence))
213+
# [END def_classify_file]
236214

237215

238216
if __name__ == '__main__':
@@ -241,13 +219,13 @@ def entity_sentiment_file(gcs_uri):
241219
formatter_class=argparse.RawDescriptionHelpFormatter)
242220
subparsers = parser.add_subparsers(dest='command')
243221

244-
sentiment_entities_text_parser = subparsers.add_parser(
245-
'sentiment-entities-text', help=entity_sentiment_text.__doc__)
246-
sentiment_entities_text_parser.add_argument('text')
222+
classify_text_parser = subparsers.add_parser(
223+
'classify-text', help=classify_text.__doc__)
224+
classify_text_parser.add_argument('text')
247225

248-
sentiment_entities_file_parser = subparsers.add_parser(
249-
'sentiment-entities-file', help=entity_sentiment_file.__doc__)
250-
sentiment_entities_file_parser.add_argument('gcs_uri')
226+
classify_text_parser = subparsers.add_parser(
227+
'classify-file', help=classify_file.__doc__)
228+
classify_text_parser.add_argument('gcs_uri')
251229

252230
sentiment_text_parser = subparsers.add_parser(
253231
'sentiment-text', help=sentiment_text.__doc__)
@@ -287,7 +265,7 @@ def entity_sentiment_file(gcs_uri):
287265
syntax_text(args.text)
288266
elif args.command == 'syntax-file':
289267
syntax_file(args.gcs_uri)
290-
elif args.command == 'sentiment-entities-text':
291-
entity_sentiment_text(args.text)
292-
elif args.command == 'sentiment-entities-file':
293-
entity_sentiment_file(args.gcs_uri)
268+
elif args.command == 'classify-text':
269+
classify_text(args.text)
270+
elif args.command == 'classify-file':
271+
classify_file(args.gcs_uri)

‎language/cloud-client/v1beta2/snippets_test.py

Copy file name to clipboardExpand all lines: language/cloud-client/v1beta2/snippets_test.py
+12-14Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
2121
TEST_FILE_URL = 'gs://{}/text.txt'.format(BUCKET)
22+
LONG_TEST_FILE_URL = 'gs://{}/android_text.txt'.format(BUCKET)
2223

2324

2425
def test_sentiment_text(capsys):
@@ -68,21 +69,18 @@ def test_syntax_file(capsys):
6869
assert 'NOUN: President' in out
6970

7071

71-
def test_sentiment_entities_text(capsys):
72-
snippets.entity_sentiment_text(
73-
'President Obama is speaking at the White House.')
72+
def test_classify_text(capsys):
73+
snippets.classify_text(
74+
'Android is a mobile operating system developed by Google, '
75+
'based on the Linux kernel and designed primarily for touchscreen '
76+
'mobile devices such as smartphones and tablets.')
7477
out, _ = capsys.readouterr()
75-
assert 'Content : White House' in out
76-
77-
78-
def test_sentiment_entities_file(capsys):
79-
snippets.entity_sentiment_file(TEST_FILE_URL)
80-
out, _ = capsys.readouterr()
81-
assert 'Content : White House' in out
78+
assert 'name' in out
79+
assert '/Computers & Electronics' in out
8280

8381

84-
def test_sentiment_entities_utf(capsys):
85-
snippets.entity_sentiment_text(
86-
'foo→bar')
82+
def test_classify_file(capsys):
83+
snippets.classify_file(LONG_TEST_FILE_URL)
8784
out, _ = capsys.readouterr()
88-
assert 'Begin Offset : 4' in out
85+
assert 'name' in out
86+
assert '/Computers & Electronics' in out

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.