Commit a918a34 (1 parent: c310941)

Commit message:
* automl initial commit
* lint
* fix import groupings
* add requirements.txt
* address review comments

24 files changed: +2836, −0 lines. The file shown below adds 297 lines:

@@ -0,0 +1,297 @@
#!/usr/bin/env python

# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This application demonstrates how to perform basic dataset operations
with the Google AutoML Natural Language API.

For more information, see the tutorial page at
https://cloud.google.com/natural-language/automl/docs/
"""

import argparse
import os
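
# Assumed prerequisites, not shown in this diff: the `google.cloud.automl_v1beta1`
# imports below need the google-cloud-automl client library (e.g. installed with
# `pip install google-cloud-automl`, presumably what the requirements.txt
# mentioned in the commit message pins), and the CLI at the bottom of this file
# reads the PROJECT_ID and REGION_NAME environment variables.
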
def create_dataset(project_id, compute_region, dataset_name, multilabel=False):
    """Create a dataset."""
    # [START automl_natural_language_create_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_name = 'DATASET_NAME_HERE'
    # multilabel = True for multilabel or False for multiclass

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # A resource that represents a Google Cloud Platform location.
    project_location = client.location_path(project_id, compute_region)

    # Classification type is assigned based on the multilabel value.
    classification_type = "MULTICLASS"
    if multilabel:
        classification_type = "MULTILABEL"

    # Specify the text classification type for the dataset.
    dataset_metadata = {"classification_type": classification_type}

    # Set dataset name and metadata.
    my_dataset = {
        "display_name": dataset_name,
        "text_classification_dataset_metadata": dataset_metadata,
    }

    # Create a dataset with the dataset metadata in the region.
    dataset = client.create_dataset(project_location, my_dataset)

    # Display the dataset information.
    print("Dataset name: {}".format(dataset.name))
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Text classification dataset metadata:")
    print("\t{}".format(dataset.text_classification_dataset_metadata))
    print("Dataset example count: {}".format(dataset.example_count))
    print("Dataset create time:")
    print("\tseconds: {}".format(dataset.create_time.seconds))
    print("\tnanos: {}".format(dataset.create_time.nanos))

    # [END automl_natural_language_create_dataset]

def list_datasets(project_id, compute_region, filter_):
    """List all datasets."""
    # [START automl_natural_language_list_datasets]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # filter_ = 'filter expression here'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # A resource that represents a Google Cloud Platform location.
    project_location = client.location_path(project_id, compute_region)

    # List all the datasets available in the region by applying the filter.
    response = client.list_datasets(project_location, filter_)

    print("List of datasets:")
    for dataset in response:
        # Display the dataset information.
        print("Dataset name: {}".format(dataset.name))
        print("Dataset id: {}".format(dataset.name.split("/")[-1]))
        print("Dataset display name: {}".format(dataset.display_name))
        print("Text classification dataset metadata:")
        print("\t{}".format(dataset.text_classification_dataset_metadata))
        print("Dataset example count: {}".format(dataset.example_count))
        print("Dataset create time:")
        print("\tseconds: {}".format(dataset.create_time.seconds))
        print("\tnanos: {}".format(dataset.create_time.nanos))

    # [END automl_natural_language_list_datasets]

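
# A note on the default filter used by the CLI below (the literal comes from
# this file; the interpretation is hedged, per the AutoML filter syntax):
# "text_classification_dataset_metadata:*" matches datasets whose text
# classification metadata field is set, i.e. it restricts the listing to
# text classification datasets.
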
def get_dataset(project_id, compute_region, dataset_id):
    """Get the dataset."""
    # [START automl_natural_language_get_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Get the complete detail of the dataset.
    dataset = client.get_dataset(dataset_full_id)

    # Display the dataset information.
    print("Dataset name: {}".format(dataset.name))
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Text classification dataset metadata:")
    print("\t{}".format(dataset.text_classification_dataset_metadata))
    print("Dataset example count: {}".format(dataset.example_count))
    print("Dataset create time:")
    print("\tseconds: {}".format(dataset.create_time.seconds))
    print("\tnanos: {}".format(dataset.create_time.nanos))

    # [END automl_natural_language_get_dataset]

def import_data(project_id, compute_region, dataset_id, path):
    """Import labelled items."""
    # [START automl_natural_language_import_data]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'
    # path = 'gs://path/to/file.csv'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Get the multiple Google Cloud Storage URIs from the comma-separated path.
    input_uris = path.split(",")
    input_config = {"gcs_source": {"input_uris": input_uris}}

    # Import data from the input URIs into the dataset.
    response = client.import_data(dataset_full_id, input_config)

    print("Processing import...")
    # Synchronous check of operation status.
    print("Data imported. {}".format(response.result()))

    # [END automl_natural_language_import_data]

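
# A sketch of the import CSV that `path` points at, based on the AutoML
# Natural Language documentation rather than anything in this commit: one
# example per row, either inline text or a gs:// URI to a .txt file,
# followed by one or more labels, e.g.
#
#   gs://my-bucket/review_1.txt,positive
#   "the plot was predictable",negative
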
def export_data(project_id, compute_region, dataset_id, output_uri):
    """Export a dataset to a Google Cloud Storage bucket."""
    # [START automl_natural_language_export_data]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'
    # output_uri = 'gs://location/to/export/data'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Set the output URI.
    output_config = {"gcs_destination": {"output_uri_prefix": output_uri}}

    # Export the data to the output URI.
    response = client.export_data(dataset_full_id, output_config)

    print("Processing export...")
    # Synchronous check of operation status.
    print("Data exported. {}".format(response.result()))

    # [END automl_natural_language_export_data]

def delete_dataset(project_id, compute_region, dataset_id):
    """Delete a dataset."""
    # [START automl_natural_language_delete_dataset]
    # TODO(developer): Uncomment and set the following variables
    # project_id = 'PROJECT_ID_HERE'
    # compute_region = 'COMPUTE_REGION_HERE'
    # dataset_id = 'DATASET_ID_HERE'

    from google.cloud import automl_v1beta1 as automl

    client = automl.AutoMlClient()

    # Get the full path of the dataset.
    dataset_full_id = client.dataset_path(
        project_id, compute_region, dataset_id
    )

    # Delete the dataset.
    response = client.delete_dataset(dataset_full_id)

    # Synchronous check of operation status.
    print("Dataset deleted. {}".format(response.result()))

    # [END automl_natural_language_delete_dataset]

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command")

    create_dataset_parser = subparsers.add_parser(
        "create_dataset", help=create_dataset.__doc__
    )
    create_dataset_parser.add_argument("dataset_name")
    create_dataset_parser.add_argument(
        "multilabel", nargs="?", choices=["False", "True"], default="False"
    )

    list_datasets_parser = subparsers.add_parser(
        "list_datasets", help=list_datasets.__doc__
    )
    list_datasets_parser.add_argument(
        "filter_", nargs="?", default="text_classification_dataset_metadata:*"
    )

    get_dataset_parser = subparsers.add_parser(
        "get_dataset", help=get_dataset.__doc__
    )
    get_dataset_parser.add_argument("dataset_id")

    import_data_parser = subparsers.add_parser(
        "import_data", help=import_data.__doc__
    )
    import_data_parser.add_argument("dataset_id")
    import_data_parser.add_argument("path")

    export_data_parser = subparsers.add_parser(
        "export_data", help=export_data.__doc__
    )
    export_data_parser.add_argument("dataset_id")
    export_data_parser.add_argument("output_uri")

    delete_dataset_parser = subparsers.add_parser(
        "delete_dataset", help=delete_dataset.__doc__
    )
    delete_dataset_parser.add_argument("dataset_id")

    project_id = os.environ["PROJECT_ID"]
    compute_region = os.environ["REGION_NAME"]

    args = parser.parse_args()

    if args.command == "create_dataset":
        multilabel = args.multilabel == "True"
        create_dataset(
            project_id, compute_region, args.dataset_name, multilabel
        )
    elif args.command == "list_datasets":
        list_datasets(project_id, compute_region, args.filter_)
    elif args.command == "get_dataset":
        get_dataset(project_id, compute_region, args.dataset_id)
    elif args.command == "import_data":
        import_data(project_id, compute_region, args.dataset_id, args.path)
    elif args.command == "export_data":
        export_data(
            project_id, compute_region, args.dataset_id, args.output_uri
        )
    elif args.command == "delete_dataset":
        delete_dataset(project_id, compute_region, args.dataset_id)
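
As a usage sketch of the CLI above (the script's file name is not shown in this view, so dataset.py is an assumption, as are the example project, region, bucket, and dataset-ID values; us-central1 is the region the AutoML docs typically use):

    export PROJECT_ID="my-project"
    export REGION_NAME="us-central1"

    python dataset.py create_dataset my_text_dataset True
    python dataset.py list_datasets
    python dataset.py import_data DATASET_ID gs://my-bucket/labels.csv
    python dataset.py export_data DATASET_ID gs://my-bucket/export/
    python dataset.py delete_dataset DATASET_ID

Every command reads PROJECT_ID and REGION_NAME from the environment, so only the per-command positional arguments vary.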
