Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

automl: add base dataset samples for automl ga #2608

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jan 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
cc7696a
automl: add base dataset samples for automl ga
nnegrey Dec 12, 2019
b738a75
Use a unique prefix
nnegrey Dec 12, 2019
e6efade
Merge branch 'master' into automl-ga-base-dataset-samples
nnegrey Dec 13, 2019
742a40c
Move test imports to top / misc feedback cleanup
nnegrey Dec 17, 2019
2faddb1
Update tests
nnegrey Dec 17, 2019
bd93782
Use centralized testing project for automl resources
nnegrey Dec 18, 2019
5e04d30
Test fix
nnegrey Dec 18, 2019
48750b9
Merge branch 'master' into automl-ga-base-dataset-samples
nnegrey Dec 18, 2019
6e8ba57
Consistently use double quotes
nnegrey Dec 20, 2019
8817279
Merge branch 'master' into automl-ga-base-dataset-samples
nnegrey Jan 2, 2020
b16154f
License year 2020
nnegrey Jan 2, 2020
6449029
Merge branch 'master' into automl-ga-base-dataset-samples
nnegrey Jan 2, 2020
b881b20
Use a fake dataset to fix the flaky test as only one operation can wo…
nnegrey Jan 2, 2020
f97c8d6
Merge branch 'automl-ga-base-dataset-samples' of https://github.com/G…
nnegrey Jan 2, 2020
e4d0ef0
Merge branch 'master' into automl-ga-base-dataset-samples
nnegrey Jan 3, 2020
f531b31
Merge branch 'master' into automl-ga-base-dataset-samples
nnegrey Jan 7, 2020
a70f4dd
use centralized automl testing project
nnegrey Jan 7, 2020
4dbaf3b
use different automl product to import data
nnegrey Jan 7, 2020
ca45621
Merge branch 'master' into automl-ga-base-dataset-samples
nnegrey Jan 7, 2020
7717393
Merge branch 'master' into automl-ga-base-dataset-samples
nnegrey Jan 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions 33 automl/cloud-client/delete_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def delete_dataset(project_id, dataset_id):
    """Delete an AutoML dataset and print the result of the operation.

    Args:
        project_id: ID of the Google Cloud project that holds the dataset.
        dataset_id: ID of the dataset to delete.
    """
    # [START automl_delete_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # dataset_id = "YOUR_DATASET_ID"

    client = automl.AutoMlClient()
    # Build the fully qualified resource name of the dataset
    location = "us-central1"
    dataset_name = client.dataset_path(project_id, location, dataset_id)

    # Request the deletion and resolve the operation's result
    operation = client.delete_dataset(dataset_name)
    result = operation.result()

    print("Dataset deleted. {}".format(result))
    # [END automl_delete_dataset]
46 changes: 46 additions & 0 deletions 46 automl/cloud-client/delete_dataset_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import os

from google.cloud import automl
import pytest

import delete_dataset

# Project used for the AutoML test resources; the AUTOML_PROJECT_ID
# environment variable must be set or importing this module raises KeyError.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
# Bucket name derived from the project ID ("<project>-lcm").
# NOTE(review): not referenced by the test visible in this file — confirm
# whether it is still needed.
BUCKET_ID = "{}-lcm".format(PROJECT_ID)


@pytest.fixture(scope="function")
def create_dataset():
    """Create a text extraction dataset and yield its ID to the test.

    No teardown is performed here; the test using this fixture deletes the
    dataset itself.
    """
    automl_client = automl.AutoMlClient()
    parent = automl_client.location_path(PROJECT_ID, "us-central1")

    # Timestamped display name for the dataset created by this run
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    extraction_metadata = automl.types.TextExtractionDatasetMetadata()
    new_dataset = automl.types.Dataset(
        display_name="test_" + timestamp,
        text_extraction_dataset_metadata=extraction_metadata,
    )

    operation = automl_client.create_dataset(parent, new_dataset)
    created = operation.result()

    # The dataset ID is the last segment of the resource name
    yield created.name.split("/")[-1]


def test_delete_dataset(capsys, create_dataset):
    """Delete the dataset made by the fixture and check the printed output."""
    dataset_id = create_dataset
    delete_dataset.delete_dataset(PROJECT_ID, dataset_id)

    captured_stdout, _ = capsys.readouterr()
    assert "Dataset deleted." in captured_stdout
38 changes: 38 additions & 0 deletions 38 automl/cloud-client/export_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def export_dataset(project_id, dataset_id, gcs_uri):
    """Export an AutoML dataset to Cloud Storage and print the result.

    Args:
        project_id: ID of the Google Cloud project that holds the dataset.
        dataset_id: ID of the dataset to export.
        gcs_uri: Cloud Storage URI prefix to write the export under, e.g.
            "gs://YOUR_BUCKET_ID/path/to/export/".
    """
    # [START automl_export_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # dataset_id = "YOUR_DATASET_ID"
    # gcs_uri = "gs://YOUR_BUCKET_ID/path/to/export/"

    client = automl.AutoMlClient()

    # Build the fully qualified resource name of the dataset
    dataset_name = client.dataset_path(project_id, "us-central1", dataset_id)

    # Describe where the exported data should be written
    destination = automl.types.GcsDestination(output_uri_prefix=gcs_uri)
    export_config = automl.types.OutputConfig(gcs_destination=destination)

    operation = client.export_data(dataset_name, export_config)
    print("Dataset exported. {}".format(operation.result()))
    # [END automl_export_dataset]
46 changes: 46 additions & 0 deletions 46 automl/cloud-client/export_dataset_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import os

import export_dataset

# Project used for the AutoML test resources; the AUTOML_PROJECT_ID
# environment variable must be set or importing this module raises KeyError.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
# Bucket name derived from the project ID ("<project>-lcm").
BUCKET_ID = "{}-lcm".format(PROJECT_ID)
# Timestamped folder name used as the export destination prefix.
PREFIX = "TEST_EXPORT_OUTPUT_" + datetime.datetime.now().strftime(
    "%Y%m%d%H%M%S"
)
# Placeholder ID; the test below expects no dataset to exist under it.
DATASET_ID = "TEN0000000000000000000"


def test_export_dataset(capsys):
    """Check that exporting a nonexistent dataset reports "not found".

    Exporting a dataset can take a long time, and only one operation can run
    on a dataset at once. So this test tries to export a nonexistent dataset
    and confirms that the dataset was not found while the other elements of
    the request were valid.
    """
    expected = (
        "The Dataset doesn't exist or is inaccessible for use with AutoMl."
    )
    try:
        export_dataset.export_dataset(
            PROJECT_ID, DATASET_ID, "gs://{}/{}/".format(BUCKET_ID, PREFIX)
        )
    except Exception as e:
        # str(e) works for every exception type; generic Python 3 exceptions
        # have no `.message` attribute.
        assert expected in str(e)
    else:
        # Checked outside the try body so a failing assertion here is
        # reported directly instead of being caught by the handler above.
        out, _ = capsys.readouterr()
        assert expected in out
100 changes: 100 additions & 0 deletions 100 automl/cloud-client/get_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def get_dataset(project_id, dataset_id):
    """Fetch an AutoML dataset and print its details.

    Prints the dataset's resource name, ID, display name and create time,
    followed by each of the type-specific metadata fields read below (text
    extraction, text sentiment, text classification, translation, image
    classification and image object detection).

    The START/END region tags overlap on purpose: the common part of the
    function is shared by several tags, and each tag is re-opened further
    down around its own metadata print. Keep the statements in this order.

    Args:
        project_id: ID of the Google Cloud project that holds the dataset.
        dataset_id: ID of the dataset to fetch.
    """
    # [START automl_language_entity_extraction_get_dataset]
    # [START automl_language_sentiment_analysis_get_dataset]
    # [START automl_language_text_classification_get_dataset]
    # [START automl_translate_get_dataset]
    # [START automl_vision_classification_get_dataset]
    # [START automl_vision_object_detection_get_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # dataset_id = "YOUR_DATASET_ID"

    client = automl.AutoMlClient()
    # Get the full path of the dataset
    dataset_full_id = client.dataset_path(
        project_id, "us-central1", dataset_id
    )
    dataset = client.get_dataset(dataset_full_id)

    # Display the dataset information
    print("Dataset name: {}".format(dataset.name))
    # The dataset ID is the last segment of the resource name
    print("Dataset id: {}".format(dataset.name.split("/")[-1]))
    print("Dataset display name: {}".format(dataset.display_name))
    print("Dataset create time:")
    print("\tseconds: {}".format(dataset.create_time.seconds))
    print("\tnanos: {}".format(dataset.create_time.nanos))
    # [END automl_language_sentiment_analysis_get_dataset]
    # [END automl_language_text_classification_get_dataset]
    # [END automl_translate_get_dataset]
    # [END automl_vision_classification_get_dataset]
    # [END automl_vision_object_detection_get_dataset]
    print(
        "Text extraction dataset metadata: {}".format(
            dataset.text_extraction_dataset_metadata
        )
    )
    # [END automl_language_entity_extraction_get_dataset]

    # [START automl_language_sentiment_analysis_get_dataset]
    print(
        "Text sentiment dataset metadata: {}".format(
            dataset.text_sentiment_dataset_metadata
        )
    )
    # [END automl_language_sentiment_analysis_get_dataset]

    # [START automl_language_text_classification_get_dataset]
    print(
        "Text classification dataset metadata: {}".format(
            dataset.text_classification_dataset_metadata
        )
    )
    # [END automl_language_text_classification_get_dataset]

    # [START automl_translate_get_dataset]
    print("Translation dataset metadata:")
    print(
        "\tsource_language_code: {}".format(
            dataset.translation_dataset_metadata.source_language_code
        )
    )
    print(
        "\ttarget_language_code: {}".format(
            dataset.translation_dataset_metadata.target_language_code
        )
    )
    # [END automl_translate_get_dataset]

    # [START automl_vision_classification_get_dataset]
    print(
        "Image classification dataset metadata: {}".format(
            dataset.image_classification_dataset_metadata
        )
    )
    # [END automl_vision_classification_get_dataset]

    # [START automl_vision_object_detection_get_dataset]
    print(
        "Image object detection dataset metadata: {}".format(
            dataset.image_object_detection_dataset_metadata
        )
    )
    # [END automl_vision_object_detection_get_dataset]
26 changes: 26 additions & 0 deletions 26 automl/cloud-client/get_dataset_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import get_dataset

# Both values come from the environment; importing this module raises
# KeyError if either variable is unset.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
# ID of the dataset that test_get_dataset fetches.
# NOTE(review): presumably a pre-existing entity extraction dataset in the
# test project — confirm against the CI configuration.
DATASET_ID = os.environ["ENTITY_EXTRACTION_DATASET_ID"]


def test_get_dataset(capsys):
    """Fetch the configured dataset and check that its name is printed."""
    get_dataset.get_dataset(PROJECT_ID, DATASET_ID)

    captured_stdout, _ = capsys.readouterr()
    assert "Dataset name: " in captured_stdout
40 changes: 40 additions & 0 deletions 40 automl/cloud-client/import_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def import_dataset(project_id, dataset_id, path):
    """Import data from Cloud Storage into an AutoML dataset.

    Args:
        project_id: ID of the Google Cloud project that holds the dataset.
        dataset_id: ID of the dataset to import into.
        path: One Cloud Storage URI, or several separated by commas, e.g.
            "gs://YOUR_BUCKET_ID/path/to/data.csv".
    """
    # [START automl_import_data]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # dataset_id = "YOUR_DATASET_ID"
    # path = "gs://YOUR_BUCKET_ID/path/to/data.csv"

    client = automl.AutoMlClient()
    # Build the fully qualified resource name of the dataset.
    dataset_name = client.dataset_path(project_id, "us-central1", dataset_id)

    # `path` may hold several comma-separated Google Cloud Storage URIs
    source = automl.types.GcsSource(input_uris=path.split(","))
    import_config = automl.types.InputConfig(gcs_source=source)

    # Import data from the input URI(s)
    operation = client.import_data(dataset_name, import_config)

    print("Processing import...")
    print("Data imported. {}".format(operation.result()))
    # [END automl_import_data]
60 changes: 60 additions & 0 deletions 60 automl/cloud-client/import_dataset_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import os

from google.cloud import automl
import pytest

import import_dataset

# Project used for the AutoML test resources; the AUTOML_PROJECT_ID
# environment variable must be set or importing this module raises KeyError.
PROJECT_ID = os.environ["AUTOML_PROJECT_ID"]
# Bucket name derived from the project ID ("<project>-lcm"); the import test
# reads its CSV from this bucket.
BUCKET_ID = "{}-lcm".format(PROJECT_ID)


@pytest.fixture(scope="function")
def create_dataset():
    """Create a text sentiment dataset and yield its ID to the test.

    No teardown is performed here; the test using this fixture deletes the
    dataset itself.
    """
    automl_client = automl.AutoMlClient()
    parent = automl_client.location_path(PROJECT_ID, "us-central1")

    # Timestamped display name for the dataset created by this run
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    sentiment_metadata = automl.types.TextSentimentDatasetMetadata(
        sentiment_max=4
    )
    new_dataset = automl.types.Dataset(
        display_name="test_" + timestamp,
        text_sentiment_dataset_metadata=sentiment_metadata,
    )

    operation = automl_client.create_dataset(parent, new_dataset)
    created = operation.result()

    # The dataset ID is the last segment of the resource name
    yield created.name.split("/")[-1]


@pytest.mark.slow
def test_import_dataset(capsys, create_dataset):
    """Import a sentiment CSV into a fresh dataset and check the output.

    The dataset created by the fixture is always deleted afterwards, even
    when the import or the assertion fails, so failed runs do not leave
    datasets behind in the test project.
    """
    data = "gs://{}/sentiment-analysis/dataset.csv".format(BUCKET_ID)
    dataset_id = create_dataset
    try:
        import_dataset.import_dataset(PROJECT_ID, dataset_id, data)
        out, _ = capsys.readouterr()
        assert "Data imported." in out
    finally:
        # delete created dataset
        client = automl.AutoMlClient()
        dataset_full_id = client.dataset_path(
            PROJECT_ID, "us-central1", dataset_id
        )
        response = client.delete_dataset(dataset_full_id)
        response.result()
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.