diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 9e8dbe5e1e1..2ca758af79b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -33,6 +33,7 @@ /container/**/* @GoogleCloudPlatform/dee-platform-ops @GoogleCloudPlatform/python-samples-reviewers /data-science-onramp/ @leahecole @bradmiro @GoogleCloudPlatform/python-samples-reviewers /dataflow/**/* @davidcavazos @GoogleCloudPlatform/python-samples-reviewers +/datalabeling/**/* @GoogleCloudPlatform/python-samples-reviewers @ivanmkc /datastore/**/* @GoogleCloudPlatform/cloud-native-db-dpes @GoogleCloudPlatform/python-samples-reviewers /dns/**/* @GoogleCloudPlatform/python-samples-reviewers /endpoints/**/* @GoogleCloudPlatform/python-samples-reviewers diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml index 30060651e97..0bc1e8c4bc2 100644 --- a/.github/blunderbuss.yml +++ b/.github/blunderbuss.yml @@ -112,6 +112,11 @@ assign_issues_by: - 'api: translate' to: - nicain +- labels: + - 'api: datalabeling' + to: + - GoogleCloudPlatform/python-samples-reviewers + - ivanmkc - labels: - 'api: monitoring' to: diff --git a/datalabeling/AUTHORING_GUIDE.md b/datalabeling/AUTHORING_GUIDE.md new file mode 100644 index 00000000000..f2725aaf3f4 --- /dev/null +++ b/datalabeling/AUTHORING_GUIDE.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md diff --git a/datalabeling/CONTRIBUTING.md b/datalabeling/CONTRIBUTING.md new file mode 100644 index 00000000000..354af3073ee --- /dev/null +++ b/datalabeling/CONTRIBUTING.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/CONTRIBUTING.md diff --git a/datalabeling/snippets/README.rst b/datalabeling/snippets/README.rst new file mode 100644 index 00000000000..bf5949b8cb7 --- /dev/null +++ b/datalabeling/snippets/README.rst @@ -0,0 +1,78 @@ +.. This file is automatically generated. Do not edit this file directly. 
+ +Google Cloud Data Labeling Service Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=datalabeling/README.rst + + +This directory contains samples for Google Cloud Data Labeling Service. `Google Cloud Data Labeling Service`_ allows developers to request having human labelers label a collection of data that you plan to use to train a custom machine learning model. + + + + +.. _Google Cloud Data Labeling Service: https://cloud.google.com/data-labeling/docs/ + +Setup +------------------------------------------------------------------------------- + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. 
_virtualenv: https://virtualenv.pypa.io/ + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/datalabeling/snippets/README.rst.in b/datalabeling/snippets/README.rst.in new file mode 100644 index 00000000000..c87a1ff89b4 --- /dev/null +++ b/datalabeling/snippets/README.rst.in @@ -0,0 +1,18 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Data Labeling Service + short_name: Cloud Data Labeling + url: https://cloud.google.com/data-labeling/docs/ + description: > + `Google Cloud Data Labeling Service`_ allows developers to request having + human labelers label a collection of data that you plan to use to train a + custom machine learning model. + +setup: +- auth +- install_deps + +cloud_client_library: true + +folder: datalabeling \ No newline at end of file diff --git a/datalabeling/snippets/create_annotation_spec_set.py b/datalabeling/snippets/create_annotation_spec_set.py new file mode 100644 index 00000000000..240b50fd415 --- /dev/null +++ b/datalabeling/snippets/create_annotation_spec_set.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_create_annotation_spec_set_beta] +def create_annotation_spec_set(project_id): + """Creates a data labeling annotation spec set for the given + Google Cloud project. + """ + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_create_annotation_spec_set_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_create_annotation_spec_set_beta] + + project_path = f"projects/{project_id}" + + annotation_spec_1 = datalabeling.AnnotationSpec( + display_name="label_1", description="label_description_1" + ) + + annotation_spec_2 = datalabeling.AnnotationSpec( + display_name="label_2", description="label_description_2" + ) + + annotation_spec_set = datalabeling.AnnotationSpecSet( + display_name="YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME", + description="YOUR_DESCRIPTION", + annotation_specs=[annotation_spec_1, annotation_spec_2], + ) + + response = client.create_annotation_spec_set( + request={"parent": project_path, "annotation_spec_set": annotation_spec_set} + ) + + # The format of the resource name: + # projects/{project_id}/annotationSpecSets/{annotation_spec_set_id} + 
print("The annotation_spec_set resource name: {}".format(response.name)) + print("Display name: {}".format(response.display_name)) + print("Description: {}".format(response.description)) + print("Annotation specs:") + for annotation_spec in response.annotation_specs: + print("\tDisplay name: {}".format(annotation_spec.display_name)) + print("\tDescription: {}\n".format(annotation_spec.description)) + + return response + + +# [END datalabeling_create_annotation_spec_set_beta] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument("--project-id", help="Project ID. Required.", required=True) + + args = parser.parse_args() + + create_annotation_spec_set(args.project_id) diff --git a/datalabeling/snippets/create_annotation_spec_set_test.py b/datalabeling/snippets/create_annotation_spec_set_test.py new file mode 100644 index 00000000000..2653169009e --- /dev/null +++ b/datalabeling/snippets/create_annotation_spec_set_test.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +# Copyright 2022 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +import pytest + +import create_annotation_spec_set +import testing_lib + +PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") + + +@pytest.fixture(scope="module") +def cleaner(): + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.delete_annotation_spec_set(resource_name) + + +@pytest.mark.skip(reason="service is limited due to covid") +def test_create_annotation_spec_set(cleaner, capsys): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE + ) + def run_sample(): + return create_annotation_spec_set.create_annotation_spec_set(PROJECT_ID) + + response = run_sample() + + # For cleanup + cleaner.append(response.name) + + out, _ = capsys.readouterr() + assert "The annotation_spec_set resource name:" in out diff --git a/datalabeling/snippets/create_instruction.py b/datalabeling/snippets/create_instruction.py new file mode 100644 index 00000000000..c6041b3618d --- /dev/null +++ b/datalabeling/snippets/create_instruction.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_create_instruction_beta] +def create_instruction(project_id, data_type, instruction_gcs_uri): + """Creates a data labeling PDF instruction for the given Google Cloud + project. The PDF file should be uploaded to the project in + Google Cloud Storage. + """ + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_create_instruction_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_create_instruction_beta] + + project_path = f"projects/{project_id}" + + pdf_instruction = datalabeling.PdfInstruction(gcs_file_uri=instruction_gcs_uri) + + instruction = datalabeling.Instruction( + display_name="YOUR_INSTRUCTION_DISPLAY_NAME", + description="YOUR_DESCRIPTION", + data_type=data_type, + pdf_instruction=pdf_instruction, + ) + + operation = client.create_instruction( + request={"parent": project_path, "instruction": instruction} + ) + + result = operation.result() + + # The format of the resource name: + # projects/{project_id}/instructions/{instruction_id} + print("The instruction resource name: {}".format(result.name)) + print("Display name: {}".format(result.display_name)) + print("Description: {}".format(result.description)) + print("Create time:") + print("\tseconds: {}".format(result.create_time.timestamp_pb().seconds)) + print("\tnanos: {}".format(result.create_time.timestamp_pb().nanos)) + print("Data type: {}".format(datalabeling.DataType(result.data_type).name)) + print("Pdf instruction:") + print("\tGcs file uri: {}\n".format(result.pdf_instruction.gcs_file_uri)) + + 
return result + + +# [END datalabeling_create_instruction_beta] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument("--project-id", help="Project ID. Required.", required=True) + + parser.add_argument( + "--data-type", + help="Data type. Only support IMAGE, VIDEO, TEXT and AUDIO. Required.", + required=True, + ) + + parser.add_argument( + "--instruction-gcs-uri", + help="The URI of Google Cloud Storage of the instruction. Required.", + required=True, + ) + + args = parser.parse_args() + + create_instruction(args.project_id, args.data_type, args.instruction_gcs_uri) diff --git a/datalabeling/snippets/create_instruction_test.py b/datalabeling/snippets/create_instruction_test.py new file mode 100644 index 00000000000..0ee09b34fdd --- /dev/null +++ b/datalabeling/snippets/create_instruction_test.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +# Copyright 2022 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +from google.cloud import datalabeling +import pytest + +import create_instruction +import testing_lib + +PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") +INSTRUCTION_GCS_URI = "gs://cloud-samples-data/datalabeling" "/instruction/test.pdf" + + +@pytest.fixture(scope="module") +def cleaner(): + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.delete_instruction(resource_name) + + +@pytest.mark.skip(reason="service is limited due to covid") +def test_create_instruction(cleaner, capsys): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE + ) + def run_sample(): + return create_instruction.create_instruction( + PROJECT_ID, datalabeling.DataType.IMAGE, INSTRUCTION_GCS_URI + ) + + instruction = run_sample() + cleaner.append(instruction.name) + + out, _ = capsys.readouterr() + assert "The instruction resource name: " in out diff --git a/datalabeling/snippets/export_data.py b/datalabeling/snippets/export_data.py new file mode 100644 index 00000000000..f2e2dc0f6df --- /dev/null +++ b/datalabeling/snippets/export_data.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_export_data_beta] +def export_data(dataset_resource_name, annotated_dataset_resource_name, export_gcs_uri): + """Exports a dataset from the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_export_data_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_export_data_beta] + + gcs_destination = datalabeling.GcsDestination( + output_uri=export_gcs_uri, mime_type="text/csv" + ) + + output_config = datalabeling.OutputConfig(gcs_destination=gcs_destination) + + response = client.export_data( + request={ + "name": dataset_resource_name, + "annotated_dataset": annotated_dataset_resource_name, + "output_config": output_config, + } + ) + + print("Dataset ID: {}\n".format(response.result().dataset)) + print("Output config:") + print("\tGcs destination:") + print( + "\t\tOutput URI: {}\n".format( + response.result().output_config.gcs_destination.output_uri + ) + ) + + +# [END datalabeling_export_data_beta] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + "--dataset-resource-name", + help="Dataset resource name. Required.", + required=True, + ) + + parser.add_argument( + "--annotated-dataset-resource-name", + help="Annotated Dataset resource name. Required.", + required=True, + ) + + parser.add_argument( + "--export-gcs-uri", help="The export GCS URI. 
Required.", required=True + ) + + args = parser.parse_args() + + export_data( + args.dataset_resource_name, + args.annotated_dataset_resource_name, + args.export_gcs_uri, + ) diff --git a/datalabeling/snippets/import_data.py b/datalabeling/snippets/import_data.py new file mode 100644 index 00000000000..fa354ee04ea --- /dev/null +++ b/datalabeling/snippets/import_data.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_import_data_beta] +def import_data(dataset_resource_name, data_type, input_gcs_uri): + """Imports data to the given Google Cloud project and dataset.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_import_data_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_import_data_beta] + + gcs_source = datalabeling.GcsSource(input_uri=input_gcs_uri, mime_type="text/csv") + + csv_input_config = datalabeling.InputConfig( + data_type=data_type, gcs_source=gcs_source + ) + + response = 
client.import_data( + request={"name": dataset_resource_name, "input_config": csv_input_config} + ) + + result = response.result() + + # The format of the resource name: + # projects/{project_id}/datasets/{dataset_id} + print("Dataset resource name: {}\n".format(result.dataset)) + + return result + + +# [END datalabeling_import_data_beta] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + "--dataset-resource-name", + help="Dataset resource name. Required.", + required=True, + ) + + parser.add_argument( + "--data-type", + help="Data type. Only support IMAGE, VIDEO, TEXT and AUDIO. Required.", + required=True, + ) + + parser.add_argument( + "--input-gcs-uri", + help="The GCS URI of the input dataset. Required.", + required=True, + ) + + args = parser.parse_args() + + import_data(args.dataset_resource_name, args.data_type, args.input_gcs_uri) diff --git a/datalabeling/snippets/import_data_test.py b/datalabeling/snippets/import_data_test.py new file mode 100644 index 00000000000..8d836515960 --- /dev/null +++ b/datalabeling/snippets/import_data_test.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +# Copyright 2022 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +from google.cloud import datalabeling +import pytest + +import import_data +import testing_lib + +PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") +INPUT_GCS_URI = "gs://cloud-samples-data/datalabeling/image/image_dataset.csv" + + +@pytest.fixture(scope="module") +def dataset(): + # create a temporary dataset + dataset = testing_lib.create_dataset(PROJECT_ID) + + yield dataset + + # tear down + testing_lib.delete_dataset(dataset.name) + + +@pytest.mark.skip(reason="service is limited due to covid") +def test_import_data(capsys, dataset): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE + ) + def run_sample(): + import_data.import_data( + dataset.name, datalabeling.DataType.IMAGE, INPUT_GCS_URI + ) + + run_sample() + out, _ = capsys.readouterr() + assert "Dataset resource name: " in out diff --git a/datalabeling/snippets/label_image.py b/datalabeling/snippets/label_image.py new file mode 100644 index 00000000000..d566fcd9dd8 --- /dev/null +++ b/datalabeling/snippets/label_image.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_label_image_beta] +def label_image( + dataset_resource_name, instruction_resource_name, annotation_spec_set_resource_name +): + """Labels an image dataset.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_label_image_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_label_image_beta] + + basic_config = datalabeling.HumanAnnotationConfig( + instruction=instruction_resource_name, + annotated_dataset_display_name="YOUR_ANNOTATED_DATASET_DISPLAY_NAME", + label_group="YOUR_LABEL_GROUP", + replica_count=1, + ) + + feature = datalabeling.LabelImageRequest.Feature.CLASSIFICATION + + # annotation_spec_set_resource_name needs to be created beforehand. 
+ # See the examples in the following: + # https://cloud.google.com/ai-platform/data-labeling/docs/label-sets + config = datalabeling.ImageClassificationConfig( + annotation_spec_set=annotation_spec_set_resource_name, + allow_multi_label=False, + answer_aggregation_type=datalabeling.StringAggregationType.MAJORITY_VOTE, + ) + + response = client.label_image( + request={ + "parent": dataset_resource_name, + "basic_config": basic_config, + "feature": feature, + "image_classification_config": config, + } + ) + + print("Label_image operation name: {}".format(response.operation.name)) + return response + + +# [END datalabeling_label_image_beta] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + "--dataset-resource-name", + help="Dataset resource name. Required.", + required=True, + ) + + parser.add_argument( + "--instruction-resource-name", + help="Instruction resource name. Required.", + required=True, + ) + + parser.add_argument( + "--annotation-spec-set-resource-name", + help="Annotation spec set resource name. Required.", + required=True, + ) + + args = parser.parse_args() + + label_image( + args.dataset_resource_name, + args.instruction_resource_name, + args.annotation_spec_set_resource_name, + ) diff --git a/datalabeling/snippets/label_image_test.py b/datalabeling/snippets/label_image_test.py new file mode 100644 index 00000000000..636b65c2b83 --- /dev/null +++ b/datalabeling/snippets/label_image_test.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python + +# Copyright 2022 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import backoff +from google.api_core.exceptions import ServerError +from google.cloud import datalabeling +import pytest + +import label_image +import testing_lib + +PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") +INPUT_GCS_URI = "gs://cloud-samples-data/datalabeling/image/image_dataset.csv" +INSTRUCTION_GCS_URI = "gs://cloud-samples-data/datalabeling" "/instruction/test.pdf" + + +@pytest.fixture(scope="module") +def dataset(): + # create a temporary dataset + dataset = testing_lib.create_dataset(PROJECT_ID) + + testing_lib.import_data(dataset.name, datalabeling.DataType.IMAGE, INPUT_GCS_URI) + yield dataset + + # tear down + testing_lib.delete_dataset(dataset.name) + + +@pytest.fixture(scope="module") +def annotation_spec_set(): + # create a temporary annotation_spec_set + response = testing_lib.create_annotation_spec_set(PROJECT_ID) + + yield response + + testing_lib.delete_annotation_spec_set(response.name) + + +@pytest.fixture(scope="module") +def instruction(): + # create a temporary instruction + instruction = testing_lib.create_instruction( + PROJECT_ID, datalabeling.DataType.IMAGE, INSTRUCTION_GCS_URI + ) + + yield instruction + + # tear down + testing_lib.delete_instruction(instruction.name) + + +@pytest.fixture(scope="module") +def cleaner(): + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.cancel_operation(resource_name) + + +# Passing in dataset as the last argument in test_label_image since it needs +# to be deleted before the annotation_spec_set can be deleted. 
+@pytest.mark.skip(reason="currently unavailable") +def test_label_image(capsys, annotation_spec_set, instruction, dataset, cleaner): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE + ) + def run_sample(): + # Start labeling. + return label_image.label_image( + dataset.name, instruction.name, annotation_spec_set.name + ) + + response = run_sample() + cleaner.append(response.operation.name) + + out, _ = capsys.readouterr() + assert "Label_image operation name: " in out diff --git a/datalabeling/snippets/label_text.py b/datalabeling/snippets/label_text.py new file mode 100644 index 00000000000..780bc437429 --- /dev/null +++ b/datalabeling/snippets/label_text.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_label_text_beta] +def label_text( + dataset_resource_name, instruction_resource_name, annotation_spec_set_resource_name +): + """Starts a text entity extraction labeling task on the given dataset.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_label_text_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_label_text_beta] + + basic_config = datalabeling.HumanAnnotationConfig( + instruction=instruction_resource_name, + annotated_dataset_display_name="YOUR_ANNOTATED_DATASET_DISPLAY_NAME", + label_group="YOUR_LABEL_GROUP", + replica_count=1, + ) + + feature = datalabeling.LabelTextRequest.Feature.TEXT_ENTITY_EXTRACTION + + config = datalabeling.TextEntityExtractionConfig( + annotation_spec_set=annotation_spec_set_resource_name + ) + + response = client.label_text( + request={ + "parent": dataset_resource_name, + "basic_config": basic_config, + "feature": feature, + "text_entity_extraction_config": config, + } + ) + + print("Label_text operation name: {}".format(response.operation.name)) + return response + + +# [END datalabeling_label_text_beta] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + "--dataset-resource-name", + help="Dataset resource name. Required.", + required=True, + ) + + parser.add_argument( + "--instruction-resource-name", + help="Instruction resource name. 
Required.", + required=True, + ) + + parser.add_argument( + "--annotation-spec-set-resource-name", + help="Annotation spec set resource name. Required.", + required=True, + ) + + args = parser.parse_args() + + label_text( + args.dataset_resource_name, + args.instruction_resource_name, + args.annotation_spec_set_resource_name, + ) diff --git a/datalabeling/snippets/label_text_test.py b/datalabeling/snippets/label_text_test.py new file mode 100644 index 00000000000..b91f086e180 --- /dev/null +++ b/datalabeling/snippets/label_text_test.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +# Copyright 2022 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +from google.cloud import datalabeling +import pytest + +import label_text +import testing_lib + +PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") +INPUT_GCS_URI = "gs://cloud-samples-data/datalabeling/text/input.csv" +INSTRUCTION_GCS_URI = "gs://cloud-samples-data/datalabeling" "/instruction/test.pdf" + + +@pytest.fixture(scope="module") +def dataset(): + # create a temporary dataset + dataset = testing_lib.create_dataset(PROJECT_ID) + + testing_lib.import_data(dataset.name, datalabeling.DataType.TEXT, INPUT_GCS_URI) + + yield dataset + + # tear down + testing_lib.delete_dataset(dataset.name) + + +@pytest.fixture(scope="module") +def annotation_spec_set(): + # create a temporary annotation_spec_set + response = testing_lib.create_annotation_spec_set(PROJECT_ID) + + yield response + + testing_lib.delete_annotation_spec_set(response.name) + + +@pytest.fixture(scope="module") +def instruction(): + # create a temporary instruction + instruction = testing_lib.create_instruction( + PROJECT_ID, datalabeling.DataType.TEXT, INSTRUCTION_GCS_URI + ) + + yield instruction + + # tear down + testing_lib.delete_instruction(instruction.name) + + +@pytest.fixture(scope="module") +def cleaner(): + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.cancel_operation(resource_name) + + +# Passing in dataset as the last argument in test_label_text since it needs +# to be deleted before the annotation_spec_set can be deleted. +@pytest.mark.skip("Constantly failing") +def test_label_text(capsys, annotation_spec_set, instruction, dataset, cleaner): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE + ) + def run_sample(): + # Start labeling. 
+ return label_text.label_text( + dataset.name, instruction.name, annotation_spec_set.name + ) + + response = run_sample() + cleaner.append(response.operation.name) + + out, _ = capsys.readouterr() + assert "Label_text operation name: " in out + + # Cancels the labeling operation. + response.cancel() + assert response.cancelled() is True diff --git a/datalabeling/snippets/label_video.py b/datalabeling/snippets/label_video.py new file mode 100644 index 00000000000..7831e17cb1f --- /dev/null +++ b/datalabeling/snippets/label_video.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_label_video_beta] +def label_video( + dataset_resource_name, instruction_resource_name, annotation_spec_set_resource_name +): + """Labels a video dataset.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_label_video_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_label_video_beta] + + basic_config = datalabeling.HumanAnnotationConfig( + instruction=instruction_resource_name, + annotated_dataset_display_name="YOUR_ANNOTATED_DATASET_DISPLAY_NAME", + label_group="YOUR_LABEL_GROUP", + replica_count=1, + ) + + feature = datalabeling.LabelVideoRequest.Feature.OBJECT_TRACKING + + config = datalabeling.ObjectTrackingConfig( + annotation_spec_set=annotation_spec_set_resource_name + ) + + response = client.label_video( + request={ + "parent": dataset_resource_name, + "basic_config": basic_config, + "feature": feature, + "object_tracking_config": config, + } + ) + + print("Label_video operation name: {}".format(response.operation.name)) + return response + + +# [END datalabeling_label_video_beta] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + "--dataset-resource-name", + help="Dataset resource name. Required.", + required=True, + ) + + parser.add_argument( + "--instruction-resource-name", + help="Instruction resource name. 
Required.", + required=True, + ) + + parser.add_argument( + "--annotation-spec-set-resource-name", + help="Annotation spec set resource name. Required.", + required=True, + ) + + args = parser.parse_args() + + label_video( + args.dataset_resource_name, + args.instruction_resource_name, + args.annotation_spec_set_resource_name, + ) diff --git a/datalabeling/snippets/label_video_test.py b/datalabeling/snippets/label_video_test.py new file mode 100644 index 00000000000..eefcce68144 --- /dev/null +++ b/datalabeling/snippets/label_video_test.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +# Copyright 2022 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import backoff +from google.api_core.exceptions import ServerError +from google.cloud import datalabeling +import pytest + +import label_video +import testing_lib + +PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") +INPUT_GCS_URI = "gs://cloud-samples-data/datalabeling/videos/video_dataset.csv" +INSTRUCTION_GCS_URI = "gs://cloud-samples-data/datalabeling" "/instruction/test.pdf" + + +@pytest.fixture(scope="module") +def dataset(): + # create a temporary dataset + dataset = testing_lib.create_dataset(PROJECT_ID) + + testing_lib.import_data(dataset.name, datalabeling.DataType.VIDEO, INPUT_GCS_URI) + + yield dataset + + # tear down + testing_lib.delete_dataset(dataset.name) + + +@pytest.fixture(scope="module") +def annotation_spec_set(): + # create a temporary annotation_spec_set + response = testing_lib.create_annotation_spec_set(PROJECT_ID) + + yield response + + testing_lib.delete_annotation_spec_set(response.name) + + +@pytest.fixture(scope="module") +def instruction(): + # create a temporary instruction + instruction = testing_lib.create_instruction( + PROJECT_ID, datalabeling.DataType.VIDEO, INSTRUCTION_GCS_URI + ) + + yield instruction + + # tear down + testing_lib.delete_instruction(instruction.name) + + +@pytest.fixture(scope="module") +def cleaner(): + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.cancel_operation(resource_name) + + +# Passing in dataset as the last argument in test_label_video since it needs +# to be deleted before the annotation_spec_set can be deleted. +@pytest.mark.skip(reason="currently unavailable") +def test_label_video(capsys, annotation_spec_set, instruction, dataset, cleaner): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE + ) + def run_sample(): + # Start labeling. 
+ return label_video.label_video( + dataset.name, instruction.name, annotation_spec_set.name + ) + + response = run_sample() + cleaner.append(response.operation.name) + + out, _ = capsys.readouterr() + assert "Label_video operation name: " in out + + # Cancels the labeling operation. + response.cancel() + assert response.cancelled() is True diff --git a/datalabeling/snippets/manage_dataset.py b/datalabeling/snippets/manage_dataset.py new file mode 100644 index 00000000000..168cb66230f --- /dev/null +++ b/datalabeling/snippets/manage_dataset.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os + +from google.api_core.client_options import ClientOptions + + +# [START datalabeling_create_dataset_beta] +def create_dataset(project_id): + """Creates a dataset for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_create_dataset_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_create_dataset_beta] + + formatted_project_name = f"projects/{project_id}" + + dataset = datalabeling.Dataset( + display_name="YOUR_DATASET_SET_DISPLAY_NAME", description="YOUR_DESCRIPTION" + ) + + response = client.create_dataset( + request={"parent": formatted_project_name, "dataset": dataset} + ) + + # The format of resource name: + # project_id/{project_id}/datasets/{dataset_id} + print("The dataset resource name: {}".format(response.name)) + print("Display name: {}".format(response.display_name)) + print("Description: {}".format(response.description)) + print("Create time:") + print("\tseconds: {}".format(response.create_time.timestamp_pb().seconds)) + print("\tnanos: {}\n".format(response.create_time.timestamp_pb().nanos)) + + return response + + +# [END datalabeling_create_dataset_beta] + + +# [START datalabeling_list_datasets_beta] +def list_datasets(project_id): + """Lists datasets for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_list_datasets_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if 
"DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_list_datasets_beta] + + formatted_project_name = f"projects/{project_id}" + + response = client.list_datasets(request={"parent": formatted_project_name}) + for element in response: + # The format of resource name: + # project_id/{project_id}/datasets/{dataset_id} + print("The dataset resource name: {}\n".format(element.name)) + print("Display name: {}".format(element.display_name)) + print("Description: {}".format(element.description)) + print("Create time:") + print("\tseconds: {}".format(element.create_time.timestamp_pb().seconds)) + print("\tnanos: {}".format(element.create_time.timestamp_pb().nanos)) + + +# [END datalabeling_list_datasets_beta] + + +# [START datalabeling_get_dataset_beta] +def get_dataset(dataset_resource_name): + """Gets a dataset for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_get_dataset_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_get_dataset_beta] + + response = client.get_dataset(request={"name": dataset_resource_name}) + + print("The dataset resource name: {}\n".format(response.name)) + print("Display name: {}".format(response.display_name)) + print("Description: {}".format(response.description)) + print("Create time:") + print("\tseconds: {}".format(response.create_time.timestamp_pb().seconds)) + print("\tnanos: {}".format(response.create_time.timestamp_pb().nanos)) + + +# [END 
datalabeling_get_dataset_beta] + + +# [START datalabeling_delete_dataset_beta] +def delete_dataset(dataset_resource_name): + """Deletes a dataset for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + + client = datalabeling.DataLabelingServiceClient() + # [END datalabeling_delete_dataset_beta] + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + # [START datalabeling_delete_dataset_beta] + + response = client.delete_dataset(request={"name": dataset_resource_name}) + + print("Dataset deleted. {}\n".format(response)) + + +# [END datalabeling_delete_dataset_beta] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + subparsers = parser.add_subparsers(dest="command") + + create_parser = subparsers.add_parser("create", help="Create a new dataset.") + create_parser.add_argument( + "--project-id", help="Project ID. Required.", required=True + ) + + list_parser = subparsers.add_parser("list", help="List all datasets.") + list_parser.add_argument( + "--project-id", help="Project ID. Required.", required=True + ) + + get_parser = subparsers.add_parser( + "get", help="Get a dataset by the dataset resource name." + ) + get_parser.add_argument( + "--dataset-resource-name", + help="The dataset resource name. Used in the get or delete operation.", + required=True, + ) + + delete_parser = subparsers.add_parser( + "delete", help="Delete a dataset by the dataset resource name." + ) + delete_parser.add_argument( + "--dataset-resource-name", + help="The dataset resource name. 
Used in the get or delete operation.", + required=True, + ) + + args = parser.parse_args() + + if args.command == "create": + create_dataset(args.project_id) + elif args.command == "list": + list_datasets(args.project_id) + elif args.command == "get": + get_dataset(args.dataset_resource_name) + elif args.command == "delete": + delete_dataset(args.dataset_resource_name) diff --git a/datalabeling/snippets/manage_dataset_test.py b/datalabeling/snippets/manage_dataset_test.py new file mode 100644 index 00000000000..bed3df4caf3 --- /dev/null +++ b/datalabeling/snippets/manage_dataset_test.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python + +# Copyright 2022 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import backoff +from google.api_core.exceptions import RetryError, ServerError +import pytest + +import manage_dataset +import testing_lib + +PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") + + +# The tests below carry the skip mark; pytest marks have no effect on fixtures. +@pytest.fixture(scope="module") +def dataset(): + # create a temporary dataset + dataset = testing_lib.create_dataset(PROJECT_ID) + + yield dataset + + # tear down + testing_lib.delete_dataset(dataset.name) + + +@pytest.fixture(scope="module") +def cleaner(): + # First delete old datasets. + try: + testing_lib.delete_old_datasets(PROJECT_ID) + # We see occasional RetryError while deleting old datasets. + # We can just ignore it and move on. 
+ except RetryError as e: + print("delete_old_datasets failed: detail {}".format(e)) + + resource_names = [] + + yield resource_names + + for resource_name in resource_names: + testing_lib.delete_dataset(resource_name) + + +@pytest.mark.skip(reason="service is limited due to covid") +def test_create_dataset(cleaner, capsys): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE + ) + def run_sample(): + return manage_dataset.create_dataset(PROJECT_ID) + + response = run_sample() + cleaner.append(response.name) + + out, _ = capsys.readouterr() + assert "The dataset resource name:" in out + + +@pytest.mark.skip(reason="service is limited due to covid") +def test_list_dataset(capsys, dataset): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE + ) + def run_sample(): + manage_dataset.list_datasets(PROJECT_ID) + + run_sample() + out, _ = capsys.readouterr() + assert dataset.name in out + + +@pytest.mark.skip(reason="service is limited due to covid") +def test_get_dataset(capsys, dataset): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE + ) + def run_sample(): + manage_dataset.get_dataset(dataset.name) + + run_sample() + out, _ = capsys.readouterr() + assert "The dataset resource name:" in out + + +@pytest.mark.skip(reason="service is limited due to covid") +def test_delete_dataset(capsys, dataset): + @backoff.on_exception( + backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE + ) + def run_sample(): + manage_dataset.delete_dataset(dataset.name) + + run_sample() + out, _ = capsys.readouterr() + assert "Dataset deleted." 
in out diff --git a/datalabeling/snippets/noxfile_config.py b/datalabeling/snippets/noxfile_config.py new file mode 100644 index 00000000000..5c33914cc7a --- /dev/null +++ b/datalabeling/snippets/noxfile_config.py @@ -0,0 +1,41 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # Declare optional test sessions you want to opt-in. Currently we + # have the following optional test sessions: + # 'cloud_run' # Test session for Cloud Run application. + "opt_in_sessions": [], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + # 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. 
+ "envs": { + # For Datalabeling samples to hit the testing endpoint + "DATALABELING_ENDPOINT": "test-datalabeling.sandbox.googleapis.com:443" + }, +} diff --git a/datalabeling/snippets/requirements-test.txt b/datalabeling/snippets/requirements-test.txt new file mode 100644 index 00000000000..b90fc387d01 --- /dev/null +++ b/datalabeling/snippets/requirements-test.txt @@ -0,0 +1,2 @@ +backoff==2.2.1 +pytest==7.2.0 diff --git a/datalabeling/snippets/requirements.txt b/datalabeling/snippets/requirements.txt new file mode 100644 index 00000000000..a764353bb42 --- /dev/null +++ b/datalabeling/snippets/requirements.txt @@ -0,0 +1 @@ +google-cloud-datalabeling==1.6.3 diff --git a/datalabeling/snippets/testing_lib.py b/datalabeling/snippets/testing_lib.py new file mode 100644 index 00000000000..ee68accfa07 --- /dev/null +++ b/datalabeling/snippets/testing_lib.py @@ -0,0 +1,103 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import time + +import backoff +from google.api_core.client_options import ClientOptions +from google.api_core.exceptions import DeadlineExceeded, FailedPrecondition +from google.cloud import datalabeling_v1beta1 as datalabeling + +import create_annotation_spec_set as annotation_spec_set_sample +import create_instruction as instruction_sample +import import_data as import_sample +import manage_dataset as dataset_sample + +RETRY_DEADLINE = 60 + + +def create_client(): + # If provided, use a provided test endpoint - this will prevent tests on + # this snippet from triggering any action by a real human + if "DATALABELING_ENDPOINT" in os.environ: + opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT")) + client = datalabeling.DataLabelingServiceClient(client_options=opts) + else: + client = datalabeling.DataLabelingServiceClient() + return client + + +@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE) +def create_dataset(project_id): + return dataset_sample.create_dataset(project_id) + + +@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE) +def delete_dataset(name): + return dataset_sample.delete_dataset(name) + + +def delete_old_datasets(project_id): + client = create_client() + formatted_project_name = f"projects/{project_id}" + + response = client.list_datasets(request={"parent": formatted_project_name}) + # It will delete datasets created more than 2 hours ago + cutoff_time = time.time() - 7200 + for element in response: + if element.create_time.timestamp_pb().seconds < cutoff_time: + print("Deleting {}".format(element.name)) + try: + dataset_sample.delete_dataset(element.name) + except FailedPrecondition as e: + # We're always getting FailedPrecondition with 400 + # resource conflict. I don't know why. 
+ print("Deleting {} failed.".format(element.name)) + print("Detail: {}".format(e)) + # To avoid quota error + time.sleep(1) + + +@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE) +def create_annotation_spec_set(project_id): + return annotation_spec_set_sample.create_annotation_spec_set(project_id) + + +@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE) +def delete_annotation_spec_set(name): + client = create_client() + client.delete_annotation_spec_set(request={"name": name}) + + +@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE) +def create_instruction(project_id, data_type, gcs_uri): + return instruction_sample.create_instruction(project_id, data_type, gcs_uri) + + +@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE) +def delete_instruction(name): + client = create_client() + client.delete_instruction(request={"name": name}) + + +@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE) +def cancel_operation(name): + client = create_client() + client._transport.operations_client.cancel_operation(name) + + +@backoff.on_exception(backoff.expo, DeadlineExceeded, max_time=RETRY_DEADLINE) +def import_data(dataset_name, data_type, gcs_uri): + import_sample.import_data(dataset_name, data_type, gcs_uri)