Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit dc432cf

Browse filesBrowse files
feat(dlp): GoogleCloudPlatform#10786 with shorter commits - "Split Custom Infotype samples into multiple files " (GoogleCloudPlatform#10883)
* moved inspect_string_with_exclusion_dict into separate file * moved inspect_custom_regex into separate file * moved inspect_hotword_rule into separate file * moved inspect_hotword_rule into separate file * moved inspect_string_custom_excluding_substring into separate file * moved inspect_string_custom_hotword into separate file * moved inspect_string_custom_omit_overlap into separate file * moved inspect_string_multiple_rules into separate file * moved inspect_string_omit_overlap into separate file * moved inspect_string_with_exclusion_dict_substring into separate file * moved inspect_string_with_exclusion_regex into separate file * moved inspect_string_without_overlap into separate file * removed older file for custom_infotype
1 parent 338b39f commit dc432cf
Copy full SHA for dc432cf
Expand file treeCollapse file tree

24 files changed

+1439
-1058
lines changed
+80Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""Custom infoType snippets.
15+
16+
This file contains sample code that uses the Data Loss Prevention API to create
17+
custom infoType detectors to refine scan results.
18+
"""
19+
20+
# [START dlp_inspect_custom_regex]
21+
import google.cloud.dlp
22+
23+
24+
def inspect_data_with_custom_regex_detector(
25+
project: str,
26+
content_string: str,
27+
) -> None:
28+
"""Uses the Data Loss Prevention API to analyze string with medical record
29+
number custom regex detector
30+
31+
Args:
32+
project: The Google Cloud project id to use as a parent resource.
33+
content_string: The string to inspect.
34+
35+
Returns:
36+
None; the response from the API is printed to the terminal.
37+
"""
38+
39+
# Instantiate a client.
40+
dlp = google.cloud.dlp_v2.DlpServiceClient()
41+
42+
# Construct a custom regex detector info type called "C_MRN",
43+
# with ###-#-##### pattern, where each # represents a digit from 1 to 9.
44+
# The detector has a detection likelihood of POSSIBLE.
45+
custom_info_types = [
46+
{
47+
"info_type": {"name": "C_MRN"},
48+
"regex": {"pattern": "[1-9]{3}-[1-9]{1}-[1-9]{5}"},
49+
"likelihood": google.cloud.dlp_v2.Likelihood.POSSIBLE,
50+
}
51+
]
52+
53+
# Construct the configuration dictionary with the custom regex info type.
54+
inspect_config = {
55+
"custom_info_types": custom_info_types,
56+
"include_quote": True,
57+
}
58+
59+
# Construct the `item`.
60+
item = {"value": content_string}
61+
62+
# Convert the project id into a full resource id.
63+
parent = f"projects/{project}"
64+
65+
# Call the API.
66+
response = dlp.inspect_content(
67+
request={"parent": parent, "inspect_config": inspect_config, "item": item}
68+
)
69+
70+
# Print out the results.
71+
if response.result.findings:
72+
for finding in response.result.findings:
73+
print(f"Quote: {finding.quote}")
74+
print(f"Info type: {finding.info_type.name}")
75+
print(f"Likelihood: {finding.likelihood}")
76+
else:
77+
print("No findings.")
78+
79+
80+
# [END dlp_inspect_custom_regex]
+32Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import inspect_custom_regex as custom_infotype
18+
19+
import pytest
20+
21+
GCLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT")
22+
23+
24+
def test_inspect_data_with_custom_regex_detector(
25+
capsys: pytest.LogCaptureFixture,
26+
) -> None:
27+
custom_infotype.inspect_data_with_custom_regex_detector(
28+
GCLOUD_PROJECT, "Patients MRN 444-5-22222"
29+
)
30+
31+
out, _ = capsys.readouterr()
32+
assert "Info type: C_MRN" in out
+97Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""Custom infoType snippets.
15+
16+
This file contains sample code that uses the Data Loss Prevention API to create
17+
custom infoType detectors to refine scan results.
18+
"""
19+
20+
# [START dlp_inspect_hotword_rule]
21+
import google.cloud.dlp
22+
23+
24+
def inspect_data_w_custom_hotwords(
25+
project: str,
26+
content_string: str,
27+
) -> None:
28+
"""Uses the Data Loss Prevention API to analyze string with medical record
29+
number custom regex detector, with custom hotwords rules to boost finding
30+
certainty under some circumstances.
31+
32+
Args:
33+
project: The Google Cloud project id to use as a parent resource.
34+
content_string: The string to inspect.
35+
36+
Returns:
37+
None; the response from the API is printed to the terminal.
38+
"""
39+
40+
# Instantiate a client.
41+
dlp = google.cloud.dlp_v2.DlpServiceClient()
42+
43+
# Construct a custom regex detector info type called "C_MRN",
44+
# with ###-#-##### pattern, where each # represents a digit from 1 to 9.
45+
# The detector has a detection likelihood of POSSIBLE.
46+
custom_info_types = [
47+
{
48+
"info_type": {"name": "C_MRN"},
49+
"regex": {"pattern": "[1-9]{3}-[1-9]{1}-[1-9]{5}"},
50+
"likelihood": google.cloud.dlp_v2.Likelihood.POSSIBLE,
51+
}
52+
]
53+
54+
# Construct a rule set with hotwords "mrn" and "medical", with a likelohood
55+
# boost to VERY_LIKELY when hotwords are present within the 10 character-
56+
# window preceding the PII finding.
57+
hotword_rule = {
58+
"hotword_regex": {"pattern": "(?i)(mrn|medical)(?-i)"},
59+
"likelihood_adjustment": {
60+
"fixed_likelihood": google.cloud.dlp_v2.Likelihood.VERY_LIKELY
61+
},
62+
"proximity": {"window_before": 10},
63+
}
64+
65+
rule_set = [
66+
{"info_types": [{"name": "C_MRN"}], "rules": [{"hotword_rule": hotword_rule}]}
67+
]
68+
69+
# Construct the configuration dictionary with the custom regex info type.
70+
inspect_config = {
71+
"custom_info_types": custom_info_types,
72+
"rule_set": rule_set,
73+
"include_quote": True,
74+
}
75+
76+
# Construct the `item`.
77+
item = {"value": content_string}
78+
79+
# Convert the project id into a full resource id.
80+
parent = f"projects/{project}"
81+
82+
# Call the API.
83+
response = dlp.inspect_content(
84+
request={"parent": parent, "inspect_config": inspect_config, "item": item}
85+
)
86+
87+
# Print out the results.
88+
if response.result.findings:
89+
for finding in response.result.findings:
90+
print(f"Quote: {finding.quote}")
91+
print(f"Info type: {finding.info_type.name}")
92+
print(f"Likelihood: {finding.likelihood}")
93+
else:
94+
print("No findings.")
95+
96+
97+
# [END dlp_inspect_hotword_rule]
+45Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import inspect_hotword_rule as custom_infotype
18+
19+
import pytest
20+
21+
GCLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT")
22+
23+
24+
def test_inspect_with_medical_record_number_w_custom_hotwords_no_hotwords(
25+
capsys: pytest.LogCaptureFixture,
26+
) -> None:
27+
custom_infotype.inspect_data_w_custom_hotwords(
28+
GCLOUD_PROJECT, "just a number 444-5-22222"
29+
)
30+
31+
out, _ = capsys.readouterr()
32+
assert "Info type: C_MRN" in out
33+
assert "Likelihood: 3" in out
34+
35+
36+
def test_inspect_with_medical_record_number_w_custom_hotwords_has_hotwords(
37+
capsys: pytest.LogCaptureFixture,
38+
) -> None:
39+
custom_infotype.inspect_data_w_custom_hotwords(
40+
GCLOUD_PROJECT, "Patients MRN 444-5-22222"
41+
)
42+
43+
out, _ = capsys.readouterr()
44+
assert "Info type: C_MRN" in out
45+
assert "Likelihood: 5" in out
+98Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""Custom infoType snippets.
15+
16+
This file contains sample code that uses the Data Loss Prevention API to create
17+
custom infoType detectors to refine scan results.
18+
"""
19+
20+
# [START dlp_inspect_string_custom_excluding_substring]
21+
from typing import List
22+
23+
import google.cloud.dlp
24+
25+
26+
def inspect_string_custom_excluding_substring(
27+
project: str, content_string: str, exclusion_list: List[str] = ["jimmy"]
28+
) -> None:
29+
"""Inspects the provided text with a custom detector, avoiding matches on specific tokens
30+
31+
Uses the Data Loss Prevention API to omit matches on a custom detector
32+
if they include tokens in the specified exclusion list.
33+
34+
Args:
35+
project: The Google Cloud project id to use as a parent resource.
36+
content_string: The string to inspect.
37+
exclusion_list: The list of strings to ignore matches on
38+
39+
Returns:
40+
None; the response from the API is printed to the terminal.
41+
"""
42+
43+
# Instantiate a client.
44+
dlp = google.cloud.dlp_v2.DlpServiceClient()
45+
46+
# Construct a custom regex detector for names
47+
custom_info_types = [
48+
{
49+
"info_type": {"name": "CUSTOM_NAME_DETECTOR"},
50+
"regex": {"pattern": "[A-Z][a-z]{1,15}, [A-Z][a-z]{1,15}"},
51+
}
52+
]
53+
54+
# Construct a rule set that will only match if the match text does not
55+
# contains tokens from the exclusion list.
56+
rule_set = [
57+
{
58+
"info_types": [{"name": "CUSTOM_NAME_DETECTOR"}],
59+
"rules": [
60+
{
61+
"exclusion_rule": {
62+
"dictionary": {"word_list": {"words": exclusion_list}},
63+
"matching_type": google.cloud.dlp_v2.MatchingType.MATCHING_TYPE_PARTIAL_MATCH,
64+
}
65+
}
66+
],
67+
}
68+
]
69+
70+
# Construct the configuration dictionary
71+
inspect_config = {
72+
"custom_info_types": custom_info_types,
73+
"rule_set": rule_set,
74+
"include_quote": True,
75+
}
76+
77+
# Construct the `item`.
78+
item = {"value": content_string}
79+
80+
# Convert the project id into a full resource id.
81+
parent = f"projects/{project}"
82+
83+
# Call the API.
84+
response = dlp.inspect_content(
85+
request={"parent": parent, "inspect_config": inspect_config, "item": item}
86+
)
87+
88+
# Print out the results.
89+
if response.result.findings:
90+
for finding in response.result.findings:
91+
print(f"Quote: {finding.quote}")
92+
print(f"Info type: {finding.info_type.name}")
93+
print(f"Likelihood: {finding.likelihood}")
94+
else:
95+
print("No findings.")
96+
97+
98+
# [END dlp_inspect_string_custom_excluding_substring]
+33Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import inspect_string_custom_excluding_substring as custom_infotype
18+
19+
import pytest
20+
21+
GCLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT")
22+
23+
24+
def test_inspect_string_custom_excluding_substring(
25+
capsys: pytest.LogCaptureFixture,
26+
) -> None:
27+
custom_infotype.inspect_string_custom_excluding_substring(
28+
GCLOUD_PROJECT, "Danger, Jimmy | Wayne, Bruce", ["Jimmy"]
29+
)
30+
31+
out, _ = capsys.readouterr()
32+
assert "Wayne, Bruce" in out
33+
assert "Danger, Jimmy" not in out

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.