Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 87ce0ff

Browse files
mwdaub authored and andrewsg committed
Add DLP code samples for custom info types (GoogleCloudPlatform#1524)
* Add custom info type samples to inspect_content.py
  Use flags to indicate dictionary word lists and regex patterns, then parse them into custom info types.
* Make code compatible with python 2.7
* Add missing commas
* Remove bad import
* Add tests for custom info types
* Add info_types parameter to deid.py
* Update deid tests to use info_types parameter
* Fix indentation
* Add blank lines
* Share logic for building custom info types
* Fix line too long
* Fix typo.
* Revert "Fix typo."
  This reverts commit b4ffea6, so that the sharing of the custom info type logic can be reverted as well to make the code samples more readable.
* Revert "Share logic for building custom info types"
  This reverts commit 47fc04f. This makes the code samples more readable.
* Switch from indexes to using enumerate.
* Updated help message for custom dictionaries.
* Fix enumerate syntax error.
1 parent 5e5ea6d commit 87ce0ff
Copy full SHA for 87ce0ff

File tree

Expand file tree / Collapse file tree

4 files changed

+268
-12
lines changed
Filter options
Expand file tree / Collapse file tree

4 files changed

+268
-12
lines changed

‎dlp/deid.py

Copy file name to clipboard | Expand all lines: dlp/deid.py
+33 -6 | Lines changed: 33 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020

2121

2222
# [START dlp_deidentify_masking]
23-
def deidentify_with_mask(project, string, masking_character=None,
23+
def deidentify_with_mask(project, string, info_types, masking_character=None,
2424
number_to_mask=0):
2525
"""Uses the Data Loss Prevention API to deidentify sensitive data in a
2626
string by masking it with a character.
@@ -44,6 +44,11 @@ def deidentify_with_mask(project, string, masking_character=None,
4444
# Convert the project id into a full resource id.
4545
parent = dlp.project_path(project)
4646

47+
# Construct inspect configuration dictionary
48+
inspect_config = {
49+
'info_types': [{'name': info_type} for info_type in info_types]
50+
}
51+
4752
# Construct deidentify configuration dictionary
4853
deidentify_config = {
4954
'info_type_transformations': {
@@ -65,15 +70,16 @@ def deidentify_with_mask(project, string, masking_character=None,
6570

6671
# Call the API
6772
response = dlp.deidentify_content(
68-
parent, deidentify_config=deidentify_config, item=item)
73+
parent, inspect_config=inspect_config,
74+
deidentify_config=deidentify_config, item=item)
6975

7076
# Print out the results.
7177
print(response.item.value)
7278
# [END dlp_deidentify_masking]
7379

7480

7581
# [START dlp_deidentify_fpe]
76-
def deidentify_with_fpe(project, string, alphabet=None,
82+
def deidentify_with_fpe(project, string, info_types, alphabet=None,
7783
surrogate_type=None, key_name=None, wrapped_key=None):
7884
"""Uses the Data Loss Prevention API to deidentify sensitive data in a
7985
string using Format Preserving Encryption (FPE).
@@ -127,6 +133,11 @@ def deidentify_with_fpe(project, string, alphabet=None,
127133
'name': surrogate_type
128134
}
129135

136+
# Construct inspect configuration dictionary
137+
inspect_config = {
138+
'info_types': [{'name': info_type} for info_type in info_types]
139+
}
140+
130141
# Construct deidentify configuration dictionary
131142
deidentify_config = {
132143
'info_type_transformations': {
@@ -146,7 +157,8 @@ def deidentify_with_fpe(project, string, alphabet=None,
146157

147158
# Call the API
148159
response = dlp.deidentify_content(
149-
parent, deidentify_config=deidentify_config, item=item)
160+
parent, inspect_config=inspect_config,
161+
deidentify_config=deidentify_config, item=item)
150162

151163
# Print results
152164
print(response.item.value)
@@ -404,6 +416,13 @@ def write_data(data):
404416
'deid_mask',
405417
help='Deidentify sensitive data in a string by masking it with a '
406418
'character.')
419+
mask_parser.add_argument(
420+
'--info_types', action='append',
421+
help='Strings representing info types to look for. A full list of '
422+
'info categories and types is available from the API. Examples '
423+
'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
424+
'If unspecified, the three above examples will be used.',
425+
default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
407426
mask_parser.add_argument(
408427
'project',
409428
help='The Google Cloud project id to use as a parent resource.')
@@ -423,6 +442,13 @@ def write_data(data):
423442
'deid_fpe',
424443
help='Deidentify sensitive data in a string using Format Preserving '
425444
'Encryption (FPE).')
445+
fpe_parser.add_argument(
446+
'--info_types', action='append',
447+
help='Strings representing info types to look for. A full list of '
448+
'info categories and types is available from the API. Examples '
449+
'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
450+
'If unspecified, the three above examples will be used.',
451+
default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
426452
fpe_parser.add_argument(
427453
'project',
428454
help='The Google Cloud project id to use as a parent resource.')
@@ -532,11 +558,12 @@ def write_data(data):
532558
args = parser.parse_args()
533559

534560
if args.content == 'deid_mask':
535-
deidentify_with_mask(args.project, args.item,
561+
deidentify_with_mask(args.project, args.item, args.info_types,
536562
masking_character=args.masking_character,
537563
number_to_mask=args.number_to_mask)
538564
elif args.content == 'deid_fpe':
539-
deidentify_with_fpe(args.project, args.item, alphabet=args.alphabet,
565+
deidentify_with_fpe(args.project, args.item, args.info_types,
566+
alphabet=args.alphabet,
540567
wrapped_key=args.wrapped_key,
541568
key_name=args.key_name,
542569
surrogate_type=args.surrogate_type)

‎dlp/deid_test.py

Copy file name to clipboard | Expand all lines: dlp/deid_test.py
+8 -2 | Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,8 @@ def tempdir():
4343

4444

4545
def test_deidentify_with_mask(capsys):
46-
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING)
46+
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING,
47+
['US_SOCIAL_SECURITY_NUMBER'])
4748

4849
out, _ = capsys.readouterr()
4950
assert 'My SSN is *********' in out
@@ -60,14 +61,17 @@ def test_deidentify_with_mask_masking_character_specified(capsys):
6061
deid.deidentify_with_mask(
6162
GCLOUD_PROJECT,
6263
HARMFUL_STRING,
64+
['US_SOCIAL_SECURITY_NUMBER'],
6365
masking_character='#')
6466

6567
out, _ = capsys.readouterr()
6668
assert 'My SSN is #########' in out
6769

6870

6971
def test_deidentify_with_mask_masking_number_specified(capsys):
70-
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING, number_to_mask=7)
72+
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING,
73+
['US_SOCIAL_SECURITY_NUMBER'],
74+
number_to_mask=7)
7175

7276
out, _ = capsys.readouterr()
7377
assert 'My SSN is *******27' in out
@@ -77,6 +81,7 @@ def test_deidentify_with_fpe(capsys):
7781
deid.deidentify_with_fpe(
7882
GCLOUD_PROJECT,
7983
HARMFUL_STRING,
84+
['US_SOCIAL_SECURITY_NUMBER'],
8085
alphabet='NUMERIC',
8186
wrapped_key=WRAPPED_KEY,
8287
key_name=KEY_NAME)
@@ -90,6 +95,7 @@ def test_deidentify_with_fpe_uses_surrogate_info_types(capsys):
9095
deid.deidentify_with_fpe(
9196
GCLOUD_PROJECT,
9297
HARMFUL_STRING,
98+
['US_SOCIAL_SECURITY_NUMBER'],
9399
alphabet='NUMERIC',
94100
wrapped_key=WRAPPED_KEY,
95101
key_name=KEY_NAME,

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.