Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 5ab334c

Browse files
authored
Update DLP samples for release (GoogleCloudPlatform#1415)
1 parent ae0fb45 commit 5ab334c
Copy full SHA for 5ab334c
Expand file tree / Collapse file tree

20 files changed

+3552
-349
lines changed

‎dlp/deid.py

Copy file name to clipboard · Expand all lines: dlp/deid.py
+549 · Lines changed: 549 additions & 0 deletions
Large diffs are not rendered by default.

‎dlp/deid_test.py

Copy file name to clipboard
+163 · Lines changed: 163 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,163 @@
1+
# Copyright 2017 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import shutil
17+
import tempfile
18+
19+
import pytest
20+
21+
import deid
22+
23+
# --- Sample inputs -------------------------------------------------------
# One string the tests expect to be altered (it embeds a nine-digit number)
# and one the tests expect to be echoed back unchanged.
HARMFUL_STRING = 'My SSN is 372819127'
HARMLESS_STRING = 'My favorite color is blue'

# --- Project / key configuration -----------------------------------------
# Project id is taken from the environment; it is None when the variable
# is not set.
GCLOUD_PROJECT = os.environ.get('GCLOUD_PROJECT')

# Passed as ``wrapped_key=`` to the FPE and context-field date-shift samples.
WRAPPED_KEY = (
    'CiQAz0hX4+go8fJwn80Fr8pVImwx+tmZdqU7JL+7TN/S5JxBU9gSSQDhFHpFVy'
    'uzJps0YH9ls480mU+JLG7jI/0lL04i6XJRWqmI6gUSZRUtECYcLH5gXK4SXHlL'
    'rotx7Chxz/4z7SIpXFOBY61z0/U='
)

# Passed as ``key_name=`` alongside WRAPPED_KEY.
# NOTE(review): looks like a Cloud KMS crypto key resource path -- confirm.
KEY_NAME = (
    'projects/python-docs-samples-tests/locations/global/keyRings/'
    'dlp-test/cryptoKeys/dlp-test'
)

# Label the surrogate-type tests expect to see in the transformed output.
SURROGATE_TYPE = 'SSN_TOKEN'

# --- Date-shift fixtures -------------------------------------------------
# Input CSV, resolved relative to this test module.
CSV_FILE = os.path.join(os.path.dirname(__file__), 'resources/dates.csv')

# Used as both the lower and the upper bound (in days) by the date-shift
# tests.
DATE_SHIFTED_AMOUNT = 30
DATE_FIELDS = ['birth_date', 'register_date']
CSV_CONTEXT_FIELD = 'name'
36+
37+
38+
@pytest.fixture(scope='module')
def tempdir():
    """Yield a module-scoped scratch directory and delete it afterwards.

    The directory is created with ``tempfile.mkdtemp`` and removed, with
    everything inside it, by ``shutil.rmtree`` when the fixture is torn down.
    """
    scratch_dir = tempfile.mkdtemp()
    yield scratch_dir
    shutil.rmtree(scratch_dir)
43+
44+
45+
def test_deidentify_with_mask(capsys):
    """Default masking should print the nine digits as nine asterisks."""
    deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING)

    # Index 0 of the capture result is stdout; stderr is not inspected.
    stdout = capsys.readouterr()[0]
    assert 'My SSN is *********' in stdout
50+
51+
52+
def test_deidentify_with_mask_ignore_insensitive_data(capsys):
    """Masking a harmless string should print that string unchanged."""
    deid.deidentify_with_mask(GCLOUD_PROJECT, HARMLESS_STRING)

    # Index 0 of the capture result is stdout; stderr is not inspected.
    stdout = capsys.readouterr()[0]
    assert HARMLESS_STRING in stdout
57+
58+
59+
def test_deidentify_with_mask_masking_character_specified(capsys):
    """A custom ``masking_character`` should replace the default one."""
    mask_options = {'masking_character': '#'}
    deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING, **mask_options)

    # Index 0 of the capture result is stdout; stderr is not inspected.
    stdout = capsys.readouterr()[0]
    assert 'My SSN is #########' in stdout
67+
68+
69+
def test_deidentify_with_mask_masking_number_specified(capsys):
    """With ``number_to_mask=7`` the last two digits should stay visible."""
    mask_options = {'number_to_mask': 7}
    deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING, **mask_options)

    # Index 0 of the capture result is stdout; stderr is not inspected.
    stdout = capsys.readouterr()[0]
    assert 'My SSN is *******27' in stdout
74+
75+
76+
def test_deidentify_with_fpe(capsys):
    """FPE output should keep the prefix but not the original digits."""
    fpe_options = {
        'alphabet': 'NUMERIC',
        'wrapped_key': WRAPPED_KEY,
        'key_name': KEY_NAME,
    }
    deid.deidentify_with_fpe(GCLOUD_PROJECT, HARMFUL_STRING, **fpe_options)

    # Index 0 of the capture result is stdout; stderr is not inspected.
    stdout = capsys.readouterr()[0]
    assert 'My SSN is' in stdout
    assert '372819127' not in stdout
87+
88+
89+
def test_deidentify_with_fpe_uses_surrogate_info_types(capsys):
    """With a surrogate type, the output should carry the surrogate label."""
    fpe_options = {
        'alphabet': 'NUMERIC',
        'wrapped_key': WRAPPED_KEY,
        'key_name': KEY_NAME,
        'surrogate_type': SURROGATE_TYPE,
    }
    deid.deidentify_with_fpe(GCLOUD_PROJECT, HARMFUL_STRING, **fpe_options)

    # Index 0 of the capture result is stdout; stderr is not inspected.
    stdout = capsys.readouterr()[0]
    assert 'My SSN is SSN_TOKEN' in stdout
    assert '372819127' not in stdout
101+
102+
103+
def test_deidentify_with_fpe_ignores_insensitive_data(capsys):
    """FPE on a harmless string should print that string unchanged."""
    fpe_options = {
        'alphabet': 'NUMERIC',
        'wrapped_key': WRAPPED_KEY,
        'key_name': KEY_NAME,
    }
    deid.deidentify_with_fpe(GCLOUD_PROJECT, HARMLESS_STRING, **fpe_options)

    # Index 0 of the capture result is stdout; stderr is not inspected.
    stdout = capsys.readouterr()[0]
    assert HARMLESS_STRING in stdout
113+
114+
115+
def test_deidentify_with_date_shift(tempdir, capsys):
    """Date-shifting the sample CSV should print a 'Successful' message."""
    shift_options = {
        'input_csv_file': CSV_FILE,
        # The result is written inside the module's scratch directory.
        'output_csv_file': os.path.join(tempdir, 'dates-shifted.csv'),
        # Identical bounds are passed for the lower and upper limit.
        'lower_bound_days': DATE_SHIFTED_AMOUNT,
        'upper_bound_days': DATE_SHIFTED_AMOUNT,
        'date_fields': DATE_FIELDS,
    }
    deid.deidentify_with_date_shift(GCLOUD_PROJECT, **shift_options)

    # Index 0 of the capture result is stdout; stderr is not inspected.
    stdout = capsys.readouterr()[0]
    assert 'Successful' in stdout
129+
130+
131+
def test_deidentify_with_date_shift_using_context_field(tempdir, capsys):
    """Date-shifting with a context field and key should also succeed."""
    shift_options = {
        'input_csv_file': CSV_FILE,
        # The result is written inside the module's scratch directory.
        'output_csv_file': os.path.join(tempdir, 'dates-shifted.csv'),
        # Identical bounds are passed for the lower and upper limit.
        'lower_bound_days': DATE_SHIFTED_AMOUNT,
        'upper_bound_days': DATE_SHIFTED_AMOUNT,
        'date_fields': DATE_FIELDS,
        # Extra arguments exercised only by this variant of the test.
        'context_field_id': CSV_CONTEXT_FIELD,
        'wrapped_key': WRAPPED_KEY,
        'key_name': KEY_NAME,
    }
    deid.deidentify_with_date_shift(GCLOUD_PROJECT, **shift_options)

    # Index 0 of the capture result is stdout; stderr is not inspected.
    stdout = capsys.readouterr()[0]
    assert 'Successful' in stdout
148+
149+
150+
def test_reidentify_with_fpe(capsys):
    """Re-identification should not echo the encrypted token digits."""
    encrypted_digits = '731997681'
    # Concatenates to exactly 'My SSN is SSN_TOKEN(9):731997681'.
    labeled_fpe_string = 'My SSN is SSN_TOKEN(9):' + encrypted_digits

    reid_options = {
        'surrogate_type': SURROGATE_TYPE,
        'wrapped_key': WRAPPED_KEY,
        'key_name': KEY_NAME,
        'alphabet': 'NUMERIC',
    }
    deid.reidentify_with_fpe(
        GCLOUD_PROJECT, labeled_fpe_string, **reid_options)

    # Index 0 of the capture result is stdout; stderr is not inspected.
    stdout = capsys.readouterr()[0]
    assert encrypted_digits not in stdout

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.