Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 47fc04f

Browse filesBrowse files
authored
Share logic for building custom info types
1 parent 5258658 commit 47fc04f
Copy full SHA for 47fc04f

File tree

Expand file treeCollapse file tree

1 file changed

+28
-75
lines changed
Filter options
Expand file treeCollapse file tree

1 file changed

+28
-75
lines changed

‎dlp/inspect_content.py

Copy file name to clipboardExpand all lines: dlp/inspect_content.py
+28-75Lines changed: 28 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -53,21 +53,8 @@ def inspect_string(project, content_string, info_types,
5353

5454
# Prepare custom_info_types by parsing the dictionary word lists and
5555
# regex patterns.
56-
if custom_dictionaries is None:
57-
custom_dictionaries = []
58-
dictionaries = [{
59-
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
60-
'dictionary': {
61-
'word_list': {'words': custom_dictionaries[i].split(',')}
62-
}
63-
} for i in range(len(custom_dictionaries))]
64-
if custom_regexes is None:
65-
custom_regexes = []
66-
regexes = [{
67-
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
68-
'regex': {'pattern': custom_regexes[i]}
69-
} for i in range(len(custom_regexes))]
70-
custom_info_types = dictionaries + regexes
56+
custom_info_types = build_custom_info_types(custom_dictionaries,
57+
custom_regexes)
7158

7259
# Construct the configuration dictionary. Keys which are None may
7360
# optionally be omitted entirely.
@@ -141,21 +128,8 @@ def inspect_file(project, filename, info_types, min_likelihood=None,
141128

142129
# Prepare custom_info_types by parsing the dictionary word lists and
143130
# regex patterns.
144-
if custom_dictionaries is None:
145-
custom_dictionaries = []
146-
dictionaries = [{
147-
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
148-
'dictionary': {
149-
'word_list': {'words': custom_dictionaries[i].split(',')}
150-
}
151-
} for i in range(len(custom_dictionaries))]
152-
if custom_regexes is None:
153-
custom_regexes = []
154-
regexes = [{
155-
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
156-
'regex': {'pattern': custom_regexes[i]}
157-
} for i in range(len(custom_regexes))]
158-
custom_info_types = dictionaries + regexes
131+
custom_info_types = build_custom_info_types(custom_dictionaries,
132+
custom_regexes)
159133

160134
# Construct the configuration dictionary. Keys which are None may
161135
# optionally be omitted entirely.
@@ -254,21 +228,8 @@ def inspect_gcs_file(project, bucket, filename, topic_id, subscription_id,
254228

255229
# Prepare custom_info_types by parsing the dictionary word lists and
256230
# regex patterns.
257-
if custom_dictionaries is None:
258-
custom_dictionaries = []
259-
dictionaries = [{
260-
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
261-
'dictionary': {
262-
'word_list': {'words': custom_dictionaries[i].split(',')}
263-
}
264-
} for i in range(len(custom_dictionaries))]
265-
if custom_regexes is None:
266-
custom_regexes = []
267-
regexes = [{
268-
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
269-
'regex': {'pattern': custom_regexes[i]}
270-
} for i in range(len(custom_regexes))]
271-
custom_info_types = dictionaries + regexes
231+
custom_info_types = build_custom_info_types(custom_dictionaries,
232+
custom_regexes)
272233

273234
# Construct the configuration dictionary. Keys which are None may
274235
# optionally be omitted entirely.
@@ -400,21 +361,8 @@ def inspect_datastore(project, datastore_project, kind,
400361

401362
# Prepare custom_info_types by parsing the dictionary word lists and
402363
# regex patterns.
403-
if custom_dictionaries is None:
404-
custom_dictionaries = []
405-
dictionaries = [{
406-
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
407-
'dictionary': {
408-
'word_list': {'words': custom_dictionaries[i].split(',')}
409-
}
410-
} for i in range(len(custom_dictionaries))]
411-
if custom_regexes is None:
412-
custom_regexes = []
413-
regexes = [{
414-
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
415-
'regex': {'pattern': custom_regexes[i]}
416-
} for i in range(len(custom_regexes))]
417-
custom_info_types = dictionaries + regexes
364+
custom_info_types = build_custom_info_types(custom_dictionaries,
365+
custom_regexes)
418366

419367
# Construct the configuration dictionary. Keys which are None may
420368
# optionally be omitted entirely.
@@ -551,21 +499,8 @@ def inspect_bigquery(project, bigquery_project, dataset_id, table_id,
551499

552500
# Prepare custom_info_types by parsing the dictionary word lists and
553501
# regex patterns.
554-
if custom_dictionaries is None:
555-
custom_dictionaries = []
556-
dictionaries = [{
557-
'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
558-
'dictionary': {
559-
'word_list': {'words': custom_dictionaries[i].split(',')}
560-
}
561-
} for i in range(len(custom_dictionaries))]
562-
if custom_regexes is None:
563-
custom_regexes = []
564-
regexes = [{
565-
'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
566-
'regex': {'pattern': custom_regexes[i]}
567-
} for i in range(len(custom_regexes))]
568-
custom_info_types = dictionaries + regexes
502+
custom_info_types = build_custom_info_types(custom_dictionaries,
503+
custom_regexes)
569504

570505
# Construct the configuration dictionary. Keys which are None may
571506
# optionally be omitted entirely.
@@ -651,6 +586,24 @@ def callback(message):
651586
# [END dlp_inspect_bigquery]
652587

653588

589+
def build_custom_info_types(custom_dictionaries, custom_regexes):
    """Build the custom info type list shared by the inspect_* functions.

    Args:
        custom_dictionaries: Iterable of strings, each a comma-separated
            word list, or None for no custom dictionaries.
        custom_regexes: Iterable of regex pattern strings, or None for no
            custom regexes.

    Returns:
        A list of dicts. Dictionary entries come first, named
        CUSTOM_DICTIONARY_<i>, followed by regex entries, named
        CUSTOM_REGEX_<i>, where <i> is the zero-based position of the
        item within its own input.
    """
    if custom_dictionaries is None:
        custom_dictionaries = []
    if custom_regexes is None:
        custom_regexes = []

    # enumerate() replaces the range(len(...)) index loops, so any
    # iterable is accepted, not only indexable sequences.
    dictionaries = [{
        'info_type': {'name': 'CUSTOM_DICTIONARY_{}'.format(i)},
        'dictionary': {
            # Words are split on ',' exactly as given; surrounding
            # whitespace is not stripped.
            'word_list': {'words': word_list.split(',')}
        }
    } for i, word_list in enumerate(custom_dictionaries)]

    regexes = [{
        'info_type': {'name': 'CUSTOM_REGEX_{}'.format(i)},
        'regex': {'pattern': pattern}
    } for i, pattern in enumerate(custom_regexes)]

    return dictionaries + regexes
605+
606+
654607
if __name__ == '__main__':
655608
default_project = os.environ.get('GCLOUD_PROJECT')
656609

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.