-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpython-parser.py
More file actions
120 lines (95 loc) · 3.77 KB
/
python-parser.py
File metadata and controls
120 lines (95 loc) · 3.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
__author__ = 'lan'
from os.path import isfile
import re
import os
import sys
from multiprocessing import Process
from multiprocessing.managers import BaseManager
class PythonParser:
    """Accumulate simple coding-style statistics over Python source files.

    Four counter dictionaries live on the instance and are incremented for
    every parsed file:

    * ``indent_dict``      -- lines that start with tabs vs. with spaces.
    * ``line_length_dict`` -- lines shorter than 80 characters, shorter than
      120 characters, and all longer lines.
    * ``imports_dict``     -- ``import`` lines naming a single module vs.
      several comma-separated modules.
    * ``whitespace_dict``  -- regex hits for whitespace usage around
      brackets, ``,``, ``:`` and ``=`` ("non_extra" vs. "extra").

    Every attribute whose name ends in ``_dict`` is treated as a counter by
    ``__str__`` and by the result hand-over in ``run``.
    """

    # Seconds a single file may take before its worker process is killed.
    _PARSE_TIMEOUT = 5

    # Patterns are compiled once for the class instead of on every call.
    _TAB_INDENT = re.compile(r'^\t+.*', re.M)
    _SPACE_INDENT = re.compile(r'^ +.*', re.M)
    _SEPARATED_IMPORT = re.compile(r'^\s*\t*import\s+[\w.]+([^,]\s*|\s*#.*)$', re.M)
    _NON_SEPARATED_IMPORT = re.compile(r'^\s*\t*import\s+\w+\s*,\s+\w+', re.M)
    # redundant whitespace occurs in three cases: , : =
    _NON_EXTRA_WHITESPACE = re.compile(r'\S+[\(\)\[\],]\S+|\S+:\s|\S\s=\s', re.M)
    _EXTRA_WHITESPACE = re.compile(r'\(\s+|\s+[\(\)\[\]]|\s+[:,]\s+|\s{2,}=|=\s{2,}', re.M)

    def __init__(self):
        self.indent_dict = {'tab': 0, 'space': 0}
        self.line_length_dict = {'char80': 0, 'char120': 0, 'char150': 0}
        self.imports_dict = {'separated': 0, 'non_separated': 0}
        self.whitespace_dict = {'non_extra': 0, 'extra': 0}

    def run(self, py_file):
        """Parse *py_file* in a worker process, giving up after a timeout.

        The worker operates on its own copy of this object, so anything it
        counts is invisible here unless it is sent back. The worker therefore
        ships its counter dictionaries through a pipe once parsing succeeded,
        and they replace the ones on this instance. If the worker times out
        (it is terminated) or dies before sending, the counters are left
        untouched.

        :param py_file: path of the file to analyse.
        """
        # Function-scope import: the module header only imports Process.
        from multiprocessing import Pipe

        receiver, sender = Pipe(duplex=False)
        worker = Process(target=self._parse_in_child, args=(py_file, sender))
        worker.start()
        # Drop the parent's handle on the write end so the read end reports
        # EOF as soon as the worker is gone.
        sender.close()
        try:
            # The payload is a handful of small dicts, far below the pipe's
            # buffer size, so the worker can send and exit without a reader.
            worker.join(self._PARSE_TIMEOUT)
            if worker.is_alive():
                worker.terminate()
                worker.join()  # reap the terminated worker
            elif receiver.poll():
                try:
                    counters = receiver.recv()
                except EOFError:
                    # Worker exited without sending (bad path, exception).
                    counters = {}
                for name, value in counters.items():
                    setattr(self, name, value)
        finally:
            receiver.close()

    def _parse_in_child(self, py_file, conn):
        """Worker entry point: parse *py_file*, then send the counters over *conn*."""
        try:
            self.parse(py_file)
            conn.send(self._counters())
        finally:
            conn.close()

    def _counters(self):
        """Return a dict of every ``*_dict`` counter attribute, keyed by attribute name."""
        return {
            name: value
            for name, value in self.__dict__.items()
            if name.endswith('_dict')
        }

    def parse(self, py_file):
        """Read *py_file*, update all four counters and print the totals.

        Exits with status 1 (raises ``SystemExit``) when *py_file* is not an
        existing file whose name ends in ``.py``.
        """
        if not isfile(py_file) or not py_file.endswith('.py'):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("wrong type. need .py file.")
            sys.exit(1)
        with open(py_file, 'r') as fd:
            stream = fd.read()
        self.indent(stream)
        self.line_length(stream)
        self.imports(stream)
        self.whitespace(stream)
        print(self)

    def indent(self, stream):
        """Count the lines of *stream* that begin with tabs and with spaces."""
        # findall() always returns a list, so no None check is needed.
        self.indent_dict['tab'] += len(self._TAB_INDENT.findall(stream))
        self.indent_dict['space'] += len(self._SPACE_INDENT.findall(stream))

    def line_length(self, stream):
        """Bucket each ``'\\n'``-separated line of *stream* by its length.

        Lines shorter than 80 characters go to ``char80``, lines shorter than
        120 to ``char120`` and everything else to ``char150``.
        """
        for line in stream.split('\n'):
            width = len(line)
            if width < 80:
                self.line_length_dict['char80'] += 1
            elif width < 120:
                self.line_length_dict['char120'] += 1
            else:
                self.line_length_dict['char150'] += 1

    def imports(self, stream):
        """Count single-module and comma-separated ``import`` lines in *stream*."""
        self.imports_dict['separated'] += len(self._SEPARATED_IMPORT.findall(stream))
        self.imports_dict['non_separated'] += len(self._NON_SEPARATED_IMPORT.findall(stream))

    def whitespace(self, stream):
        """Count matches of the "non_extra" and "extra" whitespace patterns in *stream*."""
        self.whitespace_dict['non_extra'] += len(self._NON_EXTRA_WHITESPACE.findall(stream))
        self.whitespace_dict['extra'] += len(self._EXTRA_WHITESPACE.findall(stream))

    def __str__(self):
        """Return one ``name: value`` line per counter, names padded to 30 columns."""
        return ''.join(
            '{}: '.format(name).ljust(30) + '{}\n'.format(value)
            for name, value in self.__dict__.items()
            if name.endswith('_dict')
        )

    def get_value(self):
        """Return this object itself (lets a manager proxy fetch a copy of it)."""
        return self
class MyManager(BaseManager):
    """Manager subclass that adds nothing of its own.

    It exists so that types can be registered on it rather than on
    ``BaseManager`` itself.
    """
def manager():
    """Create a ``MyManager``, start it, and return the started instance."""
    started = MyManager()
    started.start()
    return started
# Make PythonParser creatable through MyManager instances; calling
# ``<manager>.PythonParser()`` then returns a proxy to a managed instance.
MyManager.register('PythonParser', PythonParser)

if __name__ == '__main__':
    # Usage: python-parser.py <directory>
    # Runs the parser over every *.py file directly inside <directory> and
    # prints the parser object once all files have been processed.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: {} <directory>\n'.format(sys.argv[0]))
        sys.exit(2)
    dir_path = sys.argv[1]

    # Bound to its own name so the manager() factory is not shadowed.
    style_manager = manager()
    try:
        py_parser = style_manager.PythonParser()
        file_count = 0
        for file_name in os.listdir(dir_path):
            if not file_name.endswith('.py'):
                continue
            file_count += 1
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(str(file_count) + " Parsing file : " + file_name)
            py_parser.run(os.path.join(dir_path, file_name))
        print(py_parser.get_value())
        print('\n')
    finally:
        # Stop the manager's server process even if a file blew up.
        style_manager.shutdown()