-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparsing_utils.py
More file actions
84 lines (68 loc) · 2.14 KB
/
parsing_utils.py
File metadata and controls
84 lines (68 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""Some utils to make parsing easier."""
def _string_to_db(stream: str) -> list:
    """Split *stream* into literal text runs and escape character codes.

    The two-character sequences backslash-n and backslash-t (as they appear
    in source text, not real control characters) are replaced by the integer
    code of the character they denote (10 and 9). The text between them is
    kept as ``str`` items, and empty runs are dropped.

    Returns a list mixing ``str`` and ``int`` items in their original order;
    an empty *stream* gives an empty list.

    The work is done iteratively so that a string containing many escapes
    cannot exhaust the interpreter's recursion limit.
    """
    escapes = (
        (r'\n', ord('\n')),
        (r'\t', ord('\t')),
    )
    # Start with the whole string as a single run, then refine the runs once
    # per escape sequence.
    values = [stream]
    for marker, code in escapes:
        refined = []
        for value in values:
            if not isinstance(value, str):
                # Already an escape code produced by an earlier pass.
                refined.append(value)
                continue
            for index, piece in enumerate(value.split(marker)):
                if index:
                    # One code per separator found between two pieces.
                    refined.append(code)
                refined.append(piece)
        values = refined
    # Splitting leaves '' around adjacent/leading/trailing escapes.
    return [value for value in values if value != '']


def string_to_db(stream: str) -> tuple:
    """Render *stream* as a comma-separated, zero-terminated operand list.

    Text runs are wrapped in double quotes, escape codes are written as
    decimal numbers, and a terminating ``0`` is appended. The output looks
    like the operands of an assembler ``db`` directive (presumably NASM --
    confirm against the caller).

    Returns a ``(operands, length)`` tuple where *length* is the number of
    values described: one per character of text, one per escape code, plus
    one for the terminator.

    An empty *stream* yields ``('0', 1)`` rather than a list starting with a
    stray comma.
    """
    operands = []
    length = 0
    for val in _string_to_db(stream):
        if isinstance(val, str):
            length += len(val)
            operands.append(f'"{val}"')
        else:
            length += 1
            operands.append(str(val))
    if not operands:
        # Nothing but the terminator: avoid emitting an empty first operand.
        return '0', 1
    return ','.join(operands) + ', 0', length + 1
def read_until_sentinel(iterator, sentinel: str) -> str:
    """Consume *iterator* up to and including the first *sentinel* item.

    The consumed items are concatenated and returned, sentinel included.
    If the sentinel never shows up, everything that was left in the iterator
    is returned. Items after the sentinel stay in the iterator.
    """
    pieces = []
    for piece in iterator:
        pieces.append(piece)
        if piece == sentinel:
            break
    return ''.join(pieces)
def remove_comments(stream: str) -> str:
    """
    Remove comments from a stream of code.

    A comment starts at a ``#`` and runs up to (not including) the end of the
    line; the newline itself is kept, so line numbers are preserved.

    A ``#`` inside a quoted string is NOT a comment. Strings are recognised
    the way the lexer in this file tokenises them: they start at a ``"`` or
    ``'`` and end at the next occurrence of the same character (no escape
    handling, may span lines). Quotes inside a ``[`` ... ``]`` span are
    ignored, because the lexer reads such a span verbatim; a ``#`` inside
    brackets is still treated as a comment.
    """
    kept = []
    # Character that closes the span we are currently inside, or None when
    # we are in plain code. ']' means a bracket span, a quote means a string.
    closer = None
    pos = 0
    end = len(stream)
    while pos < end:
        char = stream[pos]
        in_string = closer is not None and closer != ']'
        if char == '#' and not in_string:
            # Skip to the end of the line but leave the newline in place.
            newline = stream.find('\n', pos)
            pos = end if newline == -1 else newline
            continue
        if closer is not None:
            if char == closer:
                closer = None
        elif char == '"' or char == "'":
            closer = char
        elif char == '[':
            closer = ']'
        kept.append(char)
        pos += 1
    return ''.join(kept)
def pyre_split(stream: str) -> list:
    """Get a list of lexemes from a stream of code.

    Lexemes are separated by whitespace, with two exceptions that are read
    verbatim (whitespace included):

    * a string, from a ``"`` or ``'`` up to and including the next
      occurrence of the same character (no escape handling);
    * a bracket span, from ``[`` up to and including the next ``]``. It is
      appended to whatever lexeme is being built, so ``name[ ... ]`` is a
      single lexeme.

    An unterminated string or bracket span extends to the end of the input
    and is returned as the last lexeme instead of being silently dropped.

    Raises:
        AssertionError: if a string starts in the middle of a lexeme.
    """
    stream = stream.strip()
    items = []
    acc = ''
    pos = 0
    end = len(stream)
    while pos < end:
        c = stream[pos]
        if c == '"' or c == "'" or c == '[':
            is_bracket = c == '['
            if acc and not is_bracket:
                # Raised explicitly (rather than via `assert`) so the check
                # is still performed when Python runs with -O.
                raise AssertionError(
                    f'string literal starts in the middle of lexeme {acc!r}'
                )
            closer = ']' if is_bracket else c
            close_at = stream.find(closer, pos + 1)
            # No closer: the span runs to the end of the input.
            stop = end if close_at == -1 else close_at + 1
            acc += stream[pos:stop]
            pos = stop
        elif c.isspace():
            if acc:
                items.append(acc)
                acc = ''
            pos += 1
        else:
            acc += c
            pos += 1
    # Flush the lexeme that was still being built when the input ended.
    if acc:
        items.append(acc)
    return items