Skip to content

Commit 5686969

Browse files
gh-108095: Add re.grep module
1 parent 80f30cf commit 5686969

1 file changed

Lines changed: 248 additions & 0 deletions

File tree

Lib/re/grep.py

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
import sys
2+
import argparse
3+
import re
4+
from collections import deque
5+
6+
7+
def find(patterns, line):
    """Return True if at least one of *patterns* matches somewhere in *line*.

    *patterns* is an iterable of compiled regular expressions; they are
    tried in order and the scan stops at the first one that matches.
    An empty *patterns* gives False.
    """
    return any(regex.search(line) is not None for regex in patterns)
12+
13+
def findall(patterns, line):
    """Yield the text of successive non-overlapping matches in *line*.

    *patterns* is a sequence of compiled regular expressions.

    With exactly one pattern the matches are those produced by its
    ``finditer()``.  With several patterns the line is scanned from left
    to right: at each step the match that starts earliest is chosen (the
    longest one among matches sharing that start), its text is yielded
    and the scan resumes at its end.  Empty matches are yielded as empty
    strings and advance the scan by one character.
    """
    if len(patterns) == 1:
        for m in patterns[0].finditer(line):
            yield m.group()
        return

    length = len(line)
    i = 0
    # Bound the scan by the line length: search() treats a start position
    # past the end as the end of the string, so without this check an
    # empty match at the end of the line would be reported forever.
    while i <= length:
        best_span = None
        for pat in patterns:
            m = pat.search(line, i)
            if m is not None:
                # Ordering by (start, -end) makes the smallest tuple the
                # longest of the earliest matches.
                span = (m.start(), -m.end())
                if best_span is None or span < best_span:
                    best_span = span
                if best_span == (i, -length):
                    # Matches the rest of line; nothing can beat it.
                    break
        if best_span is None:
            # No pattern matches at or after the current position.
            break
        begin = best_span[0]
        end = -best_span[1]
        yield line[begin:end]
        i = end
        if begin == end:
            # Step over an empty match so the scan makes progress.
            i += 1
40+
41+
def grep(opts, patterns, file, filename):
    """Search the lines of *file* and print the output selected by *opts*.

    *opts* is the option namespace; the attributes read here are
    ``quiet``, ``filename_only``, ``count``, ``only_matching``,
    ``invert_match``, ``before_context``, ``after_context``,
    ``group_separator``, ``filename`` and ``line_number``.
    *patterns* is a list of compiled regular expressions, *file* an
    iterable of text lines and *filename* the name printed for this input.

    Return True if at least one line (or, with ``only_matching``, at
    least one match) was selected, False otherwise.
    """
    def emit(sep, lineno, text):
        # Optional "filename" and "line number" prefixes, each followed
        # by *sep*, then the text itself.
        fields = []
        if opts.filename:
            fields.append(filename)
        if opts.line_number:
            fields.append(lineno)
        print(*fields, text, sep=sep)

    # -q, -l and -L only need to know whether anything matched.
    fast_exit = opts.quiet or opts.filename_only is not None
    pending = deque(maxlen=opts.before_context)  # candidate leading context
    trailing = 0          # trailing context lines still owed
    next_group = None     # first line number that starts a new group
    total_count = 0
    found = False

    for lineno, raw in enumerate(file, 1):
        text = raw.removesuffix('\n')

        if opts.only_matching:
            for part in findall(patterns, text):
                emit(':', lineno, part)
                found = True
            continue

        selected = find(patterns, text)
        if opts.invert_match:
            selected = not selected

        if not selected:
            if fast_exit or opts.count:
                continue
            if trailing:
                emit('-', lineno, text)
                trailing -= 1
            else:
                pending.append(text)
            continue

        found = True
        if fast_exit:
            break
        if opts.count:
            total_count += 1
            continue

        if next_group and lineno > next_group:
            print(opts.group_separator)
        first = lineno - len(pending)
        for offset, earlier in enumerate(pending):
            emit('-', first + offset, earlier)
        pending.clear()
        emit(':', lineno, text)
        trailing = opts.after_context
        if opts.group_separator is None:
            next_group = None
        else:
            next_group = lineno + trailing + opts.before_context + 1

    if fast_exit:
        # filename_only is True for -l, False for -L and None for plain -q.
        if opts.filename_only == found:
            print(filename)
    elif opts.count:
        if opts.filename:
            print(filename, end=':')
        print(total_count)
    return found
95+
96+
def read_from_file(filename):
    """Return the lines of *filename* as a list of strings.

    A single trailing newline is removed from each line.  The file is
    opened with the encoding and error handler of ``sys.stdin``.
    """
    stdin = sys.stdin
    with open(filename, encoding=stdin.encoding,
              errors=stdin.errors) as stream:
        return [raw.removesuffix('\n') for raw in stream]
100+
101+
def main():
    """Run the command-line interface and report whether anything matched.

    Options are parsed from the process arguments, the patterns given
    with -e/-f are compiled, and every listed file (standard input when
    none is listed or when the name is ``-``) is passed to ``grep()``.

    Return True if any input had a selected line, False otherwise.
    """
    parser = argparse.ArgumentParser(add_help=False)
    # -h is taken by --no-filename, so help is registered as --help only.
    parser.add_argument(
        '--help',
        action='help', default=argparse.SUPPRESS,
        help='show this help message and exit')

    parser.set_defaults(label='(standard input)', group_separator='--')

    parser.add_argument(
        'files',
        nargs=argparse.REMAINDER,
        metavar='FILES',
        help='Files to search.')

    matching = parser.add_argument_group('Matching Control')
    matching.add_argument(
        '-e', '--regexp',
        action='append', dest='patterns',
        metavar='PATTERN',
        help='Use PATTERN as the pattern.')
    matching.add_argument(
        '-f', '--file',
        action='extend', dest='patterns', type=read_from_file,
        metavar='PATTERN_FILE',
        help='Obtain patterns from PATTERN_FILE, one per line.')
    matching.add_argument(
        '-F', '--fixed-strings',
        action='store_true',
        help='Interpret patterns as fixed strings.')
    matching.add_argument(
        '-i', '--ignore-case',
        action='store_true',
        help='Ignore case distinctions in patterns and input data.')
    matching.add_argument(
        '--no-ignore-case',
        action='store_false', dest='ignore_case',
        help='Do not ignore case distinctions in patterns and input data. '
             'This is the default.')
    matching.add_argument(
        '-v', '--invert-match',
        action='store_true',
        help='Invert the sense of matching, to select non-matching lines.')
    matching.add_argument(
        '-w', '--word-regexp',
        action='store_true',
        help='Select only those lines containing matches that form whole words.')
    matching.add_argument(
        '-x', '--line-regexp',
        action='store_true',
        help='Select only those matches that exactly match the whole line.')

    output = parser.add_argument_group('Output Control')
    output.add_argument(
        '-c', '--count',
        action='store_true',
        help='Suppress normal output; instead print a count of matching '
             'lines for each input file.')
    output.add_argument(
        '-L', '--files-without-match',
        action='store_false', dest='filename_only', default=None,
        help='Suppress normal output; instead print the name of each input '
             'file from which no output would normally have been printed.')
    output.add_argument(
        '-l', '--files-with-match',
        action='store_true', dest='filename_only',
        help='Suppress normal output; instead print the name of each input '
             'file from which output would normally have been printed.')
    output.add_argument(
        '-o', '--only-matching',
        action='store_true',
        help='Print only the matched (non-empty) parts of a matching line, '
             'with each such part on a separate output line.')
    output.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='Quiet; do not write anything to standard output. '
             'Exit immediately with zero status if any match is found.')

    prefix = parser.add_argument_group('Output Line Prefix Control')
    prefix.add_argument(
        '-H', '--with-filename',
        action='store_true', dest='filename', default=None,
        help='Print the file name for each match. '
             'This is the default when there is more than one file to search.')
    prefix.add_argument(
        '-h', '--no-filename',
        action='store_false', dest='filename',
        help='Suppress the prefixing of file names on output. '
             'This is the default when there is only one file (or only standard '
             'input) to search.')
    prefix.add_argument(
        '-n', '--line-number',
        action='store_true',
        help='Prefix each line of output with the 1-based line number '
             'within its input file.')

    context = parser.add_argument_group('Context Line Control')
    context.add_argument(
        '-A', '--after-context',
        type=int, metavar='NUM',
        help='Print NUM lines of trailing context after matching lines.')
    context.add_argument(
        '-B', '--before-context',
        type=int, metavar='NUM',
        help='Print NUM lines of leading context before matching lines.')
    context.add_argument(
        '-C', '--context',
        type=int, default=0, metavar='NUM',
        help='Print NUM lines of output context.')

    opts = parser.parse_args()

    def prepare(pat):
        # Turn one pattern given by the user into the regex source to compile.
        if opts.fixed_strings:
            pat = re.escape(pat)
        if opts.line_regexp:
            return fr'\A(?:{pat})\Z'
        if opts.word_regexp:
            return fr'\b(?:{pat})\b'
        return pat

    flags = 0
    if opts.ignore_case:
        flags = re.IGNORECASE
    sources = [prepare(pat) for pat in opts.patterns or []]
    patterns = [re.compile(source, flags) for source in sources]

    # By default print filenames only if more than one file is specified.
    if opts.filename is None:
        opts.filename = len(opts.files) > 1
    # -C sets -A and -B if they are not specified explicitly.
    if opts.after_context is None:
        opts.after_context = opts.context
    if opts.before_context is None:
        opts.before_context = opts.context
    # -q, -l, -L and -c suppress normal output.
    if opts.quiet or opts.filename_only is not None or opts.count:
        opts.only_matching = False
        opts.after_context = 0
        opts.before_context = 0
    # -vo suppresses normal output.
    if opts.only_matching and opts.invert_match:
        opts.only_matching = False
        opts.quiet = True
    # -o suppresses context output.
    if opts.only_matching:
        opts.after_context = 0
        opts.before_context = 0
    # Only print group separator for non-zero context.
    if not opts.after_context and not opts.before_context:
        opts.group_separator = None

    found = False
    for filename in opts.files or ['-']:
        if filename == '-':
            hit = grep(opts, patterns, sys.stdin, opts.label)
        else:
            with open(filename, encoding=sys.stdin.encoding,
                      errors=sys.stdin.errors) as stream:
                hit = grep(opts, patterns, stream, filename)
        found = found or hit
        # With -q one match anywhere settles the result.
        if found and opts.quiet:
            break
    return found
238+
239+
240+
if __name__ == '__main__':
    # Exit status: 0 if something was selected, 1 if nothing was,
    # 2 if an error occurred.
    try:
        found = main()
    except SystemExit:
        # Exits requested by argparse (--help, usage errors) pass through.
        raise
    except BaseException as e:
        # Report on stderr so the diagnostic never mixes with the
        # matched lines written to stdout.
        print(f'error: {e!r}', file=sys.stderr)
        sys.exit(2)
    sys.exit(0 if found else 1)

0 commit comments

Comments
 (0)