#!/usr/bin/env python
import sys, os
try:
treshold = int(sys.argv[1])
except IndexError:
print "usage: %s TRESHOLD [INPUT_FILE]" % sys.argv[0]
sys.exit(1)
# Take input from the file named by the optional second argument; when no
# such argument is given, read standard input instead.
if len(sys.argv) > 2:
    f = open(sys.argv[2])
else:
    f = sys.stdin

# Running state for the filter loop: the indentation currently tracked,
# how many lines have been seen at it, and how many of those were dropped.
indent_level = lines_indented = trunc_line_count = 0

# Read the entire input up front as a list of lines.
whole = f.readlines()
def count_leading_spaces(input_str):
    """Return the number of whitespace characters at the start of input_str.

    Every character for which isspace() is true is counted, so for an
    empty or whitespace-only string the result is the string's full
    length (a trailing newline included).
    """
    space_count = 0
    for char in input_str:
        if not char.isspace():
            break
        space_count += 1
    # This return is also reached when the loop runs to the end without
    # meeting a non-whitespace character; previously that path returned
    # None implicitly.
    return space_count
def print_truncated_notice():
if trunc_line_count > 0:
# suffer extra pain to pretty print a "truncated" line
print " "*(indent_level) + "...", trunc_line_count, "line" + ('','s')[trunc_line_count != 1], "truncated ..."
for line in whole:
leading_spaces = count_leading_spaces(line)
if leading_spaces == indent_level:
lines_indented += 1
if lines_indented > treshold:
trunc_line_count += 1
else:
print line,
else:
print_truncated_notice()
indent_level = leading_spaces
trunc_line_count = 0
lines_indented = 1
print line,
# did we truncate at the last indent level ?
print_truncated_notice()
Refactorings
No refactoring yet !
hayalci
September 20, 2009, September 20, 2009 12:50, permalink
Sample run below.
I noticed that it did not print a "truncated" line after the last line, even when the last lines had been truncated. I fixed this in the code above by separating out the print_truncated_notice function and calling it once more before exiting.
aaa aa bbb bbb bbb bbb ccc ccc ddd xxx xxx xxx ttt ttt ttt
aaa aa bbb bbb ... 2 lines truncated ... ccc ccc ... 1 line truncated ... xxx xxx ... 1 line truncated ... ttt ttt ... 1 line truncated ...
fain182.myopenid.com
September 21, 2009, September 21, 2009 12:58, permalink
count_leading_spaces() slightly improved
the "whole" variable was unnecessary
other minor improvements
#!/usr/bin/env python
import sys, os
try:
treshold = int(sys.argv[1])
except IndexError:
print "usage: %s TRESHOLD [INPUT_FILE]" % sys.argv[0]
sys.exit(1)
# The optional second argument names the input file; without it the
# script reads standard input.
f = open(sys.argv[2]) if len(sys.argv) > 2 else sys.stdin

# Filter state: the indentation currently tracked, the number of lines
# seen at it, and the number of those that were dropped.
indent_level, lines_indented, trunc_line_count = 0, 0, 0
def count_leading_spaces(input_str):
    """Return the number of whitespace characters at the start of input_str.

    Every character for which isspace() is true is counted, so for an
    empty or whitespace-only string the result is the string's full
    length (a trailing newline included).
    """
    space_count = 0
    length = len(input_str)
    # Bound the scan by the string length: a blank line such as "\n" is
    # whitespace all the way through, and indexing past its end would
    # raise IndexError.
    while space_count < length and input_str[space_count].isspace():
        space_count += 1
    return space_count
def print_truncated_notice():
if trunc_line_count > 0:
# suffer extra pain to pretty print a "truncated" line
print " "*(indent_level) + "...", trunc_line_count, "line" + ('','s')[trunc_line_count != 1], "truncated ..."
for line in f.readlines():
leading_spaces = count_leading_spaces(line)
if leading_spaces == indent_level:
lines_indented += 1
if lines_indented > treshold:
trunc_line_count += 1
continue
else:
print_truncated_notice()
indent_level = leading_spaces
trunc_line_count = 0
lines_indented = 1
print line,
# did we truncate at the last indent level ?
print_truncated_notice()
I wrote this to filter the output of "pflogsumm", the Postfix log summary utility. Even though it had options to limit output to the "top n", some parts of the output were not limited (for example, the hosts blocked by Spamhaus; there are ~30000 of them).
Luckily the output was indented by category, so I wrote this to filter text by indentation level once a threshold is exceeded. It would be better to patch pflogsumm, but I don't know that much Perl.