summaryrefslogtreecommitdiff
blob: c5d5da45e4867288304949e5b81a62ff1c238ad9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python
# Copyright 1999-2007 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Id$

import os, re, sys

# POSIX character-class names (as written inside an egrep bracket
# expression) mapped to the python regex escape that replaces them.
# Note that python's \w also matches the underscore.
egrep_compat_map = {
	"[:alnum:]" : r'\w',
	"[:digit:]" : r'\d',
	"[:space:]" : r'\s',
}

# A line ending in "<<WORD" or "<<-WORD" (preceded by whitespace);
# group 1 is the here-document delimiter word.
here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')
# A line holding only a function name (word characters and hyphens)
# followed by "()".
func_start_re = re.compile(r'^[-\w]+\s*\(\)\s*$')
# A line holding only the closing brace of a function body.
func_end_re = re.compile(r'^\}$')

# A variable assignment, optionally prefixed by "declare -<opts>" or
# "export". Group 2 is the variable name and group 3 is the opening
# quote character of the value (None when the value is unquoted).
var_assign_re = re.compile(r'(^|^declare\s+-\S+\s+|^export\s+)([^=\s]+)=("|\')?.*$')
# A quote at the end of a line, optionally followed by whitespace.
# Group 1 is \" (an escaped double quote), " or '.
close_quote_re = re.compile(r'(\\"|"|\')\s*$')

def compile_egrep_pattern(s):
	"""
	Translate the egrep-style character classes listed in
	egrep_compat_map into their python equivalents and return the
	compiled regular expression. Everything else in the pattern is
	passed to re.compile() unchanged, so it must already be valid
	python regular expression syntax.
	"""
	# items() is available in both python 2 and python 3, whereas
	# iteritems() only exists in python 2.
	for k, v in egrep_compat_map.items():
		s = s.replace(k, v)
	return re.compile(s)

def have_end_quote(quote, line):
	"""
	Return True if the line ends (ignoring trailing whitespace) with
	the given quote character, which is how the end of a multi-line
	quoted value is detected. A trailing escaped double quote (\\") is
	matched as a unit by close_quote_re, so it never compares equal to
	a plain double quote and is not treated as a terminator. Single
	quotes cannot be escaped inside single quotes according to the
	posix spec, so no such handling is attempted for them.
	"""
	trailing = close_quote_re.search(line)
	if trailing is None:
		# No quote of any kind at the end of the line.
		return False
	return trailing.group(1) == quote

def filter_bash_environment(pattern, file_in, file_out):
	"""
	Copy a saved bash environment from file_in to file_out, dropping
	every variable assignment whose variable name matches pattern
	(a compiled regular expression applied with match()).

	Assignments are only recognized outside of function bodies and
	here-documents; the contents of those are always copied verbatim.
	A quoted value that spans several lines is kept or dropped as a
	whole, together with the line that starts the assignment.
	"""
	# Compiled regex for the terminator of the here-document being read.
	here_doc_delim = None
	# Match object of the function header while inside a function body.
	in_func = None
	# Quote character of the multi-line value currently being read.
	multi_line_quote = None
	# Whether the multi-line value currently being read is filtered out.
	multi_line_quote_filter = None
	for line in file_in:
		if multi_line_quote is not None:
			# Continuation of a quoted value that started on an earlier line.
			if not multi_line_quote_filter:
				file_out.write(line)
			if have_end_quote(multi_line_quote, line):
				multi_line_quote = None
				multi_line_quote_filter = None
			continue
		if here_doc_delim is None and in_func is None:
			var_assign_match = var_assign_re.match(line)
			if var_assign_match is not None:
				quote = var_assign_match.group(3)
				filter_this = pattern.match(var_assign_match.group(2)) \
					is not None
				# Exclude the start quote when searching for the end quote,
				# so that the start quote is not misidentified as the end
				# quote (this happens when there is a newline immediately
				# after the start quote).
				if quote is not None and not have_end_quote(
					quote, line[var_assign_match.end(3):]):
					multi_line_quote = quote
					multi_line_quote_filter = filter_this
				if not filter_this:
					file_out.write(line)
				continue
		if here_doc_delim is not None:
			if here_doc_delim.match(line):
				here_doc_delim = None
			file_out.write(line)
			continue
		here_doc = here_doc_re.match(line)
		if here_doc is not None:
			here_doc_delim = re.compile("^%s$" % here_doc.group(1))
			file_out.write(line)
			continue
		# Note: here-documents are handled before functions since otherwise
		# it would be possible for the content of a here-document to be
		# mistaken as the end of a function.
		if in_func:
			if func_end_re.match(line) is not None:
				in_func = None
			file_out.write(line)
			continue
		in_func = func_start_re.match(line)
		if in_func is not None:
			file_out.write(line)
			continue
		# This line is not recognized as part of a variable assignment,
		# function definition, or here document, so just allow it to
		# pass through.
		file_out.write(line)

# Command line entry point: read a bash environment from stdin and write
# the filtered result to stdout.
if __name__ == "__main__":
	description = "Filter out variable assignments for variable " + \
		"names matching a given PATTERN " + \
		"while leaving bash function definitions and here-documents " + \
		"intact. The PATTERN should use python regular expression syntax" + \
		" but [:digit:], [:space:] and " + \
		"[:alnum:] character classes will be automatically translated " + \
		"for compatibility with egrep syntax."
	usage = "usage: %s PATTERN" % os.path.basename(sys.argv[0])
	from optparse import OptionParser
	parser = OptionParser(description=description, usage=usage)
	options, args = parser.parse_args(sys.argv[1:])
	# Report the two misuse cases separately so that the message
	# describes what actually went wrong.
	if not args:
		parser.error("Missing required PATTERN argument.")
	if len(args) > 1:
		parser.error("Too many arguments; expected a single PATTERN " + \
			"(quote it if it contains whitespace).")
	file_in = sys.stdin
	file_out = sys.stdout
	# Whitespace separated words in PATTERN are alternatives; the combined
	# expression is anchored so that it has to match a whole variable name.
	var_pattern = "^(%s)$" % "|".join(args[0].split())
	filter_bash_environment(
		compile_egrep_pattern(var_pattern), file_in, file_out)
	file_out.flush()