Improved ini parser: support for multiline values, colon delimiter, value quotation

This commit is contained in:
Maxim Kulkin 2013-09-15 11:23:15 +04:00
parent c0a856d3cc
commit 79e1f38e6e
4 changed files with 184 additions and 40 deletions

View File

@ -15,18 +15,30 @@ class Mark(object):
def __ne__(self, other):
return not self == other
def merge(self, other):
    """Return a new Mark combining this mark with ``other``.

    The result keeps this mark's source; its line and column are each
    computed as ``self + other - 1``.
    """
    # NOTE(review): the "- 1" presumably treats `other` as a 1-based offset
    # relative to this mark -- confirm against callers.
    merged_line = self.line + other.line - 1
    merged_column = self.column + other.column - 1
    return Mark(self.source, merged_line, merged_column)
def __repr__(self):
return '%s line %d column %d' % (self.source, self.line, self.column)
class Error(object):
    """Base error object carrying a human-readable message."""

    def __init__(self, message):
        """Store ``message`` for later display."""
        self.message = message

    def __repr__(self):
        """Return '<ClassName "message">' using the concrete class name."""
        # Read the name from the class itself instead of slicing it out of
        # str(cls), which leaks the trailing "'>" or breaks if the repr
        # format of classes differs.
        return '<%s "%s">' % (self.__class__.__name__, self.message)

    def __str__(self):
        """Return the message prefixed with 'Error: '."""
        return 'Error: %s' % self.message
class MarkedError(Error):
    """An Error that also records the source position (mark) it refers to."""

    def __init__(self, message, mark):
        """Store ``message`` via the base class and keep ``mark``."""
        super(MarkedError, self).__init__(message)
        self.mark = mark

    def __repr__(self):
        """Return '<ClassName "message" at mark>'."""
        # Use the class's own name rather than slicing str(cls), which
        # depends on the exact repr format of classes.
        return '<%s "%s" at %s>' % (self.__class__.__name__, self.message, self.mark)

    def __str__(self):
        """Return the message followed by the mark's source, line and column."""
        location = (self.mark.source, self.mark.line, self.mark.column)
        return self.message + (" (source '%s' line %d column %d)" % location)

View File

@ -5,37 +5,54 @@ from ostack_validator.model import *
from ostack_validator.config_formats.common import *
class IniConfigParser:
key_value_re = re.compile('^\s*(\w+)\s*=\s*(.*)$')
key_value_re = re.compile("^(\w+)\s*([:=])\s*('.*'|\".*\"|.*)\s*$")
def parse(self, name, io):
if not hasattr(io, 'readlines'):
io = StringIO(io)
errors = []
current_section_name = ConfigSectionName(Mark(name, 1, 1), Mark(name, 1, 1), '')
current_section_name = ConfigSectionName(Mark(name, 0, 0), Mark(name, 0, 0), '')
current_param_name = None
current_param_value = None
current_param_delimiter = None
sections = []
parameters = []
line_number = 0
for line in io.readlines():
line_number += 1
line = line.rstrip()
if line.strip() == '': continue
if current_param_name and (current_param_value.quotechar or (line == '' or not line[0].isspace())):
param = ConfigParameter(current_param_name.start_mark, current_param_value.end_mark, current_param_name, current_param_value, current_param_delimiter)
parameters.append(param)
start_index = 0
while line[start_index].isspace(): start_index+=1
current_param_name = None
current_param_value = None
current_param_delimiter = None
if line[start_index] == '#': continue
if line == '': continue
if line[start_index] == '[':
end_index = line.find(']', start_index)
if line[0] in '#;': continue
if line[0].isspace():
if current_param_name:
current_param_value.end_mark = Mark(name, line_number, len(line))
current_param_value.text += line.lstrip()
continue
else:
errors.append(ParseError('Unexpected multiline value continuation', Mark(name, line_number, 0)))
continue
if line[0] == '[':
end_index = line.find(']')
if end_index == -1:
errors.append(ParseError('Unclosed section', Mark(name, line_number, len(line))))
end_index = len(line)
while line[end_index-1].isspace(): end_index -= 1
if end_index <= start_index+1:
errors.append(ParseError('Missing section name', Mark(name, line_number, start_index)))
if end_index <= 1:
errors.append(ParseError('Missing section name', Mark(name, line_number, 0)))
continue
else:
i = end_index+1
@ -46,35 +63,53 @@ class IniConfigParser:
i += 1
if current_section_name.text != '' or len(parameters) > 0:
section = ConfigSection(current_section_name.start_mark, Mark(name, line_number, start_index), current_section_name, parameters)
section = ConfigSection(current_section_name.start_mark, Mark(name, line_number, 0), current_section_name, parameters)
sections.append(section)
parameters = []
current_section_name = ConfigSectionName(
Mark(name, line_number, start_index),
Mark(name, line_number, 0),
Mark(name, line_number, end_index),
line[start_index+1:end_index]
line[1:end_index]
)
else:
m = self.key_value_re.match(line)
if m:
param_name = ConfigParameterName(
current_param_name = ConfigParameterName(
Mark(name, line_number, m.start(1)),
Mark(name, line_number, m.end(1)),
m.group(1)
)
param_value = ConfigParameterValue(
current_param_delimiter = TextElement(
Mark(name, line_number, m.start(2)),
Mark(name, line_number, m.end(2)),
m.group(2)
)
param = ConfigParameter(param_name.start_mark, param_value.end_mark, param_name, param_value)
parameters.append(param)
# Unquote value
value = m.group(3)
quotechar = None
if (value[0] == value[-1] and value[0] in "\"'"):
quotechar = value[0]
value = value[1:-1]
current_param_value = ConfigParameterValue(
Mark(name, line_number, m.start(3)),
Mark(name, line_number, m.end(3)),
value,
quotechar
)
else:
errors.append(ParseError('Syntax error', Mark(name, line_number, 1)))
errors.append(ParseError('Syntax error', Mark(name, line_number, 0)))
line_number += 1
if current_param_name:
param = ConfigParameter(current_param_name.start_mark, current_param_value.end_mark, current_param_name, current_param_value, current_param_delimiter)
parameters.append(param)
if current_section_name.text != '' or len(parameters) > 0:
section = ConfigSection(current_section_name.start_mark, Mark(name, line_number, start_index), current_section_name, parameters)
section = ConfigSection(current_section_name.start_mark, Mark(name, line_number, 0), current_section_name, parameters)
sections.append(section)
parameters = []

View File

@ -6,7 +6,22 @@ class IniConfigParserTests(unittest.TestCase):
# unittest fixture hook: stores a newly constructed IniConfigParser on
# self.parser, which the parse() helper of this test class uses.
def setUp(self):
self.parser = IniConfigParser()
def parse(self, content):
def _strip_margin(self, content):
lines = content.split("\n")
if lines[0] == '' and lines[-1].strip() == '':
lines = lines[1:-1]
first_line = lines[0]
margin_size = 0
while margin_size < len(first_line) and first_line[margin_size].isspace(): margin_size += 1
stripped_lines = [line[margin_size:] for line in lines]
return "\n".join(stripped_lines)
def parse(self, content, margin=False):
    """Parse ``content`` with the fixture parser under the name 'test.conf'.

    When ``margin`` is true the content is first passed through
    ``_strip_margin``.
    """
    source = self._strip_margin(content) if margin else content
    return self.parser.parse('test.conf', source)
def test_parsing(self):
@ -18,6 +33,48 @@ class IniConfigParserTests(unittest.TestCase):
self.assertParameter('param1', 'value1', config.sections[0].parameters[0])
self.assertEqual(1, len(config.sections[0].parameters))
def test_colon_as_delimiter(self):
    """'param1 : value1' parses with ':' separating name and value."""
    result = self.parse('param1 : value1')
    self.assertTrue(result.success)
    first_param = result.value.sections[0].parameters[0]
    self.assertParameter('param1', 'value1', first_param)
def test_use_colon_delimiter_if_it_comes_before_equals_sign(self):
    """With 'param1: value=123' the '=' stays inside the value."""
    result = self.parse('param1: value=123')
    self.assertTrue(result.success)
    first_param = result.value.sections[0].parameters[0]
    self.assertParameter('param1', 'value=123', first_param)
def test_use_equals_delimiter_if_it_comes_before_colon(self):
    """With 'param1=value:123' the ':' stays inside the value."""
    result = self.parse('param1=value:123')
    self.assertTrue(result.success)
    first_param = result.value.sections[0].parameters[0]
    self.assertParameter('param1', 'value:123', first_param)
def test_wrapping_value_with_single_quotes(self):
    """A single-quoted value is unquoted and records "'" as its quotechar."""
    result = self.parse("param = 'foo bar'")
    self.assertTrue(result.success)
    first_param = result.value.sections[0].parameters[0]
    self.assertParameter('param', 'foo bar', first_param)
    self.assertEqual("'", first_param.value.quotechar)
def test_wrapping_value_with_single_quotes_and_trailing_whitespace(self):
    """Whitespace after the closing single quote does not affect the value."""
    result = self.parse("param = 'foo bar' ")
    self.assertTrue(result.success)
    first_param = result.value.sections[0].parameters[0]
    self.assertParameter('param', 'foo bar', first_param)
def test_wrapping_value_with_double_quotes(self):
    """A double-quoted value is unquoted and records '"' as its quotechar."""
    result = self.parse('param = "foo bar"')
    self.assertTrue(result.success)
    first_param = result.value.sections[0].parameters[0]
    self.assertParameter('param', 'foo bar', first_param)
    self.assertEqual('"', first_param.value.quotechar)
def test_wrapping_value_with_double_quotes_and_trailing_whitespace(self):
    """Whitespace after the closing double quote does not affect the value."""
    result = self.parse('param = "foo bar" ')
    self.assertTrue(result.success)
    first_param = result.value.sections[0].parameters[0]
    self.assertParameter('param', 'foo bar', first_param)
def test_parsing_iolike_source(self):
r = self.parse(StringIO("param1 = value1"))
@ -36,8 +93,9 @@ class IniConfigParserTests(unittest.TestCase):
r = self.parse("""
[section1]
param1 = value1
""")
""", margin=True)
self.assertTrue(r.success)
self.assertEqual('section1', r.value.sections[0].name.text)
self.assertEqual(1, len(r.value.sections[0].parameters))
@ -46,8 +104,9 @@ class IniConfigParserTests(unittest.TestCase):
[section1]
param1 = value1
param2 = value2
""")
""", margin=True)
self.assertTrue(r.success)
self.assertEqual(2, len(r.value.sections[0].parameters))
def test_parsing_with_different_sections(self):
@ -56,8 +115,9 @@ class IniConfigParserTests(unittest.TestCase):
param1 = value1
[section2]
param2 = value2
""")
""", margin=True)
self.assertTrue(r.success)
c = r.value
self.assertEqual('section1', c.sections[0].name.text)
@ -67,14 +127,51 @@ class IniConfigParserTests(unittest.TestCase):
self.assertParameter('param2', 'value2', c.sections[1].parameters[0])
self.assertEqual(1, len(c.sections[1].parameters))
def test_whole_line_comments(self):
def test_whole_line_comments_starting_with_hash(self):
    """A line starting with '#' parses successfully and yields no sections."""
    result = self.parse("#param=value")
    self.assertTrue(result.success)
    section_count = len(result.value.sections)
    self.assertEqual(0, section_count)
def test_whole_line_comments_starting_with_semicolon(self):
    """A line starting with ';' parses successfully and yields no sections."""
    result = self.parse(";param=value")
    self.assertTrue(result.success)
    section_count = len(result.value.sections)
    self.assertEqual(0, section_count)
def test_hash_in_value_is_part_of_the_value(self):
    """A '#' inside a value is kept in the value, not treated as a comment."""
    result = self.parse("param=value#123")
    self.assertTrue(result.success)
    first_param = result.value.sections[0].parameters[0]
    self.assertParameter("param", "value#123", first_param)
# A value continued on a following line is joined to the first line's text
# with no separator: 'line1' + 'line2' is expected to give 'line1line2'.
# NOTE(review): the indentation inside the triple-quoted literal is not
# visible in this view; the continuation line presumably starts with
# whitespace in the real file -- confirm before editing the literal.
def test_multiline_value(self):
r = self.parse("""
param1 = line1
line2
""", margin=True)
self.assertTrue(r.success)
self.assertParameter('param1', 'line1line2', r.value.sections[0].parameters[0])
# A multiline value followed by another parameter line: the first
# parameter is expected to hold 'foobar' (both lines joined, no separator).
# NOTE(review): indentation inside the literal is not visible in this view;
# the 'bar' line presumably starts with whitespace -- confirm.
def test_multiline_value_finished_by_other_parameter(self):
r = self.parse("""
param1 = foo
bar
param2 = baz
""", margin=True)
self.assertTrue(r.success)
self.assertParameter('param1', 'foobar', r.value.sections[0].parameters[0])
# Expects the first parameter to hold 'foobar' (both lines joined, no
# separator).
# NOTE(review): as shown here the literal is identical to the one in
# test_multiline_value_finished_by_other_parameter; the empty line the test
# name refers to, and any indentation, are not visible in this view --
# confirm against the real file before editing the literal.
def test_multiline_value_finished_by_empty_line(self):
r = self.parse("""
param1 = foo
bar
param2 = baz
""", margin=True)
self.assertTrue(r.success)
self.assertParameter('param1', 'foobar', r.value.sections[0].parameters[0])
def test_unclosed_section_causes_error(self):
    """A section header without a closing ']' makes the parse unsuccessful."""
    result = self.parse("[section1\nparam1=123")
    self.assertFalse(result.success)

View File

@ -26,22 +26,25 @@ class Element(object):
def __ne__(self, other):
return not self == other
class TextElement(Element):
    """An Element that additionally stores a ``text`` attribute."""

    def __init__(self, start_mark, end_mark, text):
        """Pass both marks to Element and keep ``text`` on the instance."""
        super(TextElement, self).__init__(start_mark, end_mark)
        self.text = text
class ConfigSection(Element):
    """An Element holding a section ``name`` and its ``parameters``."""

    def __init__(self, start_mark, end_mark, name, parameters):
        """Pass both marks to Element and keep ``name`` and ``parameters``."""
        super(ConfigSection, self).__init__(start_mark, end_mark)
        self.name = name
        self.parameters = parameters
class ConfigSectionName(TextElement):
    """Text element used for the name of a configuration section.

    Adds nothing to TextElement; the marks and ``text`` are handled by
    ``TextElement.__init__``.
    """
class ConfigParameter(Element):
def __init__(self, start_mark, end_mark, name, value):
def __init__(self, start_mark, end_mark, name, value, delimiter):
super(ConfigParameter, self).__init__(start_mark, end_mark)
self.name = name
self.value = value
self.delimiter = delimiter
def __eq__(self, other):
return (self.name.text == other.name.text) and (self.value.text == other.value.text)
@ -50,17 +53,14 @@ class ConfigParameter(Element):
return not self == other
def __repr__(self):
return "<ConfigParameter %s=%s>" % (self.name.text, self.value.text)
return "<ConfigParameter %s=%s delimiter=%s>" % (self.name.text, self.value.text, self.delimiter.text)
class ConfigParameterName(TextElement):
    """Text element used for the name of a configuration parameter.

    Adds nothing to TextElement; the marks and ``text`` are handled by
    ``TextElement.__init__``.
    """
class ConfigParameterValue(TextElement):
    """Text element used for a parameter value, with an optional quote char."""

    def __init__(self, start_mark, end_mark, text, quotechar=None):
        """Store marks and ``text`` via TextElement and keep ``quotechar``.

        ``quotechar`` defaults to None, so the earlier three-argument call
        form keeps working.
        """
        super(ConfigParameterValue, self).__init__(start_mark, end_mark, text)
        self.quotechar = quotechar