From 79e1f38e6e813c9c7fc3a66a20073ac0dded3541 Mon Sep 17 00:00:00 2001 From: Maxim Kulkin Date: Sun, 15 Sep 2013 11:23:15 +0400 Subject: [PATCH] Improved ini parser: support for multiline values, colon delimiter, value quotation --- ostack_validator/common.py | 14 ++- ostack_validator/config_formats/ini.py | 75 ++++++++++---- ostack_validator/config_formats/test_ini.py | 107 +++++++++++++++++++- ostack_validator/model.py | 28 ++--- 4 files changed, 184 insertions(+), 40 deletions(-) diff --git a/ostack_validator/common.py b/ostack_validator/common.py index ba12600..d0d1c37 100644 --- a/ostack_validator/common.py +++ b/ostack_validator/common.py @@ -15,18 +15,30 @@ class Mark(object): def __ne__(self, other): return not self == other + def merge(self, other): + return Mark(self.source, self.line + other.line - 1, self.column + other.column - 1) + + def __repr__(self): + return '%s line %d column %d' % (self.source, self.line, self.column) + class Error(object): def __init__(self, message): self.message = message def __repr__(self): - return '<%s "%s">' % (str(self.__class__).split('.')[-1], self.message) + return '<%s "%s">' % (str(self.__class__).split('.')[-1][:-2], self.message) + + def __str__(self): + return 'Error: %s' % self.message class MarkedError(Error): def __init__(self, message, mark): super(MarkedError, self).__init__(message) self.mark = mark + def __repr__(self): + return '<%s "%s" at %s>' % (str(self.__class__).split('.')[-1][:-2], self.message, self.mark) + def __str__(self): return self.message + (" (source '%s' line %d column %d)" % (self.mark.source, self.mark.line, self.mark.column)) diff --git a/ostack_validator/config_formats/ini.py b/ostack_validator/config_formats/ini.py index ee7116d..d90dc16 100644 --- a/ostack_validator/config_formats/ini.py +++ b/ostack_validator/config_formats/ini.py @@ -5,37 +5,54 @@ from ostack_validator.model import * from ostack_validator.config_formats.common import * class IniConfigParser: - key_value_re = re.compile('^\s*(\w+)\s*=\s*(.*)$') + key_value_re = re.compile("^(\w+)\s*([:=])\s*('.*'|\".*\"|.*)\s*$") def parse(self, name, io): if not hasattr(io, 'readlines'): io = StringIO(io) errors = [] - current_section_name = ConfigSectionName(Mark(name, 1, 1), Mark(name, 1, 1), '') + current_section_name = ConfigSectionName(Mark(name, 0, 0), Mark(name, 0, 0), '') + current_param_name = None + current_param_value = None + current_param_delimiter = None sections = [] parameters = [] line_number = 0 for line in io.readlines(): - line_number += 1 + line = line.rstrip() - if line.strip() == '': continue + if current_param_name and (current_param_value.quotechar or (line == '' or not line[0].isspace())): + param = ConfigParameter(current_param_name.start_mark, current_param_value.end_mark, current_param_name, current_param_value, current_param_delimiter) + parameters.append(param) - start_index = 0 - while line[start_index].isspace(): start_index+=1 + current_param_name = None + current_param_value = None + current_param_delimiter = None - if line[start_index] == '#': continue + if line == '': continue - if line[start_index] == '[': - end_index = line.find(']', start_index) + if line[0] in '#;': continue + + if line[0].isspace(): + if current_param_name: + current_param_value.end_mark = Mark(name, line_number, len(line)) + current_param_value.text += line.lstrip() + continue + else: + errors.append(ParseError('Unexpected multiline value continuation', Mark(name, line_number, 0))) + continue + + if line[0] == '[': + end_index = line.find(']') if end_index == -1: errors.append(ParseError('Unclosed section', Mark(name, line_number, len(line)))) end_index = len(line) while line[end_index-1].isspace(): end_index -= 1 - if end_index <= start_index+1: - errors.append(ParseError('Missing section name', Mark(name, line_number, start_index))) + if end_index <= 1: + errors.append(ParseError('Missing section name', Mark(name, line_number, 0))) continue else: i = end_index+1 @@ -46,35 +63,53 @@ class IniConfigParser: i += 1 if current_section_name.text != '' or len(parameters) > 0: - section = ConfigSection(current_section_name.start_mark, Mark(name, line_number, start_index), current_section_name, parameters) + section = ConfigSection(current_section_name.start_mark, Mark(name, line_number, 0), current_section_name, parameters) sections.append(section) parameters = [] current_section_name = ConfigSectionName( - Mark(name, line_number, start_index), + Mark(name, line_number, 0), Mark(name, line_number, end_index), - line[start_index+1:end_index] + line[1:end_index] ) else: m = self.key_value_re.match(line) if m: - param_name = ConfigParameterName( + current_param_name = ConfigParameterName( Mark(name, line_number, m.start(1)), Mark(name, line_number, m.end(1)), m.group(1) ) - param_value = ConfigParameterValue( + current_param_delimiter = TextElement( Mark(name, line_number, m.start(2)), Mark(name, line_number, m.end(2)), m.group(2) ) - param = ConfigParameter(param_name.start_mark, param_value.end_mark, param_name, param_value) - parameters.append(param) + + # Unquote value + value = m.group(3) + quotechar = None + if (value[0] == value[-1] and value[0] in "\"'"): + quotechar = value[0] + value = value[1:-1] + + current_param_value = ConfigParameterValue( + Mark(name, line_number, m.start(3)), + Mark(name, line_number, m.end(3)), + value, + quotechar + ) else: - errors.append(ParseError('Syntax error', Mark(name, line_number, 1))) + errors.append(ParseError('Syntax error', Mark(name, line_number, 0))) + + line_number += 1 + + if current_param_name: + param = ConfigParameter(current_param_name.start_mark, current_param_value.end_mark, current_param_name, current_param_value, current_param_delimiter) + parameters.append(param) if current_section_name.text != '' or len(parameters) > 0: - section = ConfigSection(current_section_name.start_mark, Mark(name, line_number, start_index), current_section_name, parameters) + section = ConfigSection(current_section_name.start_mark, Mark(name, line_number, 0), current_section_name, parameters) sections.append(section) parameters = [] diff --git a/ostack_validator/config_formats/test_ini.py b/ostack_validator/config_formats/test_ini.py index d5311f7..98ffe32 100644 --- a/ostack_validator/config_formats/test_ini.py +++ b/ostack_validator/config_formats/test_ini.py @@ -6,7 +6,22 @@ class IniConfigParserTests(unittest.TestCase): def setUp(self): self.parser = IniConfigParser() - def parse(self, content): + def _strip_margin(self, content): + lines = content.split("\n") + if lines[0] == '' and lines[-1].strip() == '': + lines = lines[1:-1] + first_line = lines[0] + margin_size = 0 + while margin_size < len(first_line) and first_line[margin_size].isspace(): margin_size += 1 + + stripped_lines = [line[margin_size:] for line in lines] + + return "\n".join(stripped_lines) + + def parse(self, content, margin=False): + if margin: + content = self._strip_margin(content) + return self.parser.parse('test.conf', content) def test_parsing(self): @@ -18,6 +33,48 @@ class IniConfigParserTests(unittest.TestCase): self.assertParameter('param1', 'value1', config.sections[0].parameters[0]) self.assertEqual(1, len(config.sections[0].parameters)) + def test_colon_as_delimiter(self): + r = self.parse('param1 : value1') + + self.assertTrue(r.success) + self.assertParameter('param1', 'value1', r.value.sections[0].parameters[0]) + + def test_use_colon_delimiter_if_it_comes_before_equals_sign(self): + r = self.parse('param1: value=123') + self.assertTrue(r.success) + self.assertParameter('param1', 'value=123', r.value.sections[0].parameters[0]) + + def test_use_equals_delimiter_if_it_comes_before_colon(self): + r = self.parse('param1=value:123') + self.assertTrue(r.success) + self.assertParameter('param1', 'value:123', r.value.sections[0].parameters[0]) + + def test_wrapping_value_with_single_quotes(self): + r = self.parse("param = 'foo bar'") + + self.assertTrue(r.success) + self.assertParameter('param', 'foo bar', r.value.sections[0].parameters[0]) + self.assertEqual("'", r.value.sections[0].parameters[0].value.quotechar) + + def test_wrapping_value_with_single_quotes_and_trailing_whitespace(self): + r = self.parse("param = 'foo bar' ") + + self.assertTrue(r.success) + self.assertParameter('param', 'foo bar', r.value.sections[0].parameters[0]) + + def test_wrapping_value_with_double_quotes(self): + r = self.parse("param = \"foo bar\"") + + self.assertTrue(r.success) + self.assertParameter('param', 'foo bar', r.value.sections[0].parameters[0]) + self.assertEqual('"', r.value.sections[0].parameters[0].value.quotechar) + + def test_wrapping_value_with_double_quotes_and_trailing_whitespace(self): + r = self.parse("param = \"foo bar\" ") + + self.assertTrue(r.success) + self.assertParameter('param', 'foo bar', r.value.sections[0].parameters[0]) + def test_parsing_iolike_source(self): r = self.parse(StringIO("param1 = value1")) @@ -36,8 +93,9 @@ class IniConfigParserTests(unittest.TestCase): r = self.parse(""" [section1] param1 = value1 - """) + """, margin=True) + self.assertTrue(r.success) self.assertEqual('section1', r.value.sections[0].name.text) self.assertEqual(1, len(r.value.sections[0].parameters)) @@ -46,8 +104,9 @@ class IniConfigParserTests(unittest.TestCase): [section1] param1 = value1 param2 = value2 - """) + """, margin=True) + self.assertTrue(r.success) self.assertEqual(2, len(r.value.sections[0].parameters)) def test_parsing_with_different_sections(self): @@ -56,8 +115,9 @@ class IniConfigParserTests(unittest.TestCase): param1 = value1 [section2] param2 = value2 - """) + """, margin=True) + self.assertTrue(r.success) c = r.value self.assertEqual('section1', c.sections[0].name.text) @@ -67,14 +127,51 @@ class IniConfigParserTests(unittest.TestCase): self.assertParameter('param2', 'value2', c.sections[1].parameters[0]) self.assertEqual(1, len(c.sections[1].parameters)) - def test_whole_line_comments(self): + def test_whole_line_comments_starting_with_hash(self): r = self.parse("#param=value") + self.assertTrue(r.success) + self.assertEqual(0, len(r.value.sections)) + + def test_whole_line_comments_starting_with_semicolon(self): + r = self.parse(";param=value") + self.assertTrue(r.success) self.assertEqual(0, len(r.value.sections)) def test_hash_in_value_is_part_of_the_value(self): r = self.parse("param=value#123") + self.assertTrue(r.success) self.assertParameter("param", "value#123", r.value.sections[0].parameters[0]) + def test_multiline_value(self): + r = self.parse(""" + param1 = line1 + line2 + """, margin=True) + + self.assertTrue(r.success) + self.assertParameter('param1', 'line1line2', r.value.sections[0].parameters[0]) + + def test_multiline_value_finished_by_other_parameter(self): + r = self.parse(""" + param1 = foo + bar + param2 = baz + """, margin=True) + + self.assertTrue(r.success) + self.assertParameter('param1', 'foobar', r.value.sections[0].parameters[0]) + + def test_multiline_value_finished_by_empty_line(self): + r = self.parse(""" + param1 = foo + bar + + param2 = baz + """, margin=True) + + self.assertTrue(r.success) + self.assertParameter('param1', 'foobar', r.value.sections[0].parameters[0]) + def test_unclosed_section_causes_error(self): r = self.parse("[section1\nparam1=123") self.assertFalse(r.success) diff --git a/ostack_validator/model.py b/ostack_validator/model.py index 48152ad..0444d80 100644 --- a/ostack_validator/model.py +++ b/ostack_validator/model.py @@ -26,22 +26,25 @@ class Element(object): def __ne__(self, other): return not self == other +class TextElement(Element): + def __init__(self, start_mark, end_mark, text): + super(TextElement, self).__init__(start_mark, end_mark) + self.text = text + class ConfigSection(Element): def __init__(self, start_mark, end_mark, name, parameters): super(ConfigSection, self).__init__(start_mark, end_mark) self.name = name self.parameters = parameters -class ConfigSectionName(Element): - def __init__(self, start_mark, end_mark, text): - super(ConfigSectionName, self).__init__(start_mark, end_mark) - self.text = text +class ConfigSectionName(TextElement): pass class ConfigParameter(Element): - def __init__(self, start_mark, end_mark, name, value): + def __init__(self, start_mark, end_mark, name, value, delimiter): super(ConfigParameter, self).__init__(start_mark, end_mark) self.name = name self.value = value + self.delimiter = delimiter def __eq__(self, other): return (self.name.text == other.name.text) and (self.value.text == other.value.text) @@ -50,17 +53,14 @@ class ConfigParameter(Element): return not self == other def __repr__(self): - return "" % (self.name.text, self.value.text) + return "" % (self.name.text, self.value.text, self.delimiter.text) -class ConfigParameterName(Element): - def __init__(self, start_mark, end_mark, text): - super(ConfigParameterName, self).__init__(start_mark, end_mark) - self.text = text +class ConfigParameterName(TextElement): pass -class ConfigParameterValue(Element): - def __init__(self, start_mark, end_mark, text): - super(ConfigParameterValue, self).__init__(start_mark, end_mark) - self.text = text +class ConfigParameterValue(TextElement): + def __init__(self, start_mark, end_mark, text, quotechar=None): + super(ConfigParameterValue, self).__init__(start_mark, end_mark, text) + self.quotechar = quotechar