
Signed-off-by: Stephen Finucane <stephenfin@redhat.com> Change-Id: I2f37e055838ea50627562d3585d6951f8d8d46aa
147 lines
3.9 KiB
Python
147 lines
3.9 KiB
Python
# Copyright (c) 2015 Mirantis, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import codecs
|
|
import re
|
|
|
|
from yaql.language import exceptions
|
|
|
|
|
|
# A pattern that cannot match any input: the negative lookahead demands that
# the next character is not "x", and the pattern then requires an "x".
NEVER_MATCHING_RE = '(?!x)x'

# Recognises one backslash escape sequence at a time. The pattern is compiled
# in verbose mode, so the layout whitespace and the trailing "#" notes inside
# the literal are ignored by the regex engine.
ESCAPE_SEQUENCE_RE = re.compile(
    r'''
    ( \\U........ # 8-digit hex escapes
    | \\u.... # 4-digit hex escapes
    | \\x.. # 2-digit hex escapes
    | \\[0-7]{1,3} # Octal escapes
    | \\N\{[^}]+\} # Unicode characters by name
    | \\[\\'"abfnrtv] # Single-character escapes
    )''',
    flags=re.VERBOSE | re.UNICODE,
)
|
|
|
|
|
|
def decode_escapes(s):
    """Return ``s`` with its backslash escape sequences decoded.

    Only the substrings matched by ``ESCAPE_SEQUENCE_RE`` are run through
    the ``unicode-escape`` codec; all other text is copied through as is.
    """
    return ESCAPE_SEQUENCE_RE.sub(
        lambda hit: codecs.decode(hit.group(), 'unicode-escape'), s)
|
|
|
|
|
|
# noinspection PyPep8Naming
|
|
class Lexer(object):
    # Token rules for the expression language.
    #
    # NOTE(review): the ``t_*`` / ``tokens`` / ``literals`` naming looks like
    # the convention of a PLY-style lexer generator; confirm at the place
    # where this object is consumed. The docstrings of the ``t_*`` methods
    # are the token patterns themselves, so they are kept free of prose and
    # all explanations live in ``#`` comments instead. Each pattern sits on
    # its own line inside the docstring, which presumably relies on the
    # patterns being compiled in verbose mode -- verify against the caller.

    # Characters skipped between tokens.
    t_ignore = ' \t\r\n'

    # Single-character literal tokens. "[" and "{" are absent here because
    # the INDEXER and MAP rules built in __init__ cover them.
    literals = '()],}'

    # Reserved words -> token type.
    keywords = {'true': 'TRUE', 'false': 'FALSE', 'null': 'NULL'}

    # Token type -> Python value substituted for the matched text.
    keyword_to_val = {'TRUE': True, 'FALSE': False, 'NULL': None}

    def __init__(self, yaql_operators):
        """Build the token list and the operator rules.

        ``yaql_operators`` must expose ``operators`` (a mapping from operator
        symbol to a record whose item ``[2]`` is the token name) and
        ``name_value_op`` (the symbol for the MAPPING token, possibly falsy).
        """
        operators = yaql_operators.operators
        self._operators_table = operators

        token_names = [
            'KEYWORD_STRING', 'QUOTED_STRING', 'NUMBER', 'FUNC',
            'DOLLAR', 'INDEXER', 'MAPPING', 'MAP',
        ]
        token_names.extend(self.keywords.values())
        self.tokens = token_names

        # One string rule per operator; the bracket pseudo-operators get
        # their dedicated INDEXER / MAP rules further down.
        for symbol, record in operators.items():
            if symbol not in ('[]', '{}'):
                token_name = record[2]
                setattr(self, 't_' + token_name, re.escape(symbol))
                self.tokens.append(token_name)

        # A rule for a feature that is switched off gets a pattern that can
        # never match.
        mapping_symbol = yaql_operators.name_value_op
        if mapping_symbol:
            self.t_MAPPING = re.escape(mapping_symbol)
        else:
            self.t_MAPPING = NEVER_MATCHING_RE

        if '[]' in operators:
            self.t_INDEXER = '\\['
        else:
            self.t_INDEXER = NEVER_MATCHING_RE

        if '{}' in operators:
            self.t_MAP = '{'
        else:
            self.t_MAP = NEVER_MATCHING_RE

    # "$" followed by zero or more word characters; the token is passed
    # through unchanged.
    @staticmethod
    def t_DOLLAR(t):
        """
        \\$\\w*
        """
        return t

    # Integer or decimal literal; the matched text becomes a float when it
    # contains a dot and an int otherwise.
    @staticmethod
    def t_NUMBER(t):
        """
        \\b\\d+(\\.?\\d+)?\\b
        """
        text = t.value
        t.value = float(text) if '.' in text else int(text)
        return t

    # Identifier immediately followed by "("; the parenthesis is dropped
    # from the token value.
    @staticmethod
    def t_FUNC(t):
        """
        \\b[^\\W\\d]\\w*\\(
        """
        t.value = t.value[:-1]
        return t

    # Bare word that does not start with a double underscore. Words present
    # in the operator table take that operator's token type; reserved words
    # take their keyword type and Python value; anything else stays a
    # KEYWORD_STRING with its text as the value.
    def t_KEYWORD_STRING(self, t):
        """
        (?!__)\\b[^\\W\\d]\\w*\\b
        """
        word = t.value
        if word not in self._operators_table:
            t.type = self.keywords.get(word, 'KEYWORD_STRING')
            t.value = self.keyword_to_val.get(t.type, word)
        else:
            t.type = self._operators_table[word][2]
        return t

    # Single-quoted string: strip the quotes, then decode escape sequences.
    @staticmethod
    def t_QUOTED_STRING(t):
        """
        '([^'\\\\]|\\\\.)*'
        """
        inner = t.value[1:-1]
        t.value = decode_escapes(inner)
        return t

    # Double-quoted string: same handling as the single-quoted form and
    # reported under the same QUOTED_STRING token type.
    @staticmethod
    def t_DOUBLE_QUOTED_STRING(t):
        """
        "([^"\\\\]|\\\\.)*"
        """
        inner = t.value[1:-1]
        t.type = 'QUOTED_STRING'
        t.value = decode_escapes(inner)
        return t

    # Back-quoted string: taken verbatim apart from un-escaping the back
    # quote itself; also reported as QUOTED_STRING.
    @staticmethod
    def t_QUOTED_VERBATIM_STRING(t):
        """
        `([^`\\\\]|\\\\.)*`
        """
        inner = t.value[1:-1]
        t.type = 'QUOTED_STRING'
        t.value = inner.replace('\\`', '`')
        return t

    # Raises a lexical exception carrying the first character of the
    # token's value and the token's position.
    @staticmethod
    def t_error(t):
        offending_char = t.value[0]
        raise exceptions.YaqlLexicalException(offending_char, t.lexpos)
|