Source code for pureyaml.grammar.tokens

# coding=utf-8
"""Yaml tokens."""
from __future__ import absolute_import

from textwrap import dedent

from .utils import find_column, rollback_lexpos
from ..exceptions import YAMLUnknownSyntaxError


class TokenList(object):
    tokens = [  # :off
        'DOC_START',
        'DOC_END',
        'B_SEQUENCE_COMPACT_START',
        'B_SEQUENCE_START',
        'B_MAP_COMPACT_KEY',
        'B_MAP_COMPACT_VALUE',
        'B_MAP_KEY',
        'B_MAP_VALUE',
        'B_LITERAL_START',
        'B_LITERAL_END',
        'B_FOLD_START',
        'B_FOLD_END',
        'DOUBLEQUOTE_START',
        'DOUBLEQUOTE_END',
        'SINGLEQUOTE_START',
        'SINGLEQUOTE_END',
        'CAST_TYPE',
        'SCALAR',
        'INDENT',
        'DEDENT',
        'F_SEQUENCE_START',
        'F_SEQUENCE_END',
        'F_MAP_START',
        'F_MAP_END',
        'F_MAP_KEY',
        'F_SEP',
    ]  # :on

# noinspection PyMethodMayBeStatic,PyIncorrectDocstring,PySingleQuotedDocstring,PyPep8Naming
class YAMLTokens(TokenList):
    def __init__(self):
        self.indent_stack = [1]

    def get_indent_status(self, t):
        column = find_column(t)
        curr_depth, next_depth = self.indent_stack[-1], column

        if next_depth > curr_depth:
            status = 'INDENT'
        elif next_depth < curr_depth:
            status = 'DEDENT'
        else:
            status = 'NODENT'

        return status, curr_depth, next_depth

    # LEXER
    # ===================================================================
    states = (  # :off
        ('tag', 'inclusive'),
        ('doublequote', 'exclusive'),
        ('comment', 'exclusive'),
        ('singlequote', 'exclusive'),
        ('literal', 'exclusive'),
        ('fold', 'exclusive'),
        ('flowsequence', 'exclusive'),
        ('flowmap', 'exclusive'),
    )  # :on

    literals = '"'

    # state: multiple
    # -------------------------------------------------------------------
    def t_ignore_INDENT(self, t):
        r'\n\s*'
        indent_status, curr_depth, next_depth = self.get_indent_status(t)

        if indent_status == 'NODENT':
            return

        if indent_status == 'INDENT':
            # note: also set by
            # * t_B_SEQUENCE_COMPACT_START
            # * t_B_MAP_COMPACT_KEY
            # * t_B_MAP_COMPACT_VALUE
            self.indent_stack.append(next_depth)

        if indent_status == 'DEDENT':
            indent_delta = curr_depth - next_depth
            step = self.indent_stack.pop() - self.indent_stack[-1]

            # If dedent is larger than the last indent
            if indent_delta > step:
                # Go back and reevaluate this token.
                rollback_lexpos(t)

        t.type = indent_status
        return t

    # state: tag
    # -------------------------------------------------------------------
    def t_begin_tag(self, t):
        r'(?<!\\)!'
        t.lexer.push_state('tag')

    def t_tag_end(self, t):
        r'\ '
        t.lexer.pop_state()

    def t_tag_CAST_TYPE(self, t):
        r'(?<=\!)[a-z]+'
        return t

    # state: doublequote
    # -------------------------------------------------------------------
    t_doublequote_SCALAR = r'(?:\\"|[^"])+'

    def t_begin_doublequote(self, t):
        r'(?<!\\)"'
        t.lexer.push_state('doublequote')
        t.type = 'DOUBLEQUOTE_START'
        return t

    def t_doublequote_end(self, t):
        r'(?<!\\)"'
        t.lexer.pop_state()
        t.type = 'DOUBLEQUOTE_END'
        return t

    # state: comment
    # -------------------------------------------------------------------
    t_comment_ignore_COMMENT = r'[^\n]+'

    def t_INITIAL_flowsequence_flowmap_begin_comment(self, t):
        r'\s*[\#\%]\ ?'
        t.lexer.push_state('comment')
        # t.lexer.begin('comment')

    def t_comment_end(self, t):
        r'(?=\n)'
        # t.lexer.begin('INITIAL')
        t.lexer.pop_state()

    # state: singlequote
    # -------------------------------------------------------------------
    t_singlequote_SCALAR = r"(?:\\'|[^']|'')+"

    def t_begin_singlequote(self, t):
        r"(?<!\\)'"
        t.lexer.push_state('singlequote')
        # t.lexer.begin('singlequote')
        t.type = 'CAST_TYPE'
        t.type = 'SINGLEQUOTE_START'
        return t

    def t_singlequote_end(self, t):
        r"(?<!\\)'"
        t.lexer.pop_state()
        t.type = 'SINGLEQUOTE_END'
        return t

    # state: literal
    # -------------------------------------------------------------------
    t_literal_SCALAR = r'.+'

    def t_begin_literal(self, t):
        r'\ *(?<!\\)\|\ ?\n'
        t.lexer.push_state('literal')
        t.type = 'B_LITERAL_START'
        return t

    def t_literal_end(self, t):
        r'\n+\ *'
        column = find_column(t)
        indent = self.indent_stack[-1]

        if column < indent:
            rollback_lexpos(t)
        if column <= indent:
            t.lexer.pop_state()
            t.type = 'B_LITERAL_END'
        if column > indent:
            t.type = 'SCALAR'

        return t

    # state: fold
    # -------------------------------------------------------------------
    t_fold_SCALAR = r'.+'

    def t_begin_fold(self, t):
        r'\ *(?<!\\)\>\ ?\n'
        t.lexer.push_state('fold')
        t.type = 'B_FOLD_START'
        return t

    def t_fold_end(self, t):
        r'\n+\ *'
        column = find_column(t)
        indent = self.indent_stack[-1]

        if column < indent:
            rollback_lexpos(t)
        if column <= indent:
            t.lexer.pop_state()
            t.type = 'B_FOLD_END'
        if column > indent:
            t.type = 'SCALAR'

        return t

    # state: flowsequence and flowmap
    # -------------------------------------------------------------------
    def t_flowsequence_flowmap_F_SEP(self, t):
        r','
        return t

    def t_flowsequence_flowmap_ignore_space(self, t):
        r'\s+'

    # state: flowsequence
    # -------------------------------------------------------------------
    t_flowsequence_SCALAR = r'[^\[\],\#]+'

    def t_begin_flowsequence(self, t):
        r'\['
        t.lexer.push_state('flowsequence')
        t.type = 'F_SEQUENCE_START'
        return t

    def t_flowsequence_end(self, t):
        r'\]'
        t.lexer.pop_state()
        t.type = 'F_SEQUENCE_END'
        return t

    # state: flowmap
    # -------------------------------------------------------------------
    t_flowmap_SCALAR = r'[^\{\}\:,\#]+'

    def t_flowmap_F_MAP_KEY(self, t):
        r'\:\ ?'
        return t

    def t_begin_flowmap(self, t):
        r'\{'
        t.lexer.push_state('flowmap')
        t.type = 'F_MAP_START'
        return t

    def t_flowmap_end(self, t):
        r'\}'
        t.lexer.pop_state()
        t.type = 'F_MAP_END'
        return t

    # state: INITIAL
    # -------------------------------------------------------------------
    t_ignore_EOL = r'\s*\n'

    def t_DOC_START(self, t):
        r'\-\-\-'
        return t

    def t_DOC_END(self, t):
        r'\.\.\.'
        return t

    def t_B_SEQUENCE_COMPACT_START(self, t):
        r"""
        \-\ +  (?=  -\ )
        # ^         ^ sequence indicator
        |
        \-\ +  (?=  [\{\[]\   |   [^:\n]*:\s  )
        # ^                        ^^^ map indicator
        # ^         ^ flow indicator
        """
        indent_status, curr_depth, next_depth = self.get_indent_status(t)

        if indent_status == 'INDENT':
            self.indent_stack.append(next_depth)
            return t

        msg = dedent("""
            expected 'INDENT', got {indent_status!r}
            current_depth: {curr_depth}
            next_depth: {next_depth}
            token: {t}
        """).format(**vars())
        raise YAMLUnknownSyntaxError(msg)

    def t_B_SEQUENCE_START(self, t):
        r'-\ +|-(?=\n)'
        return t

    def t_B_MAP_COMPACT_KEY(self, t):
        r"""
        \?\ +  (?=  -\ )
        # ^         ^ sequence indicator
        |
        \?\ +  (?=  [\{\[]\   |   [^:\n]*:\s  )
        # ^                        ^^^ map indicator
        # ^         ^ flow indicator
        """
        indent_status, curr_depth, next_depth = self.get_indent_status(t)

        if indent_status == 'INDENT':
            self.indent_stack.append(next_depth)
            return t

        msg = dedent("""
            expected 'INDENT', got {indent_status!r}
            current_depth: {curr_depth}
            next_depth: {next_depth}
            token: {t}
        """).format(**vars())
        raise YAMLUnknownSyntaxError(msg)

    def t_B_MAP_COMPACT_VALUE(self, t):
        r"""
        \:\ +  (?=  -\ )
        # ^         ^ sequence indicator
        |
        \:\ +  (?=  [\{\[]\   |   [^:\n]*:\s  )
        # ^                        ^^^ map indicator
        # ^         ^ flow indicator
        """
        indent_status, curr_depth, next_depth = self.get_indent_status(t)

        if indent_status == 'INDENT':
            self.indent_stack.append(next_depth)
            return t

        msg = dedent("""
            expected 'INDENT', got {indent_status!r}
            current_depth: {curr_depth}
            next_depth: {next_depth}
            token: {t}
        """).format(**vars())
        raise YAMLUnknownSyntaxError(msg)

    def t_B_MAP_KEY(self, t):
        r'\?\ +|\?(?=\n)'
        return t

    def t_B_MAP_VALUE(self, t):
        r':\ +|:(?=\n)'
        return t

    def t_ignore_unused_indicators(self, t):
        r'\ *[\@\`].*(?=\n)'

    def t_SCALAR(self, t):
        r'(?:\\.|[^\n\#\:\-\|\>]|[\:\-\|\>]\S)+'
        return t
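The rules above follow PLY (ply.lex) conventions: each t_* function's docstring is the regular expression for its token, push_state/pop_state move between the lexer states declared in `states`, and get_indent_status plus indent_stack synthesize INDENT/DEDENT tokens from column changes, much like Python's own tokenizer. As a rough standalone sketch only (not part of this module), a lexer could be assembled from YAMLTokens roughly as follows; the variable names and sample document are invented for illustration, and any error handling the package presumably wires in elsewhere (such as a t_error rule) is omitted here:

import ply.lex as lex

from pureyaml.grammar.tokens import YAMLTokens

# Illustrative sketch: collect the bound t_* rules and states above into a PLY lexer.
yaml_tokens = YAMLTokens()
lexer = lex.lex(module=yaml_tokens)

# Hypothetical sample document; yields a stream such as
# DOC_START, B_SEQUENCE_START, SCALAR, B_SEQUENCE_START, SCALAR, ...
lexer.input('---\n- milk\n- eggs\n')
for token in iter(lexer.token, None):
    print(token.type, repr(token.value))

The resulting token stream is presumably what the grammar productions elsewhere in pureyaml.grammar consume; this sketch only exercises the lexer in isolation.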