Source code for quickly.lang.html
# -*- coding: utf-8 -*-
#
# This file is part of `quickly`, a library for LilyPond and the `.ly` format
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Html language and transformation definition (for lilypond-book).
"""
import re
import parce.action as a
from parce import lexicon
from parce.rule import (
ARG, MATCH, TEXT, bygroup, dselect, findmember, ifeq, pattern)
from parce.lang import html, lilypond_words
from parce.util import Dispatcher
from parce.transform import add_untransformed
from quickly.dom import base, element, htm
from . import lilypond
class Html(base.XmlLike, html.Html):
    """Html language definition that also recognizes the lilypond-book tags.

    The supported tags are ``<lilypond ... />`` for short code, ``<lilypond>
    ... </lilypond>`` for longer code, ``<lilypondfile>filename</lilypondfile>``
    for a LilyPond file, and ``<musicxmlfile>filename</musicxmlfile>`` for a
    MusicXml file.

    These tags also support the attribute notation that's outlined in the
    `LilyPond documentation <https://lilypond.org/doc/latest/Documentation/usage/html>`_.

    """
    @lexicon(re_flags=re.IGNORECASE)
    def root(cls):
        book_tag = r'(<)(lilypond(?:file)?|musicxmlfile)\b(>|/\s*>)?'
        tag_actions = bygroup(a.Delimiter, a.Name.Tag, a.Delimiter)

        def after_name(tag_name, content):
            # A directly following '>' enters ``content``; when the third
            # group did not match at all, the tag's options are lexed first.
            return dselect(MATCH[3], {
                '>': content,
                None: cls.lilypond_book_options(tag_name),
            })

        # No default is given for either dselect: by default a close tag,
        # stay in the context.
        target = dselect(MATCH[2], {
            "lilypond": after_name("lilypond", lilypond.LilyPond.html_lilypond_tag),
            "lilypondfile": after_name("lilypondfile", cls.tag),
            "musicxmlfile": after_name("musicxmlfile", cls.tag),
        })
        yield book_tag, tag_actions, target
        yield from super().root

    @lexicon
    def lilypond_book_options(cls):
        """Options within the attribute space of a lilypond book tag."""
        lily_content = lilypond.LilyPond.html_lilypond_tag
        known_words = (
            (lilypond_words.lilypond_book_options, a.Name.Attribute),
            (lilypond_words.lilypond_book_units, a.Name.Builtin.Unit),
        )

        # '>' leaves this context and continues with the tag contents; which
        # lexicon handles them depends on the tag name given as ARG.
        yield r'>', a.Delimiter, -1, ifeq(ARG, "lilypond", lily_content, cls.tag)
        # The ':' pattern only exists when ARG is "lilypond" (short form).
        yield pattern(ifeq(ARG, "lilypond", ":", None)), a.Delimiter, -1, lily_content("short form")
        yield r'\d+(?:\.\d+)?', a.Number
        yield r'[^\W\d]\w*(?:-\w+)*', findmember(TEXT, known_words, a.Name)
        yield r'=', a.Operator
        yield from cls.find_strings()
        # self-closing tag, no LilyPond input here
        yield r'/\s*>', a.Delimiter, -1
class HtmlTransform(base.Transform):
    """Transform Html (for lilypond-book) to :mod:`quickly.dom.htm` elements.

    Note that this transform currently ignores the following lexicons:
    ``css_style_attribute``, ``css_style_tag``, ``doctype``, ``internal_dtd``
    and ``script_tag``.

    This means the constructed :class:`htm.Document <quickly.dom.htm.Document>`
    cannot write back the full document. You should only rely on the
    lilypond-tags and their content.

    The alternative would be to disable css and javascript in the inherited
    parce language definition, but then we lose the nice highlighting of CSS
    and JS parts.

    """
    ## helper methods and factory
    def factory(self, element_class, head_origin, tail_origin=(), *children):
        """Create a node, keeping its origin.

        The ``head_origin`` and optionally ``tail_origin`` is an iterable of
        Token instances. All items should be created using this method, so that
        it can be overridden for the case you don't want to remember the
        origin.

        """
        return element_class.with_origin(tuple(head_origin), tuple(tail_origin), *children)

    def _quoted_string(self, element_class, quote, items):
        """Build a string node of ``element_class`` from a string context.

        The first item is used as the opening delimiter. The last item is
        removed from ``items`` and used as the closing delimiter only when it
        equals ``quote`` *and* it is not the first item as well; otherwise an
        unterminated string consisting of just the opening quote would have
        that single token recorded as both its head and its tail origin.

        The remaining items are converted using the ``_action`` dispatcher.

        """
        head_origin = (items[0],)
        tail_origin = (items.pop(),) if len(items) > 1 and items[-1] == quote else ()
        children = (self._action(t.action, t) for t in items[1:])
        return self.factory(element_class, head_origin, tail_origin, *children)

    ## unimplemented transform contexts
    css_style_attribute = None
    css_style_tag = None
    doctype = None
    internal_dtd = None
    script_tag = None

    ## transform methods
    @add_untransformed
    def root(self, items):
        """Process the ``root`` context.

        Returns a :class:`htm.Document` containing the nodes that
        :meth:`tag` creates from the same items.

        """
        return htm.Document(*self.tag(items))

    @add_untransformed
    def attrs(self, items):
        """Process the ``attrs`` context.

        Returns a list of :class:`htm.Attribute` elements and a ``tail_origin``
        tuple; the work is delegated to :meth:`lilypond_book_options`.

        """
        return self.lilypond_book_options(items)

    def cdata(self, items):
        """Process the ``cdata`` context."""
        return self.factory(htm.CData, items)

    def comment(self, items):
        """Process the ``comment`` context."""
        return self.factory(htm.Comment, items)

    def dqstring(self, items):
        """Process the ``dqstring`` context."""
        return self._quoted_string(htm.DqString, '"', items)

    def sqstring(self, items):
        """Process the ``sqstring`` context."""
        return self._quoted_string(htm.SqString, "'", items)

    def processing_instruction(self, items):
        """Process the ``processing_instruction`` context."""
        head_origin = (items[0],)
        tail_origin = (items.pop(),) if items[-1] == '?>' else ()

        # combine multiple text tokens into Text elements, and keep [SD]qString
        # and EntityRef elements
        def children():
            origin = []
            for i in items[1:]:
                obj = self._action(i.action, i) if i.is_token else i.obj
                if obj:
                    # flush the text tokens collected so far; factory() makes
                    # a tuple of them, so the list can be reused afterwards
                    if origin:
                        yield self.factory(htm.Text, origin)
                        origin.clear()
                    yield obj
                else:
                    origin.append(i)
            if origin:
                yield self.factory(htm.Text, origin)

        return self.factory(htm.ProcessingInstruction, head_origin, tail_origin, *children())

    @add_untransformed
    def tag(self, items):
        """Process the ``tag`` context.

        Returns a list of nodes representing the contents.

        """
        nodes = []
        z = len(items)
        i = 0
        while i < z:
            if items[i].is_token:
                if items[i].action in (a.Text, a.Whitespace):
                    nodes.append(self.factory(htm.Text, (items[i],)))
                elif items[i].action is a.Escape:
                    nodes.append(self.factory(htm.EntityRef, (items[i],)))
                elif items[i].action is a.Delimiter:
                    if items.peek(i+1, a.Name.Tag, "attrs", "<untransformed>") or \
                       items.peek(i+1, a.Name.Tag, a.Delimiter, "<untransformed>") or \
                       items.peek(i+1, a.Keyword, a.Name.Tag.Definition, "<untransformed>"):
                        # untransformed css style, script tag or doctype declaration
                        nodes.append(self.factory(base.Unknown, (items[i], items[i+3].obj.last_token())))
                        i += 1
                    elif i < z - 2:
                        # items[i] starts a tag, items[i+1] is its name and
                        # items[i+2] either ends the tag or holds its attributes
                        head_origin = items[i:i+1]
                        tagname = self.factory(htm.TagName, items[i+1:i+2])
                        if '/' in items[i].text: # and z - i < 2: (will always be the case)
                            # closing tag, will also be the end of this context
                            tail_origin = items[i+2:i+3]
                            nodes.append(self.factory(htm.CloseTag, head_origin, tail_origin, tagname))
                        else:
                            cls = None
                            attrs = ()
                            if items[i+2].is_token:
                                # NOTE(review): group -2 looks like the last
                                # token of the three-token bygroup match of
                                # the opening tag -- confirm with parce.
                                if items[i+2].group == -2:
                                    tail_origin = items[i+2:i+3]
                                    if '/' in items[i+2].text:
                                        # self closing tag without attributes
                                        cls = htm.SingleTag
                                    else:
                                        # opening tag without attributes, new tag ctxt will follow
                                        cls = htm.OpenTag
                            elif items[i+2].name == 'attrs':
                                attrs, tail_origin = items[i+2].obj
                                # opening or self-closing tag with attributes that follow
                                cls = htm.SingleTag if tail_origin and '/' in tail_origin[0].text else htm.OpenTag
                            elif items[i+2].name == 'lilypond_book_options':
                                # short form of LilyPond input within (self-closing) lilypond tag?
                                attrs, tail_origin = items[i+2].obj
                                if tail_origin and tail_origin[0] == ':' and items.peek(i + 3, "html_lilypond_tag"):
                                    # yes, add the music to the attrs
                                    attrs = list(attrs)
                                    attrs.append(self.factory(htm.Colon, tail_origin))
                                    objs, tail_origin = items[i+3].obj
                                    attrs.extend(objs)
                                    cls = htm.SingleTag
                                    # the html_lilypond_tag item is consumed here
                                    i += 1
                                else:
                                    # no, just handle the attrs
                                    cls = htm.SingleTag if tail_origin and '/' in tail_origin[0].text else htm.OpenTag
                            if cls:
                                nodes.append(htm.Element(self.factory(cls, head_origin, tail_origin, tagname, *attrs)))
                        # skip the tag name and the item that followed it
                        i += 2
            elif items[i].name == "tag":
                nodes[-1].extend(items[i].obj)   # add contents and CloseTag
            elif items[i].name == "html_lilypond_tag":
                nodes[-1].extend(items[i].obj[0])   # add contents and CloseTag
            i += 1
        return nodes

    @add_untransformed
    def lilypond_book_options(self, items):
        """Process the ``lilypond_book_options`` context.

        Returns a list of Attribute elements and a tuple with the ending
        delimiter (``:`` or ``>`` or ``/>``). The tuple is empty when no
        ending delimiter was found.

        """
        attrs = []
        tail_origin = ()
        z = len(items)
        i = 0
        while i < z:
            t = items[i]
            if t.is_token:
                if t.action is a.Name.Attribute:
                    attrs.append(htm.Attribute(self.factory(htm.AttrName, (t,))))
                elif t == '=':
                    # only directly after an attribute name
                    if attrs and len(attrs[-1]) == 1:
                        attrs[-1].append(self.factory(htm.EqualSign, (t,)))
                elif t.action is a.Delimiter:
                    # the ending delimiter; nothing after it is looked at
                    tail_origin = (t,)
                    break
                elif t == '"' and items.peek(i, a.String, "<untransformed>"):
                    # this happens with a css style attribute; create Unknown
                    if attrs and len(attrs[-1]) > 1:
                        attrs[-1].append(self.factory(base.Unknown, (t, items[i+1].obj.last_token())))
                    i += 1
                # only appear in lilypond_book_options
                elif t.action is a.Number:
                    if attrs and len(attrs[-1]) > 1:
                        attrs[-1].append(self.factory(htm.Number, (t,)))
                elif t.action is a.Name.Builtin.Unit:
                    if attrs and len(attrs[-1]) > 2:
                        attrs[-1].append(self.factory(htm.Unit, (t,)))
                elif t.action is a.Name:
                    # unknown LilyPond book attribute...
                    attrs.append(htm.Attribute(self.factory(htm.AttrName, (t,))))
            elif attrs and len(attrs[-1]) == 2:
                attrs[-1].append(t.obj)     # a string value
            i += 1
        return attrs, tail_origin

    _action = Dispatcher()

    @_action(a.Escape)
    @_action(a.String.Escape)
    def entityref_action(self, token):
        """Return an :class:`htm.EntityRef` node for an escape token."""
        return self.factory(htm.EntityRef, (token,))

    @_action(a.String.Single)
    @_action(a.String.Double)
    def string_action(self, token):
        """Return an :class:`htm.Text` node for a token with string content."""
        return self.factory(htm.Text, (token,))
class HtmlAdHocTransform(base.AdHocTransform, HtmlTransform):
    """Variant of :class:`HtmlTransform` that does not keep the originating tokens."""