# Source code for quickly.lang.html

# -*- coding: utf-8 -*-
#
# This file is part of `quickly`, a library for LilyPond and the `.ly` format
#
# Copyright © 2019-2020 by Wilbert Berendsen <info@wilbertberendsen.nl>
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.


"""
Html language and transformation definition (for lilypond-book).
"""

import re

import parce.action as a
from parce import lexicon
from parce.rule import (
    ARG, MATCH, TEXT, bygroup, dselect, findmember, ifeq, pattern)
from parce.lang import html, lilypond_words
from parce.util import Dispatcher
from parce.transform import add_untransformed

from quickly.dom import base, element, htm
from . import lilypond


class Html(base.XmlLike, html.Html):
    """Html language definition that also understands the lilypond-book tags.

    The supported tags are ``<lilypond ... />`` for short code,
    ``<lilypond> ... </lilypond>`` for longer code,
    ``<lilypondfile>filename</lilypondfile>`` for a LilyPond file, and
    ``<musicxmlfile>filename</musicxmlfile>`` for a MusicXml file.

    The attribute notation described in the
    `LilyPond documentation <https://lilypond.org/doc/latest/Documentation/usage/html>`_
    is supported for these tags as well.

    """
    @lexicon(re_flags=re.IGNORECASE)
    def root(cls):
        """Recognize the lilypond-book tags, then apply the inherited rules."""
        options = cls.lilypond_book_options
        music = lilypond.LilyPond.html_lilypond_tag

        # For every tag name, the target depends on what directly follows the
        # name: ``>`` enters the tag contents, while no match at all (None)
        # enters the options lexicon, which gets the tag name as argument.
        by_tagname = {
            "lilypond": dselect(MATCH[3], {'>': music, None: options('lilypond')}),
            "lilypondfile": dselect(MATCH[3], {'>': cls.tag, None: options("lilypondfile")}),
            "musicxmlfile": dselect(MATCH[3], {'>': cls.tag, None: options("musicxmlfile")}),
        }

        book_tag = r'(<)(lilypond(?:file)?|musicxmlfile)\b(>|/\s*>)?'
        actions = bygroup(a.Delimiter, a.Name.Tag, a.Delimiter)
        # When nothing is selected the tag is closed right away and we stay
        # in the current context.
        yield book_tag, actions, dselect(MATCH[2], by_tagname)
        yield from super().root

    @lexicon
    def lilypond_book_options(cls):
        """Rules for the attribute space of a lilypond-book tag.

        The lexicon argument is the tag name; it decides where ``>`` leads to
        and whether the ``:`` short form is recognized.

        """
        music = lilypond.LilyPond.html_lilypond_tag

        # ``>`` ends the options; the next lexicon depends on the tag name
        yield r'>', a.Delimiter, -1, ifeq(ARG, "lilypond", music, cls.tag)

        # the ``:`` short form is only matched inside a ``lilypond`` tag
        colon = pattern(ifeq(ARG, "lilypond", ":", None))
        yield colon, a.Delimiter, -1, music("short form")

        yield r'\d+(?:\.\d+)?', a.Number

        # known option names and units get their own action, other words Name
        known_words = (
            (lilypond_words.lilypond_book_options, a.Name.Attribute),
            (lilypond_words.lilypond_book_units, a.Name.Builtin.Unit),
        )
        yield r'[^\W\d]\w*(?:-\w+)*', findmember(TEXT, known_words, a.Name)

        yield r'=', a.Operator
        yield from cls.find_strings()

        # self-closing tag, no LilyPond input here
        yield r'/\s*>', a.Delimiter, -1


class HtmlTransform(base.Transform):
    """Transform Html (for lilypond-book) to :mod:`quickly.dom.htm` elements.

    Note that this transform currently ignores the following lexicons:
    ``css_style_attribute``, ``css_style_tag``, ``doctype``, ``internal_dtd``
    and ``script_tag``.

    This means the constructed :class:`htm.Document <quickly.dom.htm.Document>`
    cannot write back the full document. You should only rely on the
    lilypond-tags and their content.

    The alternative would be to disable css and javascript in the inherited
    parce language definition, but then we lose the nice highlighting of CSS
    and JS parts.

    """

    ## helper methods and factory
[docs]    def factory(self, element_class, head_origin, tail_origin=(), *children):
        """Create a node, keeping its origin.

        The ``head_origin`` and optionally ``tail_origin`` is an iterable of
        Token instances. All items should be created using this method, so that
        it can be overridden for the case you don't want to remember the
        origin.

        """
        return element_class.with_origin(tuple(head_origin), tuple(tail_origin), *children)

    ## unimplemented transform contexts
    # These lexicon names are deliberately bound to None instead of to a
    # transform method: as the class docstring states, this transform ignores
    # them.  NOTE(review): presumably such contexts then show up in the parent
    # transform method as "<untransformed>" items (cf. the peek() calls in
    # tag() and lilypond_book_options()) -- confirm against parce.transform.
    css_style_attribute = None
    css_style_tag = None
    doctype = None
    internal_dtd = None
    script_tag = None

    ## transform methods
[docs]    @add_untransformed
    def root(self, items):
        """Process the ``root`` context."""
        return htm.Document(*self.tag(items))

[docs]    @add_untransformed
    def attrs(self, items):
        """Process the ``attrs`` context.

        Returns a list of :class:`htm.Attribute` elements and a ``tail_origin``
        tuple.

        """
        return self.lilypond_book_options(items)

[docs]    def cdata(self, items):
        """Process the ``cdata`` context."""
        return self.factory(htm.CData, items)

[docs]    def comment(self, items):
        """Process the ``comment`` context."""
        return self.factory(htm.Comment, items)

[docs]    def dqstring(self, items):
        """Process the ``dqstring`` context."""
        head_origin = items[0],
        tail_origin = (items.pop(),) if items[-1] == '"' else ()
        children = (self._action(t.action, t) for t in items[1:])
        return self.factory(htm.DqString, head_origin, tail_origin, *children)

[docs]    def sqstring(self, items):
        """Process the ``sqstring`` context."""
        head_origin = items[0],
        tail_origin = (items.pop(),) if items[-1] == "'" else ()
        children = (self._action(t.action, t) for t in items[1:])
        return self.factory(htm.SqString, head_origin, tail_origin, *children)

[docs]    def processing_instruction(self, items):
        """Process the ``processing_instruction`` context."""
        head_origin = items[0],
        tail_origin = (items.pop(),) if items[-1] == '?>' else ()
        # combine multiple text tokens into Text elements, and keep [SD]qString
        # and EntityRef elements
        def children():
            origin = []
            for i in items[1:]:
                obj = self._action(i.action, i) if i.is_token else i.obj
                if obj:
                    if origin:
                        yield self.factory(htm.Text, origin)
                        origin.clear()
                    yield obj
                else:
                    origin.append(i)
            if origin:
                yield self.factory(htm.Text, origin)
        return self.factory(htm.ProcessingInstruction, head_origin, tail_origin, *children())

    @add_untransformed
    def tag(self, items):
        """Process the ``tag`` context.

        Walks the items by index, because one tag spans several items
        (delimiter, tag name, and either a closing delimiter or an attribute
        context). Text, whitespace and escape tokens become Text/EntityRef
        nodes, tags become CloseTag nodes or Element nodes wrapping an
        OpenTag/SingleTag, and the results of child ``tag`` and
        ``html_lilypond_tag`` contexts are added to the last node.

        Returns a list of nodes representing the contents.

        """
        nodes = []
        z = len(items)
        i = 0
        # manual index: the branches below consume a varying number of items
        while i < z:
            if items[i].is_token:
                if items[i].action in (a.Text, a.Whitespace):
                    nodes.append(self.factory(htm.Text, (items[i],)))
                elif items[i].action is a.Escape:
                    nodes.append(self.factory(htm.EntityRef, (items[i],)))
                elif items[i].action is a.Delimiter:
                    # a delimiter token starts a tag (or a declaration)
                    if items.peek(i+1, a.Name.Tag, "attrs", "<untransformed>") or \
                       items.peek(i+1, a.Name.Tag, a.Delimiter, "<untransformed>") or \
                       items.peek(i+1, a.Keyword, a.Name.Tag.Definition, "<untransformed>"):
                        # untransformed css style, script tag or doctype declaration
                        # represent everything from this delimiter up to the
                        # last token of the untransformed context as Unknown
                        nodes.append(self.factory(base.Unknown, (items[i], items[i+3].obj.last_token())))
                        # one extra step; with the steps below four items are skipped
                        i += 1
                    elif i < z - 2:
                        # at least two more items follow: the tag name and
                        # either a delimiter token or an attributes context
                        head_origin = items[i:i+1]
                        tagname = self.factory(htm.TagName, items[i+1:i+2])
                        if '/' in items[i].text: # and z - i < 2: (will always be the case)
                            # closing tag, will also be the end of this context
                            tail_origin = items[i+2:i+3]
                            nodes.append(self.factory(htm.CloseTag, head_origin, tail_origin, tagname))
                        else:
                            # cls stays None when the third item is not
                            # recognized; no node is created in that case
                            cls = None
                            attrs = ()
                            if items[i+2].is_token:
                                # NOTE(review): group == -2 presumably marks the
                                # last token of a three-token match -- confirm
                                # against the parce Token.group documentation
                                if items[i+2].group == -2:
                                    tail_origin = items[i+2:i+3]
                                    if '/' in items[i+2].text:
                                        # self closing tag without attributes
                                        cls = htm.SingleTag
                                    else:
                                        # opening tag without attributes, new tag ctxt will follow
                                        cls = htm.OpenTag
                            elif items[i+2].name == 'attrs':
                                # the attrs transform returns (attributes, tail_origin)
                                attrs, tail_origin = items[i+2].obj
                                # opening or self-closing tag with attributes that follow
                                cls = htm.SingleTag if tail_origin and '/' in tail_origin[0].text else htm.OpenTag
                            elif items[i+2].name == 'lilypond_book_options':
                                # short form of LilyPond input within (self-closing) lilypond tag?
                                attrs, tail_origin = items[i+2].obj
                                if tail_origin and tail_origin[0] == ':' and items.peek(i + 3, "html_lilypond_tag"):
                                    # yes, add the music to the attrs
                                    attrs = list(attrs)
                                    attrs.append(self.factory(htm.Colon, tail_origin))
                                    # the music context supplies both its nodes
                                    # and the tail origin of the whole tag
                                    objs, tail_origin = items[i+3].obj
                                    attrs.extend(objs)
                                    cls = htm.SingleTag
                                    # also skip the html_lilypond_tag context
                                    i += 1
                                else:
                                    # no, just handle the attrs
                                    cls = htm.SingleTag if tail_origin and '/' in tail_origin[0].text else htm.OpenTag
                            if cls:
                                nodes.append(htm.Element(self.factory(cls, head_origin, tail_origin, tagname, *attrs)))
                    # together with the final increment this skips the three
                    # items that make up the tag
                    i += 2
            elif items[i].name == "tag":
                nodes[-1].extend(items[i].obj) # add contents and CloseTag
            elif items[i].name == "html_lilypond_tag":
                # the result is a pair; its first element holds the nodes
                nodes[-1].extend(items[i].obj[0]) # add contents and CloseTag
            i += 1
        return nodes

[docs]    @add_untransformed
    def lilypond_book_options(self, items):
        """Process the ``lilypond_book_options`` context.

        Returns a list of Attribute elements and a tuple with the ending
        delimiter (``:`` or ``>`` or ``/>``).

        """
        attrs = []
        tail_origin = ()
        z = len(items)
        i = 0
        while i < z:
            t = items[i]
            if t.is_token:
                if t.action is a.Name.Attribute:
                    attrs.append(htm.Attribute(self.factory(htm.AttrName, (t,))))
                elif t == '=':
                    if attrs and len(attrs[-1]) == 1:
                        attrs[-1].append(self.factory(htm.EqualSign, (t,)))
                elif t.action is a.Delimiter:
                    tail_origin = (t,)
                    break
                elif t == '"' and items.peek(i, a.String, "<untransformed>"):
                    # this happens with a css style attribute; create Unknown
                    if attrs and len(attrs[-1]) > 1:
                        attrs[-1].append(self.factory(base.Unknown, (t, items[i+1].obj.last_token())))
                    i += 1
                # only appear in lilypond_book_options
                elif t.action is a.Number:
                    if attrs and len(attrs[-1]) > 1:
                        attrs[-1].append(self.factory(htm.Number, (t,)))
                elif t.action is a.Name.Builtin.Unit:
                    if attrs and len(attrs[-1]) > 2:
                        attrs[-1].append(self.factory(htm.Unit, (t,)))
                elif t.action is a.Name:
                    # unknown LilyPond book attribute...
                    attrs.append(htm.Attribute(self.factory(htm.AttrName, (t,))))
            elif attrs and len(attrs[-1]) == 2:
                attrs[-1].append(t.obj) # a string value
            i += 1
        return attrs, tail_origin

    # Dispatcher for single tokens, keyed on the token's action: handler
    # methods are registered below with ``@_action(<action>)`` and invoked as
    # ``self._action(token.action, token)``.
    _action = Dispatcher()

[docs]    @_action(a.Escape)
    @_action(a.String.Escape)
    def entityref_action(self, token):
        return self.factory(htm.EntityRef, (token,))

[docs]    @_action(a.String.Single)
    @_action(a.String.Double)
    def string_action(self, token):
        return self.factory(htm.Text, (token,))


class HtmlAdHocTransform(base.AdHocTransform, HtmlTransform):
    """Variant of :class:`HtmlTransform` that does not keep the originating
    tokens.

    """