| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189 |
- # markdown/serializers.py
- #
- # Add x/html serialization to ElementTree
- # Taken from ElementTree 1.3 preview with slight modifications
- #
- # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
- #
- # fredrik@pythonware.com
- # https://www.pythonware.com/
- #
- # --------------------------------------------------------------------
- # The ElementTree toolkit is
- #
- # Copyright (c) 1999-2007 by Fredrik Lundh
- #
- # By obtaining, using, and/or copying this software and/or its
- # associated documentation, you agree that you have read, understood,
- # and will comply with the following terms and conditions:
- #
- # Permission to use, copy, modify, and distribute this software and
- # its associated documentation for any purpose and without fee is
- # hereby granted, provided that the above copyright notice appears in
- # all copies, and that both that copyright notice and this permission
- # notice appear in supporting documentation, and that the name of
- # Secret Labs AB or the author not be used in advertising or publicity
- # pertaining to distribution of the software without specific, written
- # prior permission.
- #
- # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
- # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
- # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
- # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
- # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
- # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
- # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- # OF THIS SOFTWARE.
- # --------------------------------------------------------------------
- from xml.etree.ElementTree import ProcessingInstruction
- from xml.etree.ElementTree import Comment, ElementTree, QName
- import re
- __all__ = ['to_html_string', 'to_xhtml_string']
# Lower-case names of elements that never get a closing tag; the serializer
# writes them as `<tag>` in HTML output and `<tag />` in XHTML output.
# Built directly as a set: the former `try: set(...) except NameError`
# fallback could never trigger on Python 3, where `set` is always a builtin.
HTML_EMPTY = {
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
}

# Matches an ampersand that does NOT start a decimal (`&#38;`), hexadecimal
# (`&#x26;`) or named (`&amp;`) entity, so existing entities are left alone.
RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I)
- def _raise_serialization_error(text): # pragma: no cover
- raise TypeError(
- "cannot serialize {!r} (type {})".format(text, type(text).__name__)
- )
- def _escape_cdata(text):
- # escape character data
- try:
- # it's worth avoiding do-nothing calls for strings that are
- # shorter than 500 character, or so. assume that's, by far,
- # the most common case in most applications.
- if "&" in text:
- # Only replace & when not part of an entity
- text = RE_AMP.sub('&', text)
- if "<" in text:
- text = text.replace("<", "<")
- if ">" in text:
- text = text.replace(">", ">")
- return text
- except (TypeError, AttributeError): # pragma: no cover
- _raise_serialization_error(text)
- def _escape_attrib(text):
- # escape attribute value
- try:
- if "&" in text:
- # Only replace & when not part of an entity
- text = RE_AMP.sub('&', text)
- if "<" in text:
- text = text.replace("<", "<")
- if ">" in text:
- text = text.replace(">", ">")
- if "\"" in text:
- text = text.replace("\"", """)
- if "\n" in text:
- text = text.replace("\n", " ")
- return text
- except (TypeError, AttributeError): # pragma: no cover
- _raise_serialization_error(text)
- def _escape_attrib_html(text):
- # escape attribute value
- try:
- if "&" in text:
- # Only replace & when not part of an entity
- text = RE_AMP.sub('&', text)
- if "<" in text:
- text = text.replace("<", "<")
- if ">" in text:
- text = text.replace(">", ">")
- if "\"" in text:
- text = text.replace("\"", """)
- return text
- except (TypeError, AttributeError): # pragma: no cover
- _raise_serialization_error(text)
def _serialize_html(write, elem, format):
    """Serialize `elem`, its descendants and its tail text through `write`.

    `write` is called with one string fragment at a time. `format` is
    compared against "html" and "xhtml": in "html" an attribute whose name
    equals its value is written as a bare boolean attribute, and in "xhtml"
    elements listed in `HTML_EMPTY` are written self-closed as `<tag />`.

    Raises `ValueError` when the tag is a `QName` whose text does not start
    with `{`.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    elif tag is None:
        # No tag of its own: emit only the text and the children.
        if text:
            write(_escape_cdata(text))
        for child in elem:
            _serialize_html(write, child, format)
    else:
        namespace_uri = None
        if isinstance(tag, QName):
            # QName objects store their data as a string: `{uri}tag`
            qualified = tag.text
            if qualified[:1] != "{":
                raise ValueError('QName objects must define a tag.')
            namespace_uri, tag = qualified[1:].split("}", 1)
        write("<" + tag)
        # Attributes are written in lexical order.
        for key, value in sorted(elem.items()):
            if isinstance(key, QName):
                # Assume a text only QName
                key = key.text
            if isinstance(value, QName):
                # Assume a text only QName
                value = value.text
            else:
                value = _escape_attrib_html(value)
            if key == value and format == 'html':
                # handle boolean attributes
                write(" %s" % value)
            else:
                write(' {}="{}"'.format(key, value))
        if namespace_uri:
            write(' xmlns="%s"' % (_escape_attrib(namespace_uri)))
        lowered = tag.lower()
        is_empty = lowered in HTML_EMPTY
        if format == "xhtml" and is_empty:
            write(" />")
        else:
            write(">")
            if text:
                # script/style content is written verbatim, unescaped.
                if lowered in ("script", "style"):
                    write(text)
                else:
                    write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, format)
            if not is_empty:
                write("</" + tag + ">")
    # Tail text follows every kind of node, including comments and PIs.
    if elem.tail:
        write(_escape_cdata(elem.tail))
def _write_html(root, format="html"):
    """Return the serialization of `root` as one string.

    `format` is passed through unchanged to `_serialize_html`; the
    fragments it emits are collected and joined.
    """
    assert root is not None
    fragments = []
    _serialize_html(fragments.append, root, format)
    return "".join(fragments)
- # --------------------------------------------------------------------
- # public functions
def to_html_string(element):
    """Return `element` serialized as a string using the "html" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="html")
def to_xhtml_string(element):
    """Return `element` serialized as a string using the "xhtml" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="xhtml")
|