From b4d036ed6b3119b3df614dbd8dff88ac3e1de91a Mon Sep 17 00:00:00 2001
From: croneter
Date: Fri, 29 Mar 2019 17:45:19 +0100
Subject: [PATCH] Monkey patch xml.etree.ElementTree to always return unicode

---
 resources/lib/defused_etree.py | 39 ++++++++++++++++++++++++++++++++++
 resources/lib/utils.py         |  3 ++-
 2 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 resources/lib/defused_etree.py

diff --git a/resources/lib/defused_etree.py b/resources/lib/defused_etree.py
new file mode 100644
index 00000000..503324f1
--- /dev/null
+++ b/resources/lib/defused_etree.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+xml.etree.ElementTree tries to encode with text.encode('ascii') - which is
+just plain BS. This etree will always return unicode, not string
+"""
+from __future__ import absolute_import, division, unicode_literals
+# Originally tried faster cElementTree, but does NOT work reliably with Kodi
+from defusedxml.ElementTree import DefusedXMLParser, _generate_etree_functions
+
+from xml.etree.ElementTree import TreeBuilder as _TreeBuilder
+from xml.etree.ElementTree import parse as _parse
+from xml.etree.ElementTree import iterparse as _iterparse
+from xml.etree.ElementTree import tostring
+
+
+class UnicodeXMLParser(DefusedXMLParser):
+    """
+    PKC Hack to ensure we're always receiving unicode, not str
+    """
+    @staticmethod
+    def _fixtext(text):
+        """
+        Do NOT try to convert every entry to str with entry.encode('ascii')!
+        """
+        return text
+
+
+# aliases
+XMLTreeBuilder = XMLParse = UnicodeXMLParser
+
+parse, iterparse, fromstring = _generate_etree_functions(UnicodeXMLParser,
+                                                         _TreeBuilder, _parse,
+                                                         _iterparse)
+XML = fromstring
+
+
+__all__ = ['XML', 'XMLParse', 'XMLTreeBuilder', 'fromstring', 'iterparse',
+           'parse', 'tostring']
diff --git a/resources/lib/utils.py b/resources/lib/utils.py
index 06c09229..cc4bbf59 100644
--- a/resources/lib/utils.py
+++ b/resources/lib/utils.py
@@ -12,7 +12,8 @@ from threading import Lock
 import urllib
 # Originally tried faster cElementTree, but does NOT work reliably with Kodi
 import xml.etree.ElementTree as etree
-import defusedxml.ElementTree as defused_etree # etree parse unsafe
+# etree parse unsafe; make sure we're always receiving unicode
+from . import defused_etree
 from xml.etree.ElementTree import ParseError
 from functools import wraps
 import hashlib