Meeting20100904: genshi-py3k.diff

File genshi-py3k.diff, 129.2 KB (added by hodgestar, 14 years ago)

Patch making Genshi py3k compatible (against trunk r1135)

  • new file .hgignore

    diff -r 4bbd2b021cb5 .hgignore
    - +  
     1.*\.pyc
     2.*.egg-info/
     3.*~
     4build/
  • MANIFEST.in

    diff -r 4bbd2b021cb5 MANIFEST.in
    a b  
    22recursive-exclude doc/logo.lineform *
    33include doc/api/*.*
    44include doc/*.html
     5recursive-include genshi/template/tests/templates *.html *.txt
  • README.txt

    diff -r 4bbd2b021cb5 README.txt
    a b  
    1010directory, and visit the Genshi web site:
    1111
    1212  <http://genshi.edgewall.org/>
     13
     14About this repository
     15=====================
     16
     17This is a work area for porting Genshi to Python 3.
  • doc/common/doctools.py

    diff -r 4bbd2b021cb5 doc/common/doctools.py
    a b  
    6262            code_block.content = 1
    6363            rst.directives.register_directive('code-block', code_block)
    6464        except ImportError:
    65             print 'Pygments not installed, syntax highlighting disabled'
     65            print('Pygments not installed, syntax highlighting disabled')
    6666
    6767        loader = TemplateLoader(['doc', 'doc/common'], variable_lookup='strict')
    6868        for source in glob('doc/*.txt'):
    6969            dest = os.path.splitext(source)[0] + '.html'
    7070            if self.force or not os.path.exists(dest) or \
    7171                    os.path.getmtime(dest) < os.path.getmtime(source):
    72                 print 'building documentation file %s' % dest
     72                print('building documentation file %s' % dest)
    7373                publish_cmdline(writer_name='html',
    7474                                argv=['--config=%s' % docutils_conf, source,
    7575                                      dest])
     
    104104                sys.argv[1:] = old_argv
    105105
    106106            except ImportError:
    107                 print 'epydoc not installed, skipping API documentation.'
     107                print('epydoc not installed, skipping API documentation.')
    108108
    109109
    110110class test_doc(Command):
     
    119119
    120120    def run(self):
    121121        for filename in glob('doc/*.txt'):
    122             print 'testing documentation file %s' % filename
     122            print('testing documentation file %s' % filename)
    123123            doctest.testfile(filename, False, optionflags=doctest.ELLIPSIS)
  • new file examples_to_py3k.sh

    diff -r 4bbd2b021cb5 examples_to_py3k.sh
    - +  
     1#!/bin/sh
     2#
     3# Script to run 2to3 on files not covered by setup.py
     4#
     5export PYTHONIOENCODING=utf8
     6
     7# General 2to3 run
     82to3 -w --no-diffs examples/
  • new file fixes/fix_unicode_in_strings.py

    diff -r 4bbd2b021cb5 fixes/fix_unicode_in_strings.py
    - +  
     1"""Fixer that changes expressions inside string literals from u"..." to "...".
     2
     3"""
     4
     5import re
     6from lib2to3 import fixer_base
     7
     8_literal_re = re.compile(r"(.+?)\b[uU]([rR]?[\'\"])")
     9
     10class FixUnicodeInStrings(fixer_base.BaseFix):
     11
     12    PATTERN = "STRING"
     13
     14    def transform(self, node, results):
     15        new = node.clone()
     16        new.value = _literal_re.sub(r"\1\2", new.value)
     17        return new
  • genshi/_speedups.c

    diff -r 4bbd2b021cb5 genshi/_speedups.c
    a b  
    1414#include <Python.h>
    1515#include <structmember.h>
    1616
    17 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
    18 typedef int Py_ssize_t;
    19 #define PY_SSIZE_T_MAX INT_MAX
    20 #define PY_SSIZE_T_MIN INT_MIN
     17#if PY_MAJOR_VERSION > 2
     18#   define IS_PY3K
     19#elif PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
     20    typedef int Py_ssize_t;
     21#   define PY_SSIZE_T_MAX INT_MAX
     22#   define PY_SSIZE_T_MIN INT_MIN
     23#endif
     24
     25/* We only use Unicode Strings in this module */
     26#ifndef IS_PY3K
     27#   define PyObject_Str PyObject_Unicode
    2128#endif
    2229
    2330static PyObject *amp1, *amp2, *lt1, *lt2, *gt1, *gt2, *qt1, *qt2;
     
    7380        Py_DECREF(args);
    7481        return ret;
    7582    }
    76     in = (PyUnicodeObject *) PyObject_Unicode(text);
     83    in = (PyUnicodeObject *) PyObject_Str(text);
    7784    if (in == NULL) {
    7885        return NULL;
    7986    }
     
    390397    PyObject *unicode, *result, *args;
    391398
    392399    if (PyObject_TypeCheck(self, &MarkupType)) {
    393         unicode = PyObject_Unicode(self);
     400        unicode = PyObject_Str(self);
    394401        if (unicode == NULL) return NULL;
    395402        result = PyNumber_Multiply(unicode, num);
    396403    } else { // __rmul__
    397         unicode = PyObject_Unicode(num);
     404        unicode = PyObject_Str(num);
    398405        if (unicode == NULL) return NULL;
    399406        result = PyNumber_Multiply(unicode, self);
    400407    }
     
    418425{
    419426    PyObject *format, *result, *args;
    420427
     428#ifdef IS_PY3K
     429    format = PyUnicode_FromString("<Markup %r>");
     430#else
    421431    format = PyString_FromString("<Markup %r>");
     432#endif
    422433    if (format == NULL) return NULL;
    423     result = PyObject_Unicode(self);
     434    result = PyObject_Str(self);
    424435    if (result == NULL) {
    425436        Py_DECREF(format);
    426437        return NULL;
     
    432443        return NULL;
    433444    }
    434445    PyTuple_SET_ITEM(args, 0, result);
     446#ifdef IS_PY3K
     447    result = PyUnicode_Format(format, args);
     448#else
    435449    result = PyString_Format(format, args);
     450#endif
    436451    Py_DECREF(format);
    437452    Py_DECREF(args);
    438453    return result;
     
    553568    Markup_add, /*nb_add*/
    554569    0, /*nb_subtract*/
    555570    Markup_mul, /*nb_multiply*/
     571#ifndef IS_PY3K
    556572    0, /*nb_divide*/
     573#endif
    557574    Markup_mod, /*nb_remainder*/
    558575};
    559576
    560577PyTypeObject MarkupType = {
     578#ifdef IS_PY3K
     579    PyVarObject_HEAD_INIT(NULL, 0)
     580#else
    561581    PyObject_HEAD_INIT(NULL)
    562582    0,
     583#endif
    563584    "genshi._speedups.Markup",
    564585    sizeof(MarkupObject),
    565586    0,
     
    567588    0,          /*tp_print*/
    568589    0,          /*tp_getattr*/
    569590    0,          /*tp_setattr*/
     591#ifdef IS_PY3K
     592    0,          /*tp_reserved*/
     593#else
    570594    0,          /*tp_compare*/
     595#endif
    571596    Markup_repr, /*tp_repr*/
    572597    &Markup_as_number, /*tp_as_number*/
    573598    0,          /*tp_as_sequence*/
     
    580605    0,          /*tp_setattro*/
    581606    0,          /*tp_as_buffer*/
    582607
     608#ifdef IS_PY3K
     609    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_UNICODE_SUBCLASS, /*tp_flags*/
     610#elif defined(Py_TPFLAGS_UNICODE_SUBCLASS)
     611    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES | Py_TPFLAGS_UNICODE_SUBCLASS, /*tp_flags*/
     612#else
    583613    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES, /*tp_flags*/
     614#endif
     615
    584616    Markup__doc__,/*tp_doc*/
    585617
    586618    0,          /*tp_traverse*/
     
    616648    0           /*tp_weaklist*/
    617649};
    618650
     651#ifdef IS_PY3K
     652struct PyModuleDef module_def = {
     653    PyModuleDef_HEAD_INIT, /*m_base*/
     654    "_speedups",           /*m_name*/
     655    NULL,                  /*m_doc*/
     656    -1,                    /*m_size*/
     657    NULL,                  /*m_methods*/
     658    NULL,                  /*m_reload*/
     659    NULL,                  /*m_traverse*/
     660    NULL,                  /*m_clear*/
     661    NULL                   /*m_free*/
     662};
     663
     664PyObject *
     665PyInit__speedups(void)
     666#else
    619667PyMODINIT_FUNC
    620668init_speedups(void)
     669#endif
    621670{
    622671    PyObject *module;
    623672
     
    626675    MarkupType.tp_base = &PyUnicode_Type;
    627676
    628677    if (PyType_Ready(&MarkupType) < 0)
     678#ifdef IS_PY3K
     679        return NULL;
     680#else
    629681        return;
     682#endif
    630683
    631684    init_constants();
    632685
     686#ifdef IS_PY3K
     687    module = PyModule_Create(&module_def);
     688#else
    633689    module = Py_InitModule("_speedups", NULL);
     690#endif
    634691    Py_INCREF(&MarkupType);
    635692    PyModule_AddObject(module, "Markup", (PyObject *) &MarkupType);
     693
     694#ifdef IS_PY3K
     695    return module;
     696#endif
    636697}
  • new file genshi/compat.py

    diff -r 4bbd2b021cb5 genshi/compat.py
    - +  
     1# -*- coding: utf-8 -*-
     2#
     3# Copyright (C) 2006-2009 Edgewall Software
     4# All rights reserved.
     5#
     6# This software is licensed as described in the file COPYING, which
     7# you should have received as part of this distribution. The terms
     8# are also available at http://genshi.edgewall.org/wiki/License.
     9#
     10# This software consists of voluntary contributions made by many
     11# individuals. For the exact contribution history, see the revision
     12# history and logs, available at http://genshi.edgewall.org/log/.
     13
     14"""Various Python version compatibility classes and functions."""
     15
     16import sys
     17from types import CodeType
     18
     19
     20IS_PYTHON2 = (sys.version_info[0] == 2)
     21
     22
     23# This function should only be called in Python 2, and will fail in Python 3
     24
     25if IS_PYTHON2:
     26    def stringrepr(string):
     27        ascii = string.encode('ascii', 'backslashreplace')
     28        quoted = "'" +  ascii.replace("'", "\\'") + "'"
     29        if len(ascii) > len(string):
     30            return 'u' + quoted
     31        return quoted
     32else:
     33    def stringrepr(string):
     34        raise RuntimeError(
     35                'Python 2 compatibility function. Not usable in Python 3.')
     36
     37
     38# We need to differentiate between StringIO and BytesIO in places
     39
     40if IS_PYTHON2:
     41    from StringIO import StringIO
     42    try:
     43        from cStringIO import StringIO as BytesIO
     44    except ImportError:
     45        BytesIO = StringIO
     46else:
     47    from io import StringIO, BytesIO
     48
     49
     50# We want to test bytestring input to some stuff.
     51
     52if IS_PYTHON2:
     53    def wrapped_bytes(bstr):
     54        assert bstr.startswith('b')
     55        return bstr[1:]
     56else:
     57    def wrapped_bytes(bstr):
     58        assert bstr.startswith('b')
     59        return bstr
     60
     61
     62# We do some scary stuff with CodeType() in template/eval.py
     63
     64if IS_PYTHON2:
     65    def get_code_params(code):
     66        return (code.co_nlocals, code.co_stacksize, code.co_flags,
     67                code.co_code, code.co_consts, code.co_names, code.co_varnames,
     68                code.co_filename, code.co_name, code.co_firstlineno,
     69                code.co_lnotab, (), ())
     70
     71    def build_code_chunk(code, filename, name, lineno):
     72        return CodeType(0, code.co_nlocals, code.co_stacksize,
     73                        code.co_flags | 0x0040, code.co_code, code.co_consts,
     74                        code.co_names, code.co_varnames, filename, name,
     75                        lineno, code.co_lnotab, (), ())
     76else:
     77    def get_code_params(code):
     78        return (code.co_nlocals, code.co_kwonlyargcount, code.co_stacksize,
     79                code.co_flags, code.co_code, code.co_consts, code.co_names,
     80                code.co_varnames, code.co_filename, code.co_name,
     81                code.co_firstlineno, code.co_lnotab, (), ())
     82
     83    def build_code_chunk(code, filename, name, lineno):
     84        return CodeType(0, code.co_nlocals, code.co_kwonlyargcount,
     85                        code.co_stacksize, code.co_flags | 0x0040,
     86                        code.co_code, code.co_consts, code.co_names,
     87                        code.co_varnames, filename, name, lineno,
     88                        code.co_lnotab, (), ())
     89
     90# Compatibility fallback implementations for Python < 2.6
     91
     92try:
     93    next = next
     94except NameError:
     95    def next(iterator):
     96        return iterator.next()
     97
     98# Compatibility fallback implementations for Python < 2.5
     99
     100try:
     101    all = all
     102    any = any
     103except NameError:
     104    def any(S):
     105        for x in S:
     106            if x:
     107                return True
     108        return False
     109
     110    def all(S):
     111        for x in S:
     112            if not x:
     113                return False
     114        return True
     115
  • genshi/core.py

    diff -r 4bbd2b021cb5 genshi/core.py
    a b  
    1717    reduce # builtin in Python < 3
    1818except NameError:
    1919    from functools import reduce
     20import sys
    2021from itertools import chain
    2122import operator
    2223
     
    9293        Assume the following stream produced by the `HTML` function:
    9394       
    9495        >>> from genshi.input import HTML
    95         >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
     96        >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''', encoding='utf-8')
    9697        >>> print(html)
    9798        <p onclick="alert('Whoa')">Hello, world!</p>
    9899       
     
    153154        """
    154155        return reduce(operator.or_, (self,) + filters)
    155156
    156     def render(self, method=None, encoding='utf-8', out=None, **kwargs):
     157    def render(self, method=None, encoding=None, out=None, **kwargs):
    157158        """Return a string representation of the stream.
    158159       
    159160        Any additional keyword arguments are passed to the serializer, and thus
     
    187188        XPath expression.
    188189       
    189190        >>> from genshi import HTML
    190         >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
     191        >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>', encoding='utf-8')
    191192        >>> print(stream.select('elem'))
    192193        <elem>foo</elem><elem>bar</elem>
    193194        >>> print(stream.select('elem/text()'))
     
    667668    def __hash__(self):
    668669        return hash(self.uri)
    669670
    670     def __repr__(self):
    671         return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))
     671    if sys.version_info[0] == 2:
     672        # Only use stringrepr in python 2
     673        def __repr__(self):
     674            return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))
     675    else:
     676        def __repr__(self):
     677            return '%s(%r)' % (type(self).__name__, self.uri)
    672678
    673679    def __str__(self):
    674680        return self.uri.encode('utf-8')
     
    728734    def __getnewargs__(self):
    729735        return (self.lstrip('{'),)
    730736
    731     def __repr__(self):
    732         return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))
     737    if sys.version_info[0] == 2:
     738        # Only use stringrepr in python 2
     739        def __repr__(self):
     740            return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))
     741    else:
     742        def __repr__(self):
     743            return '%s(%r)' % (type(self).__name__, self.lstrip('{'))
  • genshi/filters/html.py

    diff -r 4bbd2b021cb5 genshi/filters/html.py
    a b  
    3232    >>> from genshi.input import HTML
    3333    >>> html = HTML('''<form>
    3434    ...   <p><input type="text" name="foo" /></p>
    35     ... </form>''')
     35    ... </form>''', encoding='utf-8')
    3636    >>> filler = HTMLFormFiller(data={'foo': 'bar'})
    3737    >>> print(html | filler)
    3838    <form>
     
    199199    from the stream.
    200200   
    201201    >>> from genshi import HTML
    202     >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
     202    >>> html = HTML('<div><script>alert(document.cookie)</script></div>', encoding='utf-8')
    203203    >>> print(html | HTMLSanitizer())
    204204    <div/>
    205205   
     
    207207    is instantiated. For example, to allow inline ``style`` attributes, the
    208208    following instantiation would work:
    209209   
    210     >>> html = HTML('<div style="background: #000"></div>')
     210    >>> html = HTML('<div style="background: #000"></div>', encoding='utf-8')
    211211    >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
    212212    >>> print(html | sanitizer)
    213213    <div style="background: #000"/>
     
    215215    Note that even in this case, the filter *does* attempt to remove dangerous
    216216    constructs from style attributes:
    217217
    218     >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
     218    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>', encoding='utf-8')
    219219    >>> print(html | sanitizer)
    220220    <div style="color: #000"/>
    221221   
  • genshi/filters/i18n.py

    diff -r 4bbd2b021cb5 genshi/filters/i18n.py
    a b  
    3333from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
    3434from genshi.template.directives import Directive, StripDirective
    3535from genshi.template.markup import MarkupTemplate, EXEC
     36from genshi.compat import IS_PYTHON2
    3637
    3738__all__ = ['Translator', 'extract']
    3839__docformat__ = 'restructuredtext en'
     
    288289    also need to pass a name for those parameters. Consider the following
    289290    examples:
    290291   
    291     >>> tmpl = MarkupTemplate('''\
    292         <html xmlns:i18n="http://genshi.edgewall.org/i18n">
     292    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    293293    ...   <div i18n:choose="num; num">
    294294    ...     <p i18n:singular="">There is $num coin</p>
    295295    ...     <p i18n:plural="">There are $num coins</p>
     
    301301    [(2, 'ngettext', (u'There is %(num)s coin',
    302302                      u'There are %(num)s coins'), [])]
    303303
    304     >>> tmpl = MarkupTemplate('''\
    305         <html xmlns:i18n="http://genshi.edgewall.org/i18n">
     304    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    306305    ...   <div i18n:choose="num; num">
    307306    ...     <p i18n:singular="">There is $num coin</p>
    308307    ...     <p i18n:plural="">There are $num coins</p>
     
    324323
    325324    When used as an element and not as an attribute:
    326325
    327     >>> tmpl = MarkupTemplate('''\
    328         <html xmlns:i18n="http://genshi.edgewall.org/i18n">
     326    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    329327    ...   <i18n:choose numeral="num" params="num">
    330328    ...     <p i18n:singular="">There is $num coin</p>
    331329    ...     <p i18n:plural="">There are $num coins</p>
     
    492490    another i18n domain (catalog) to translate from.
    493491   
    494492    >>> from genshi.filters.tests.i18n import DummyTranslations
    495     >>> tmpl = MarkupTemplate('''\
    496         <html xmlns:i18n="http://genshi.edgewall.org/i18n">
     493    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
    497494    ...   <p i18n:msg="">Bar</p>
    498495    ...   <div i18n:domain="foo">
    499496    ...     <p i18n:msg="">FooBar</p>
     
    663660            if ctxt:
    664661                ctxt['_i18n.gettext'] = gettext
    665662        else:
    666             gettext = self.translate.ugettext
    667             ngettext = self.translate.ungettext
     663            if IS_PYTHON2:
     664                gettext = self.translate.ugettext
     665                ngettext = self.translate.ungettext
     666            else:
     667                gettext = self.translate.gettext
     668                ngettext = self.translate.ngettext
    668669            try:
    669                 dgettext = self.translate.dugettext
    670                 dngettext = self.translate.dungettext
     670                if IS_PYTHON2:
     671                    dgettext = self.translate.dugettext
     672                    dngettext = self.translate.dungettext
     673                else:
     674                    dgettext = self.translate.dgettext
     675                    dngettext = self.translate.dngettext
    671676            except AttributeError:
    672677                dgettext = lambda _, y: gettext(y)
    673678                dngettext = lambda _, s, p, n: ngettext(s, p, n)
     
    678683                ctxt['_i18n.dngettext'] = dngettext
    679684
    680685        if ctxt and ctxt.get('_i18n.domain'):
     686            # TODO: This can cause infinite recursion if dgettext is defined
     687            #       via the AttributeError case above!
    681688            gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
    682689
    683690        for kind, data, pos in stream:
     
    11681175                and node.func.id in gettext_functions:
    11691176            strings = []
    11701177            def _add(arg):
    1171                 if isinstance(arg, _ast.Str) and isinstance(arg.s, basestring):
     1178                if isinstance(arg, _ast.Str) and isinstance(arg.s, unicode):
     1179                    strings.append(arg.s)
     1180                elif isinstance(arg, _ast.Str):
    11721181                    strings.append(unicode(arg.s, 'utf-8'))
    11731182                elif arg:
    11741183                    strings.append(None)
  • genshi/filters/tests/__init__.py

    diff -r 4bbd2b021cb5 genshi/filters/tests/__init__.py
    a b  
    1515import unittest
    1616
    1717def suite():
    18     from genshi.filters.tests import html, i18n, transform
     18    from genshi.filters.tests import test_html, i18n, transform
    1919    suite = unittest.TestSuite()
    20     suite.addTest(html.suite())
     20    suite.addTest(test_html.suite())
    2121    suite.addTest(i18n.suite())
    2222    if hasattr(doctest, 'NORMALIZE_WHITESPACE'):
    2323        suite.addTest(transform.suite())
  • deleted file genshi/filters/tests/html.py

    diff -r 4bbd2b021cb5 genshi/filters/tests/html.py
    + -  
    1 # -*- coding: utf-8 -*-
    2 #
    3 # Copyright (C) 2006-2009 Edgewall Software
    4 # All rights reserved.
    5 #
    6 # This software is licensed as described in the file COPYING, which
    7 # you should have received as part of this distribution. The terms
    8 # are also available at http://genshi.edgewall.org/wiki/License.
    9 #
    10 # This software consists of voluntary contributions made by many
    11 # individuals. For the exact contribution history, see the revision
    12 # history and logs, available at http://genshi.edgewall.org/log/.
    13 
    14 import doctest
    15 import unittest
    16 
    17 from genshi.input import HTML, ParseError
    18 from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
    19 from genshi.template import MarkupTemplate
    20 
    21 class HTMLFormFillerTestCase(unittest.TestCase):
    22 
    23     def test_fill_input_text_no_value(self):
    24         html = HTML("""<form><p>
    25           <input type="text" name="foo" />
    26         </p></form>""") | HTMLFormFiller()
    27         self.assertEquals("""<form><p>
    28           <input type="text" name="foo"/>
    29         </p></form>""", html.render())
    30 
    31     def test_fill_input_text_single_value(self):
    32         html = HTML("""<form><p>
    33           <input type="text" name="foo" />
    34         </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
    35         self.assertEquals("""<form><p>
    36           <input type="text" name="foo" value="bar"/>
    37         </p></form>""", html.render())
    38 
    39     def test_fill_input_text_multi_value(self):
    40         html = HTML("""<form><p>
    41           <input type="text" name="foo" />
    42         </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
    43         self.assertEquals("""<form><p>
    44           <input type="text" name="foo" value="bar"/>
    45         </p></form>""", html.render())
    46 
    47     def test_fill_input_hidden_no_value(self):
    48         html = HTML("""<form><p>
    49           <input type="hidden" name="foo" />
    50         </p></form>""") | HTMLFormFiller()
    51         self.assertEquals("""<form><p>
    52           <input type="hidden" name="foo"/>
    53         </p></form>""", html.render())
    54 
    55     def test_fill_input_hidden_single_value(self):
    56         html = HTML("""<form><p>
    57           <input type="hidden" name="foo" />
    58         </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
    59         self.assertEquals("""<form><p>
    60           <input type="hidden" name="foo" value="bar"/>
    61         </p></form>""", html.render())
    62 
    63     def test_fill_input_hidden_multi_value(self):
    64         html = HTML("""<form><p>
    65           <input type="hidden" name="foo" />
    66         </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
    67         self.assertEquals("""<form><p>
    68           <input type="hidden" name="foo" value="bar"/>
    69         </p></form>""", html.render())
    70 
    71     def test_fill_textarea_no_value(self):
    72         html = HTML("""<form><p>
    73           <textarea name="foo"></textarea>
    74         </p></form>""") | HTMLFormFiller()
    75         self.assertEquals("""<form><p>
    76           <textarea name="foo"/>
    77         </p></form>""", html.render())
    78 
    79     def test_fill_textarea_single_value(self):
    80         html = HTML("""<form><p>
    81           <textarea name="foo"></textarea>
    82         </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
    83         self.assertEquals("""<form><p>
    84           <textarea name="foo">bar</textarea>
    85         </p></form>""", html.render())
    86 
    87     def test_fill_textarea_multi_value(self):
    88         html = HTML("""<form><p>
    89           <textarea name="foo"></textarea>
    90         </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
    91         self.assertEquals("""<form><p>
    92           <textarea name="foo">bar</textarea>
    93         </p></form>""", html.render())
    94 
    95     def test_fill_textarea_multiple(self):
    96         # Ensure that the subsequent textarea doesn't get the data from the
    97         # first
    98         html = HTML("""<form><p>
    99           <textarea name="foo"></textarea>
    100           <textarea name="bar"></textarea>
    101         </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
    102         self.assertEquals("""<form><p>
    103           <textarea name="foo">Some text</textarea>
    104           <textarea name="bar"/>
    105         </p></form>""", html.render())
    106 
    107     def test_fill_textarea_preserve_original(self):
    108         html = HTML("""<form><p>
    109           <textarea name="foo"></textarea>
    110           <textarea name="bar">Original value</textarea>
    111         </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
    112         self.assertEquals("""<form><p>
    113           <textarea name="foo">Some text</textarea>
    114           <textarea name="bar">Original value</textarea>
    115         </p></form>""", html.render())
    116 
    117     def test_fill_input_checkbox_single_value_auto_no_value(self):
    118         html = HTML("""<form><p>
    119           <input type="checkbox" name="foo" />
    120         </p></form>""") | HTMLFormFiller()
    121         self.assertEquals("""<form><p>
    122           <input type="checkbox" name="foo"/>
    123         </p></form>""", html.render())
    124 
    125     def test_fill_input_checkbox_single_value_auto(self):
    126         html = HTML("""<form><p>
    127           <input type="checkbox" name="foo" />
    128         </p></form>""")
    129         self.assertEquals("""<form><p>
    130           <input type="checkbox" name="foo"/>
    131         </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
    132         self.assertEquals("""<form><p>
    133           <input type="checkbox" name="foo" checked="checked"/>
    134         </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render())
    135 
    136     def test_fill_input_checkbox_single_value_defined(self):
    137         html = HTML("""<form><p>
    138           <input type="checkbox" name="foo" value="1" />
    139         </p></form>""")
    140         self.assertEquals("""<form><p>
    141           <input type="checkbox" name="foo" value="1" checked="checked"/>
    142         </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
    143         self.assertEquals("""<form><p>
    144           <input type="checkbox" name="foo" value="1"/>
    145         </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
    146 
    147     def test_fill_input_checkbox_multi_value_auto(self):
    148         html = HTML("""<form><p>
    149           <input type="checkbox" name="foo" />
    150         </p></form>""")
    151         self.assertEquals("""<form><p>
    152           <input type="checkbox" name="foo"/>
    153         </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render())
    154         self.assertEquals("""<form><p>
    155           <input type="checkbox" name="foo" checked="checked"/>
    156         </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render())
    157 
    158     def test_fill_input_checkbox_multi_value_defined(self):
    159         html = HTML("""<form><p>
    160           <input type="checkbox" name="foo" value="1" />
    161         </p></form>""")
    162         self.assertEquals("""<form><p>
    163           <input type="checkbox" name="foo" value="1" checked="checked"/>
    164         </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
    165         self.assertEquals("""<form><p>
    166           <input type="checkbox" name="foo" value="1"/>
    167         </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
    168 
    169     def test_fill_input_radio_no_value(self):
    170         html = HTML("""<form><p>
    171           <input type="radio" name="foo" />
    172         </p></form>""") | HTMLFormFiller()
    173         self.assertEquals("""<form><p>
    174           <input type="radio" name="foo"/>
    175         </p></form>""", html.render())
    176 
    177     def test_fill_input_radio_single_value(self):
    178         html = HTML("""<form><p>
    179           <input type="radio" name="foo" value="1" />
    180         </p></form>""")
    181         self.assertEquals("""<form><p>
    182           <input type="radio" name="foo" value="1" checked="checked"/>
    183         </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
    184         self.assertEquals("""<form><p>
    185           <input type="radio" name="foo" value="1"/>
    186         </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
    187 
    188     def test_fill_input_radio_multi_value(self):
    189         html = HTML("""<form><p>
    190           <input type="radio" name="foo" value="1" />
    191         </p></form>""")
    192         self.assertEquals("""<form><p>
    193           <input type="radio" name="foo" value="1" checked="checked"/>
    194         </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
    195         self.assertEquals("""<form><p>
    196           <input type="radio" name="foo" value="1"/>
    197         </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
    198 
    199     def test_fill_input_radio_empty_string(self):
    200         html = HTML("""<form><p>
    201           <input type="radio" name="foo" value="" />
    202         </p></form>""")
    203         self.assertEquals("""<form><p>
    204           <input type="radio" name="foo" value="" checked="checked"/>
    205         </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
    206 
    207     def test_fill_input_radio_multi_empty_string(self):
    208         html = HTML("""<form><p>
    209           <input type="radio" name="foo" value="" />
    210         </p></form>""")
    211         self.assertEquals("""<form><p>
    212           <input type="radio" name="foo" value="" checked="checked"/>
    213         </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render())
    214 
    215     def test_fill_select_no_value_auto(self):
    216         html = HTML("""<form><p>
    217           <select name="foo">
    218             <option>1</option>
    219             <option>2</option>
    220             <option>3</option>
    221           </select>
    222         </p></form>""") | HTMLFormFiller()
    223         self.assertEquals("""<form><p>
    224           <select name="foo">
    225             <option>1</option>
    226             <option>2</option>
    227             <option>3</option>
    228           </select>
    229         </p></form>""", html.render())
    230 
    231     def test_fill_select_no_value_defined(self):
    232         html = HTML("""<form><p>
    233           <select name="foo">
    234             <option value="1">1</option>
    235             <option value="2">2</option>
    236             <option value="3">3</option>
    237           </select>
    238         </p></form>""") | HTMLFormFiller()
    239         self.assertEquals("""<form><p>
    240           <select name="foo">
    241             <option value="1">1</option>
    242             <option value="2">2</option>
    243             <option value="3">3</option>
    244           </select>
    245         </p></form>""", html.render())
    246 
    247     def test_fill_select_single_value_auto(self):
    248         html = HTML("""<form><p>
    249           <select name="foo">
    250             <option>1</option>
    251             <option>2</option>
    252             <option>3</option>
    253           </select>
    254         </p></form>""") | HTMLFormFiller(data={'foo': '1'})
    255         self.assertEquals("""<form><p>
    256           <select name="foo">
    257             <option selected="selected">1</option>
    258             <option>2</option>
    259             <option>3</option>
    260           </select>
    261         </p></form>""", html.render())
    262 
    263     def test_fill_select_single_value_defined(self):
    264         html = HTML("""<form><p>
    265           <select name="foo">
    266             <option value="1">1</option>
    267             <option value="2">2</option>
    268             <option value="3">3</option>
    269           </select>
    270         </p></form>""") | HTMLFormFiller(data={'foo': '1'})
    271         self.assertEquals("""<form><p>
    272           <select name="foo">
    273             <option value="1" selected="selected">1</option>
    274             <option value="2">2</option>
    275             <option value="3">3</option>
    276           </select>
    277         </p></form>""", html.render())
    278 
    279     def test_fill_select_multi_value_auto(self):
    280         html = HTML("""<form><p>
    281           <select name="foo" multiple>
    282             <option>1</option>
    283             <option>2</option>
    284             <option>3</option>
    285           </select>
    286         </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
    287         self.assertEquals("""<form><p>
    288           <select name="foo" multiple="multiple">
    289             <option selected="selected">1</option>
    290             <option>2</option>
    291             <option selected="selected">3</option>
    292           </select>
    293         </p></form>""", html.render())
    294 
    295     def test_fill_select_multi_value_defined(self):
    296         html = HTML("""<form><p>
    297           <select name="foo" multiple>
    298             <option value="1">1</option>
    299             <option value="2">2</option>
    300             <option value="3">3</option>
    301           </select>
    302         </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
    303         self.assertEquals("""<form><p>
    304           <select name="foo" multiple="multiple">
    305             <option value="1" selected="selected">1</option>
    306             <option value="2">2</option>
    307             <option value="3" selected="selected">3</option>
    308           </select>
    309         </p></form>""", html.render())
    310 
    311     def test_fill_option_segmented_text(self):
    312         html = MarkupTemplate("""<form>
    313           <select name="foo">
    314             <option value="1">foo $x</option>
    315           </select>
    316         </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'})
    317         self.assertEquals("""<form>
    318           <select name="foo">
    319             <option value="1" selected="selected">foo 1</option>
    320           </select>
    321         </form>""", html.render())
    322 
    323     def test_fill_option_segmented_text_no_value(self):
    324         html = MarkupTemplate("""<form>
    325           <select name="foo">
    326             <option>foo $x bar</option>
    327           </select>
    328         </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'})
    329         self.assertEquals("""<form>
    330           <select name="foo">
    331             <option selected="selected">foo 1 bar</option>
    332           </select>
    333         </form>""", html.render())
    334 
    335     def test_fill_option_unicode_value(self):
    336         html = HTML("""<form>
    337           <select name="foo">
    338             <option value="&ouml;">foo</option>
    339           </select>
    340         </form>""") | HTMLFormFiller(data={'foo': u'ö'})
    341         self.assertEquals(u"""<form>
    342           <select name="foo">
    343             <option value="ö" selected="selected">foo</option>
    344           </select>
    345         </form>""", html.render(encoding=None))
    346 
    347     def test_fill_input_password_disabled(self):
    348         html = HTML("""<form><p>
    349           <input type="password" name="pass" />
    350         </p></form>""") | HTMLFormFiller(data={'pass': 'bar'})
    351         self.assertEquals("""<form><p>
    352           <input type="password" name="pass"/>
    353         </p></form>""", html.render())
    354 
    355     def test_fill_input_password_enabled(self):
    356         html = HTML("""<form><p>
    357           <input type="password" name="pass" />
    358         </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
    359         self.assertEquals("""<form><p>
    360           <input type="password" name="pass" value="1234"/>
    361         </p></form>""", html.render())
    362 
    363 
    364 class HTMLSanitizerTestCase(unittest.TestCase):
    365 
    366     def test_sanitize_unchanged(self):
    367         html = HTML('<a href="#">fo<br />o</a>')
    368         self.assertEquals('<a href="#">fo<br/>o</a>',
    369                           (html | HTMLSanitizer()).render())
    370         html = HTML('<a href="#with:colon">foo</a>')
    371         self.assertEquals('<a href="#with:colon">foo</a>',
    372                           (html | HTMLSanitizer()).render())
    373 
    374     def test_sanitize_escape_text(self):
    375         html = HTML('<a href="#">fo&amp;</a>')
    376         self.assertEquals('<a href="#">fo&amp;</a>',
    377                           (html | HTMLSanitizer()).render())
    378         html = HTML('<a href="#">&lt;foo&gt;</a>')
    379         self.assertEquals('<a href="#">&lt;foo&gt;</a>',
    380                           (html | HTMLSanitizer()).render())
    381 
    382     def test_sanitize_entityref_text(self):
    383         html = HTML('<a href="#">fo&ouml;</a>')
    384         self.assertEquals(u'<a href="#">foö</a>',
    385                           (html | HTMLSanitizer()).render(encoding=None))
    386 
    387     def test_sanitize_escape_attr(self):
    388         html = HTML('<div title="&lt;foo&gt;"></div>')
    389         self.assertEquals('<div title="&lt;foo&gt;"/>',
    390                           (html | HTMLSanitizer()).render())
    391 
    392     def test_sanitize_close_empty_tag(self):
    393         html = HTML('<a href="#">fo<br>o</a>')
    394         self.assertEquals('<a href="#">fo<br/>o</a>',
    395                           (html | HTMLSanitizer()).render())
    396 
    397     def test_sanitize_invalid_entity(self):
    398         html = HTML('&junk;')
    399         self.assertEquals('&amp;junk;', (html | HTMLSanitizer()).render())
    400 
    401     def test_sanitize_remove_script_elem(self):
    402         html = HTML('<script>alert("Foo")</script>')
    403         self.assertEquals('', (html | HTMLSanitizer()).render())
    404         html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>')
    405         self.assertEquals('', (html | HTMLSanitizer()).render())
    406         self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>')
    407         self.assertRaises(ParseError, HTML,
    408                           '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
    409 
    410     def test_sanitize_remove_onclick_attr(self):
    411         html = HTML('<div onclick=\'alert("foo")\' />')
    412         self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
    413 
    414     def test_sanitize_remove_input_password(self):
    415         html = HTML('<form><input type="password" /></form>')
    416         self.assertEquals('<form/>', (html | HTMLSanitizer()).render())
    417 
    418     def test_sanitize_remove_comments(self):
    419         html = HTML('''<div><!-- conditional comment crap --></div>''')
    420         self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
    421 
    422     def test_sanitize_remove_style_scripts(self):
    423         sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
    424         # Inline style with url() using javascript: scheme
    425         html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
    426         self.assertEquals('<div/>', (html | sanitizer).render())
    427         # Inline style with url() using javascript: scheme, using control char
    428         html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
    429         self.assertEquals('<div/>', (html | sanitizer).render())
    430         # Inline style with url() using javascript: scheme, in quotes
    431         html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
    432         self.assertEquals('<div/>', (html | sanitizer).render())
    433         # IE expressions in CSS not allowed
    434         html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>')
    435         self.assertEquals('<div/>', (html | sanitizer).render())
    436         html = HTML('<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>')
    437         self.assertEquals('<div/>', (html | sanitizer).render())
    438         html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));'
    439                                  'color: #fff\'>')
    440         self.assertEquals('<div style="color: #fff"/>',
    441                           (html | sanitizer).render())
    442         # Inline style with url() using javascript: scheme, using unicode
    443         # escapes
    444         html = HTML('<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>')
    445         self.assertEquals('<div/>', (html | sanitizer).render())
    446         html = HTML('<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>')
    447         self.assertEquals('<div/>', (html | sanitizer).render())
    448         html = HTML('<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>')
    449         self.assertEquals('<div/>', (html | sanitizer).render())
    450         html = HTML('<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>')
    451         self.assertEquals('<div/>', (html | sanitizer).render())
    452         html = HTML('<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>')
    453         self.assertEquals('<div/>', (html | sanitizer).render())
    454 
    455     def test_sanitize_remove_style_phishing(self):
    456         sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
    457         # The position property is not allowed
    458         html = HTML('<div style="position:absolute;top:0"></div>')
    459         self.assertEquals('<div style="top:0"/>', (html | sanitizer).render())
    460         # Normal margins get passed through
    461         html = HTML('<div style="margin:10px 20px"></div>')
    462         self.assertEquals('<div style="margin:10px 20px"/>',
    463                           (html | sanitizer).render())
    464         # But not negative margins
    465         html = HTML('<div style="margin:-1000px 0 0"></div>')
    466         self.assertEquals('<div/>', (html | sanitizer).render())
    467         html = HTML('<div style="margin-left:-2000px 0 0"></div>')
    468         self.assertEquals('<div/>', (html | sanitizer).render())
    469         html = HTML('<div style="margin-left:1em 1em 1em -4000px"></div>')
    470         self.assertEquals('<div/>', (html | sanitizer).render())
    471 
    472     def test_sanitize_remove_src_javascript(self):
    473         html = HTML('<img src=\'javascript:alert("foo")\'>')
    474         self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
    475         # Case-insensitive protocol matching
    476         html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
    477         self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
    478         # Grave accents (not parsed)
    479         self.assertRaises(ParseError, HTML,
    480                           '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
    481         # Protocol encoded using UTF-8 numeric entities
    482         html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
    483                     '&#112;&#116;&#58;alert("foo")\'>')
    484         self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
    485         # Protocol encoded using UTF-8 numeric entities without a semicolon
    486         # (which is allowed because the max number of digits is used)
    487         html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
    488                     '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
    489                     '&#0000058alert("foo")\'>')
    490         self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
    491         # Protocol encoded using UTF-8 numeric hex entities without a semicolon
    492         # (which is allowed because the max number of digits is used)
    493         html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
    494                     '&#x70&#x74&#x3A;alert("foo")\'>')
    495         self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
    496         # Embedded tab character in protocol
    497         html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>')
    498         self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
    499         # Embedded tab character in protocol, but encoded this time
    500         html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
    501         self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
    502 
    503 
    504 def suite():
    505     suite = unittest.TestSuite()
    506     suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
    507     suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test'))
    508     suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test'))
    509     return suite
    510 
    511 
    512 if __name__ == '__main__':
    513     unittest.main(defaultTest='suite')
  • genshi/filters/tests/i18n.py

    diff -r 4bbd2b021cb5 genshi/filters/tests/i18n.py
    a b  
    1414from datetime import datetime
    1515import doctest
    1616from gettext import NullTranslations
    17 from StringIO import StringIO
    1817import unittest
    1918
    2019from genshi.core import Attrs
    2120from genshi.template import MarkupTemplate, Context
    2221from genshi.filters.i18n import Translator, extract
    2322from genshi.input import HTML
     23from genshi.compat import IS_PYTHON2, StringIO
    2424
    2525
    2626class DummyTranslations(NullTranslations):
     
    3939    def _domain_call(self, func, domain, *args, **kwargs):
    4040        return getattr(self._domains.get(domain, self), func)(*args, **kwargs)
    4141
    42     def ugettext(self, message):
    43         missing = object()
    44         tmsg = self._catalog.get(message, missing)
    45         if tmsg is missing:
    46             if self._fallback:
    47                 return self._fallback.ugettext(message)
    48             return unicode(message)
    49         return tmsg
     42    if IS_PYTHON2:
     43        def ugettext(self, message):
     44            missing = object()
     45            tmsg = self._catalog.get(message, missing)
     46            if tmsg is missing:
     47                if self._fallback:
     48                    return self._fallback.ugettext(message)
     49                return unicode(message)
     50            return tmsg
     51    else:
     52        def gettext(self, message):
     53            missing = object()
     54            tmsg = self._catalog.get(message, missing)
     55            if tmsg is missing:
     56                if self._fallback:
     57                    return self._fallback.gettext(message)
     58                return unicode(message)
     59            return tmsg
    5060
    51     def dugettext(self, domain, message):
    52         return self._domain_call('ugettext', domain, message)
     61    if IS_PYTHON2:
     62        def dugettext(self, domain, message):
     63            return self._domain_call('ugettext', domain, message)
     64    else:
     65        def dgettext(self, domain, message):
     66            return self._domain_call('gettext', domain, message)
    5367
    5468    def ungettext(self, msgid1, msgid2, n):
    5569        try:
     
    6276            else:
    6377                return msgid2
    6478
    65     def dungettext(self, domain, singular, plural, numeral):
    66         return self._domain_call('ungettext', domain, singular, plural, numeral)
     79    if not IS_PYTHON2:
     80        ngettext = ungettext
     81        del ungettext
     82
     83    if IS_PYTHON2:
     84        def dungettext(self, domain, singular, plural, numeral):
     85            return self._domain_call('ungettext', domain, singular, plural, numeral)
     86    else:
     87        def dngettext(self, domain, singular, plural, numeral):
     88            return self._domain_call('ngettext', domain, singular, plural, numeral)
    6789
    6890
    6991class TranslatorTestCase(unittest.TestCase):
     
    7294        """
    7395        Verify that translated attributes end up in a proper `Attrs` instance.
    7496        """
    75         html = HTML("""<html>
     97        html = HTML(u"""<html>
    7698          <span title="Foo"></span>
    7799        </html>""")
    78100        translator = Translator(lambda s: u"Voh")
     
    218240        gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]."
    219241        translator = Translator(gettext)
    220242        translator.setup(tmpl)
    221         self.assertEqual("""<html>
     243        self.assertEqual(u"""<html>
    222244          <p>Für Details siehe bitte <a href="help.html">Hilfe</a>.</p>
    223         </html>""", tmpl.generate().render())
     245        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
    224246
    225247    def test_extract_i18n_msg_nonewline(self):
    226248        tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
     
    241263        gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]"
    242264        translator = Translator(gettext)
    243265        translator.setup(tmpl)
    244         self.assertEqual("""<html>
     266        self.assertEqual(u"""<html>
    245267          <p>Für Details siehe bitte <a href="help.html">Hilfe</a></p>
    246268        </html>""", tmpl.generate().render())
    247269
     
    264286        gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]"
    265287        translator = Translator(gettext)
    266288        translator.setup(tmpl)
    267         self.assertEqual("""<html>
     289        self.assertEqual(u"""<html>
    268290          Für Details siehe bitte <a href="help.html">Hilfe</a>
    269         </html>""", tmpl.generate().render())
     291        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
    270292
    271293    def test_extract_i18n_msg_with_attributes(self):
    272294        tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
     
    394416        gettext = lambda s: u"Für Details siehe bitte [1:[2:Hilfeseite]]."
    395417        translator = Translator(gettext)
    396418        translator.setup(tmpl)
    397         self.assertEqual("""<html>
     419        self.assertEqual(u"""<html>
    398420          <p>Für Details siehe bitte <a href="help.html"><em>Hilfeseite</em></a>.</p>
    399421        </html>""", tmpl.generate().render())
    400422
     
    449471        gettext = lambda s: u"[1:] Einträge pro Seite anzeigen."
    450472        translator = Translator(gettext)
    451473        translator.setup(tmpl)
    452         self.assertEqual("""<html>
     474        self.assertEqual(u"""<html>
    453475          <p><input type="text" name="num"/> Einträge pro Seite anzeigen.</p>
    454476        </html>""", tmpl.generate().render())
    455477
     
    476498        gettext = lambda s: u"Für [2:Details] siehe bitte [1:Hilfe]."
    477499        translator = Translator(gettext)
    478500        translator.setup(tmpl)
    479         self.assertEqual("""<html>
     501        self.assertEqual(u"""<html>
    480502          <p>Für <em>Details</em> siehe bitte <a href="help.html">Hilfe</a>.</p>
    481503        </html>""", tmpl.generate().render())
    482504
     
    500522          <p i18n:msg="">
    501523            Show me <input type="text" name="num" /> entries per page, starting at page <input type="text" name="num" />.
    502524          </p>
    503         </html>""")
     525        </html>""", encoding='utf-8')
    504526        gettext = lambda s: u"[1:] Einträge pro Seite, beginnend auf Seite [2:]."
    505527        translator = Translator(gettext)
    506528        translator.setup(tmpl)
    507         self.assertEqual("""<html>
    508           <p><input type="text" name="num"/> Eintr\xc3\xa4ge pro Seite, beginnend auf Seite <input type="text" name="num"/>.</p>
    509         </html>""", tmpl.generate().render())
     529        self.assertEqual(u"""<html>
     530          <p><input type="text" name="num"/> Eintr\u00E4ge pro Seite, beginnend auf Seite <input type="text" name="num"/>.</p>
     531        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
    510532
    511533    def test_extract_i18n_msg_with_param(self):
    512534        tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
     
    545567        gettext = lambda s: u"%(name)s, sei gegrüßt!"
    546568        translator = Translator(gettext)
    547569        translator.setup(tmpl)
    548         self.assertEqual("""<html>
     570        self.assertEqual(u"""<html>
    549571          <p>Jim, sei gegrüßt!</p>
    550572        </html>""", tmpl.generate(user=dict(name='Jim')).render())
    551573
     
    559581        gettext = lambda s: u"Sei gegrüßt, [1:Alter]!"
    560582        translator = Translator(gettext)
    561583        translator.setup(tmpl)
    562         self.assertEqual("""<html>
     584        self.assertEqual(u"""<html>
    563585          <p>Sei gegrüßt, <a href="#42">Alter</a>!</p>
    564586        </html>""", tmpl.generate(anchor='42').render())
    565587
     
    617639        gettext = lambda s: u"[1:] Einträge pro Seite anzeigen."
    618640        translator = Translator(gettext)
    619641        translator.setup(tmpl)
    620         self.assertEqual("""<html>
     642        self.assertEqual(u"""<html>
    621643          <p><input type="text" name="num" value="x"/> Einträge pro Seite anzeigen.</p>
    622644        </html>""", tmpl.generate().render())
    623645
     
    676698        }))
    677699        tmpl.filters.insert(0, translator)
    678700        tmpl.add_directives(Translator.NAMESPACE, translator)
    679         self.assertEqual("""<html>
     701        self.assertEqual(u"""<html>
    680702          <p title="Voh bär">Voh</p>
    681703        </html>""", tmpl.generate().render())
    682704
     
    720742        })
    721743        translator = Translator(translations)
    722744        translator.setup(tmpl)
    723         self.assertEqual("""<html>
     745        self.assertEqual(u"""<html>
    724746          Modificado à um dia por Pedro
    725         </html>""", tmpl.generate(date='um dia', author="Pedro").render())
     747        </html>""".encode('utf-8'), tmpl.generate(date='um dia', author="Pedro").render(encoding='utf-8'))
    726748
    727749
    728750    def test_i18n_msg_ticket_251_extract(self):
     
    749771        })
    750772        translator = Translator(translations)
    751773        translator.setup(tmpl)
    752         self.assertEqual("""<html>
     774        self.assertEqual(u"""<html>
    753775          <p><tt><b>Trandução[ 0 ]</b>: <em>Uma moeda</em></tt></p>
    754         </html>""", tmpl.generate().render())
     776        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
    755777
    756778    def test_extract_i18n_msg_with_other_directives_nested(self):
    757779        tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
     
    811833        self.assertEqual(1, len(messages))
    812834        ctx = Context()
    813835        ctx.push({'trac': {'homepage': 'http://trac.edgewall.org/'}})
    814         self.assertEqual("""<html>
     836        self.assertEqual(u"""<html>
    815837          <p>Antes de o fazer, porém,
    816838            <strong>por favor tente <a href="http://trac.edgewall.org/search?ticket=yes&amp;noquickjump=1&amp;q=q">procurar</a>
    817839            por problemas semelhantes</strong>, uma vez que é muito provável que este problema
     
    846868            '[2:[3:trac.ini]]\n            and cannot be edited on this page.',
    847869            messages[0][2]
    848870        )
    849         self.assertEqual("""<html>
     871        self.assertEqual(u"""<html>
    850872          <p class="hint"><strong>Nota:</strong> Este repositório está definido em
    851873           <code><a href="href.wiki(TracIni)">trac.ini</a></code>
    852874            e não pode ser editado nesta página.</p>
    853         </html>""", tmpl.generate(editable=False).render())
     875        </html>""".encode('utf-8'), tmpl.generate(editable=False).render(encoding='utf-8'))
    854876
    855877    def test_extract_i18n_msg_with_py_strip(self):
    856878        tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
     
    17711793            loader = TemplateLoader([dirname], callback=callback)
    17721794            tmpl = loader.load('tmpl10.html')
    17731795
     1796            if IS_PYTHON2:
     1797                dgettext = translations.dugettext
     1798            else:
     1799                dgettext = translations.dgettext
     1800
    17741801            self.assertEqual("""<html>
    17751802                        <div>Included tmpl0</div>
    17761803                        <p title="foo_Bar 0">foo_Bar 0</p>
     
    17971824                        <p title="Voh">Voh 3</p>
    17981825                        <p title="Voh">Voh 3</p>
    17991826                </html>""", tmpl.generate(idx=-1,
    1800                                           dg=translations.dugettext).render())
     1827                                          dg=dgettext).render())
    18011828        finally:
    18021829            shutil.rmtree(dirname)
    18031830
  • new file genshi/filters/tests/test_html.py

    diff -r 4bbd2b021cb5 genshi/filters/tests/test_html.py
    - +  
     1# -*- coding: utf-8 -*-
     2#
     3# Copyright (C) 2006-2009 Edgewall Software
     4# All rights reserved.
     5#
     6# This software is licensed as described in the file COPYING, which
     7# you should have received as part of this distribution. The terms
     8# are also available at http://genshi.edgewall.org/wiki/License.
     9#
     10# This software consists of voluntary contributions made by many
     11# individuals. For the exact contribution history, see the revision
     12# history and logs, available at http://genshi.edgewall.org/log/.
     13
     14import doctest
     15import unittest
     16
     17from genshi.input import HTML, ParseError
     18from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
     19from genshi.template import MarkupTemplate
     20
     21class HTMLFormFillerTestCase(unittest.TestCase):
     22
     23    def test_fill_input_text_no_value(self):
     24        html = HTML(u"""<form><p>
     25          <input type="text" name="foo" />
     26        </p></form>""") | HTMLFormFiller()
     27        self.assertEquals("""<form><p>
     28          <input type="text" name="foo"/>
     29        </p></form>""", html.render())
     30
     31    def test_fill_input_text_single_value(self):
     32        html = HTML(u"""<form><p>
     33          <input type="text" name="foo" />
     34        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
     35        self.assertEquals("""<form><p>
     36          <input type="text" name="foo" value="bar"/>
     37        </p></form>""", html.render())
     38
     39    def test_fill_input_text_multi_value(self):
     40        html = HTML(u"""<form><p>
     41          <input type="text" name="foo" />
     42        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
     43        self.assertEquals("""<form><p>
     44          <input type="text" name="foo" value="bar"/>
     45        </p></form>""", html.render())
     46
     47    def test_fill_input_hidden_no_value(self):
     48        html = HTML(u"""<form><p>
     49          <input type="hidden" name="foo" />
     50        </p></form>""") | HTMLFormFiller()
     51        self.assertEquals("""<form><p>
     52          <input type="hidden" name="foo"/>
     53        </p></form>""", html.render())
     54
     55    def test_fill_input_hidden_single_value(self):
     56        html = HTML(u"""<form><p>
     57          <input type="hidden" name="foo" />
     58        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
     59        self.assertEquals("""<form><p>
     60          <input type="hidden" name="foo" value="bar"/>
     61        </p></form>""", html.render())
     62
     63    def test_fill_input_hidden_multi_value(self):
     64        html = HTML(u"""<form><p>
     65          <input type="hidden" name="foo" />
     66        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
     67        self.assertEquals("""<form><p>
     68          <input type="hidden" name="foo" value="bar"/>
     69        </p></form>""", html.render())
     70
     71    def test_fill_textarea_no_value(self):
     72        html = HTML(u"""<form><p>
     73          <textarea name="foo"></textarea>
     74        </p></form>""") | HTMLFormFiller()
     75        self.assertEquals("""<form><p>
     76          <textarea name="foo"/>
     77        </p></form>""", html.render())
     78
     79    def test_fill_textarea_single_value(self):
     80        html = HTML(u"""<form><p>
     81          <textarea name="foo"></textarea>
     82        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
     83        self.assertEquals("""<form><p>
     84          <textarea name="foo">bar</textarea>
     85        </p></form>""", html.render())
     86
     87    def test_fill_textarea_multi_value(self):
     88        html = HTML(u"""<form><p>
     89          <textarea name="foo"></textarea>
     90        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
     91        self.assertEquals("""<form><p>
     92          <textarea name="foo">bar</textarea>
     93        </p></form>""", html.render())
     94
     95    def test_fill_textarea_multiple(self):
     96        # Ensure that the subsequent textarea doesn't get the data from the
     97        # first
     98        html = HTML(u"""<form><p>
     99          <textarea name="foo"></textarea>
     100          <textarea name="bar"></textarea>
     101        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
     102        self.assertEquals("""<form><p>
     103          <textarea name="foo">Some text</textarea>
     104          <textarea name="bar"/>
     105        </p></form>""", html.render())
     106
     107    def test_fill_textarea_preserve_original(self):
     108        html = HTML(u"""<form><p>
     109          <textarea name="foo"></textarea>
     110          <textarea name="bar">Original value</textarea>
     111        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
     112        self.assertEquals("""<form><p>
     113          <textarea name="foo">Some text</textarea>
     114          <textarea name="bar">Original value</textarea>
     115        </p></form>""", html.render())
     116
     117    def test_fill_input_checkbox_single_value_auto_no_value(self):
     118        html = HTML(u"""<form><p>
     119          <input type="checkbox" name="foo" />
     120        </p></form>""") | HTMLFormFiller()
     121        self.assertEquals("""<form><p>
     122          <input type="checkbox" name="foo"/>
     123        </p></form>""", html.render())
     124
     125    def test_fill_input_checkbox_single_value_auto(self):
     126        html = HTML(u"""<form><p>
     127          <input type="checkbox" name="foo" />
     128        </p></form>""")
     129        self.assertEquals("""<form><p>
     130          <input type="checkbox" name="foo"/>
     131        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
     132        self.assertEquals("""<form><p>
     133          <input type="checkbox" name="foo" checked="checked"/>
     134        </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render())
     135
     136    def test_fill_input_checkbox_single_value_defined(self):
     137        html = HTML("""<form><p>
     138          <input type="checkbox" name="foo" value="1" />
     139        </p></form>""", encoding='ascii')
     140        self.assertEquals("""<form><p>
     141          <input type="checkbox" name="foo" value="1" checked="checked"/>
     142        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
     143        self.assertEquals("""<form><p>
     144          <input type="checkbox" name="foo" value="1"/>
     145        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
     146
     147    def test_fill_input_checkbox_multi_value_auto(self):
     148        html = HTML("""<form><p>
     149          <input type="checkbox" name="foo" />
     150        </p></form>""", encoding='ascii')
     151        self.assertEquals("""<form><p>
     152          <input type="checkbox" name="foo"/>
     153        </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render())
     154        self.assertEquals("""<form><p>
     155          <input type="checkbox" name="foo" checked="checked"/>
     156        </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render())
     157
     158    def test_fill_input_checkbox_multi_value_defined(self):
     159        html = HTML(u"""<form><p>
     160          <input type="checkbox" name="foo" value="1" />
     161        </p></form>""")
     162        self.assertEquals("""<form><p>
     163          <input type="checkbox" name="foo" value="1" checked="checked"/>
     164        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
     165        self.assertEquals("""<form><p>
     166          <input type="checkbox" name="foo" value="1"/>
     167        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
     168
     169    def test_fill_input_radio_no_value(self):
     170        html = HTML(u"""<form><p>
     171          <input type="radio" name="foo" />
     172        </p></form>""") | HTMLFormFiller()
     173        self.assertEquals("""<form><p>
     174          <input type="radio" name="foo"/>
     175        </p></form>""", html.render())
     176
     177    def test_fill_input_radio_single_value(self):
     178        html = HTML(u"""<form><p>
     179          <input type="radio" name="foo" value="1" />
     180        </p></form>""")
     181        self.assertEquals("""<form><p>
     182          <input type="radio" name="foo" value="1" checked="checked"/>
     183        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
     184        self.assertEquals("""<form><p>
     185          <input type="radio" name="foo" value="1"/>
     186        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
     187
     188    def test_fill_input_radio_multi_value(self):
     189        html = HTML(u"""<form><p>
     190          <input type="radio" name="foo" value="1" />
     191        </p></form>""")
     192        self.assertEquals("""<form><p>
     193          <input type="radio" name="foo" value="1" checked="checked"/>
     194        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
     195        self.assertEquals("""<form><p>
     196          <input type="radio" name="foo" value="1"/>
     197        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
     198
     199    def test_fill_input_radio_empty_string(self):
     200        html = HTML(u"""<form><p>
     201          <input type="radio" name="foo" value="" />
     202        </p></form>""")
     203        self.assertEquals("""<form><p>
     204          <input type="radio" name="foo" value="" checked="checked"/>
     205        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
     206
     207    def test_fill_input_radio_multi_empty_string(self):
     208        html = HTML(u"""<form><p>
     209          <input type="radio" name="foo" value="" />
     210        </p></form>""")
     211        self.assertEquals("""<form><p>
     212          <input type="radio" name="foo" value="" checked="checked"/>
     213        </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render())
     214
     215    def test_fill_select_no_value_auto(self):
     216        html = HTML(u"""<form><p>
     217          <select name="foo">
     218            <option>1</option>
     219            <option>2</option>
     220            <option>3</option>
     221          </select>
     222        </p></form>""") | HTMLFormFiller()
     223        self.assertEquals("""<form><p>
     224          <select name="foo">
     225            <option>1</option>
     226            <option>2</option>
     227            <option>3</option>
     228          </select>
     229        </p></form>""", html.render())
     230
     231    def test_fill_select_no_value_defined(self):
     232        html = HTML(u"""<form><p>
     233          <select name="foo">
     234            <option value="1">1</option>
     235            <option value="2">2</option>
     236            <option value="3">3</option>
     237          </select>
     238        </p></form>""") | HTMLFormFiller()
     239        self.assertEquals("""<form><p>
     240          <select name="foo">
     241            <option value="1">1</option>
     242            <option value="2">2</option>
     243            <option value="3">3</option>
     244          </select>
     245        </p></form>""", html.render())
     246
     247    def test_fill_select_single_value_auto(self):
     248        html = HTML(u"""<form><p>
     249          <select name="foo">
     250            <option>1</option>
     251            <option>2</option>
     252            <option>3</option>
     253          </select>
     254        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
     255        self.assertEquals("""<form><p>
     256          <select name="foo">
     257            <option selected="selected">1</option>
     258            <option>2</option>
     259            <option>3</option>
     260          </select>
     261        </p></form>""", html.render())
     262
     263    def test_fill_select_single_value_defined(self):
     264        html = HTML(u"""<form><p>
     265          <select name="foo">
     266            <option value="1">1</option>
     267            <option value="2">2</option>
     268            <option value="3">3</option>
     269          </select>
     270        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
     271        self.assertEquals("""<form><p>
     272          <select name="foo">
     273            <option value="1" selected="selected">1</option>
     274            <option value="2">2</option>
     275            <option value="3">3</option>
     276          </select>
     277        </p></form>""", html.render())
     278
     279    def test_fill_select_multi_value_auto(self):
     280        html = HTML(u"""<form><p>
     281          <select name="foo" multiple>
     282            <option>1</option>
     283            <option>2</option>
     284            <option>3</option>
     285          </select>
     286        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
     287        self.assertEquals("""<form><p>
     288          <select name="foo" multiple="multiple">
     289            <option selected="selected">1</option>
     290            <option>2</option>
     291            <option selected="selected">3</option>
     292          </select>
     293        </p></form>""", html.render())
     294
     295    def test_fill_select_multi_value_defined(self):
     296        html = HTML(u"""<form><p>
     297          <select name="foo" multiple>
     298            <option value="1">1</option>
     299            <option value="2">2</option>
     300            <option value="3">3</option>
     301          </select>
     302        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
     303        self.assertEquals("""<form><p>
     304          <select name="foo" multiple="multiple">
     305            <option value="1" selected="selected">1</option>
     306            <option value="2">2</option>
     307            <option value="3" selected="selected">3</option>
     308          </select>
     309        </p></form>""", html.render())
     310
     311    def test_fill_option_segmented_text(self):
     312        html = MarkupTemplate(u"""<form>
     313          <select name="foo">
     314            <option value="1">foo $x</option>
     315          </select>
     316        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'})
     317        self.assertEquals(u"""<form>
     318          <select name="foo">
     319            <option value="1" selected="selected">foo 1</option>
     320          </select>
     321        </form>""", html.render())
     322
     323    def test_fill_option_segmented_text_no_value(self):
     324        html = MarkupTemplate("""<form>
     325          <select name="foo">
     326            <option>foo $x bar</option>
     327          </select>
     328        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'})
     329        self.assertEquals("""<form>
     330          <select name="foo">
     331            <option selected="selected">foo 1 bar</option>
     332          </select>
     333        </form>""", html.render())
     334
     335    def test_fill_option_unicode_value(self):
     336        html = HTML(u"""<form>
     337          <select name="foo">
     338            <option value="&ouml;">foo</option>
     339          </select>
     340        </form>""") | HTMLFormFiller(data={'foo': u'ö'})
     341        self.assertEquals(u"""<form>
     342          <select name="foo">
     343            <option value="ö" selected="selected">foo</option>
     344          </select>
     345        </form>""", html.render(encoding=None))
     346
     347    def test_fill_input_password_disabled(self):
     348        html = HTML(u"""<form><p>
     349          <input type="password" name="pass" />
     350        </p></form>""") | HTMLFormFiller(data={'pass': 'bar'})
     351        self.assertEquals("""<form><p>
     352          <input type="password" name="pass"/>
     353        </p></form>""", html.render())
     354
     355    def test_fill_input_password_enabled(self):
     356        html = HTML(u"""<form><p>
     357          <input type="password" name="pass" />
     358        </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
     359        self.assertEquals("""<form><p>
     360          <input type="password" name="pass" value="1234"/>
     361        </p></form>""", html.render())
     362
     363
     364class HTMLSanitizerTestCase(unittest.TestCase):
     365
     366    def test_sanitize_unchanged(self):
     367        html = HTML(u'<a href="#">fo<br />o</a>')
     368        self.assertEquals('<a href="#">fo<br/>o</a>',
     369                          (html | HTMLSanitizer()).render())
     370        html = HTML(u'<a href="#with:colon">foo</a>')
     371        self.assertEquals('<a href="#with:colon">foo</a>',
     372                          (html | HTMLSanitizer()).render())
     373
     374    def test_sanitize_escape_text(self):
     375        html = HTML(u'<a href="#">fo&amp;</a>')
     376        self.assertEquals('<a href="#">fo&amp;</a>',
     377                          (html | HTMLSanitizer()).render())
     378        html = HTML(u'<a href="#">&lt;foo&gt;</a>')
     379        self.assertEquals('<a href="#">&lt;foo&gt;</a>',
     380                          (html | HTMLSanitizer()).render())
     381
     382    def test_sanitize_entityref_text(self):
     383        html = HTML(u'<a href="#">fo&ouml;</a>')
     384        self.assertEquals(u'<a href="#">foö</a>',
     385                          (html | HTMLSanitizer()).render(encoding=None))
     386
     387    def test_sanitize_escape_attr(self):
     388        html = HTML(u'<div title="&lt;foo&gt;"></div>')
     389        self.assertEquals('<div title="&lt;foo&gt;"/>',
     390                          (html | HTMLSanitizer()).render())
     391
     392    def test_sanitize_close_empty_tag(self):
     393        html = HTML(u'<a href="#">fo<br>o</a>')
     394        self.assertEquals('<a href="#">fo<br/>o</a>',
     395                          (html | HTMLSanitizer()).render())
     396
     397    def test_sanitize_invalid_entity(self):
     398        html = HTML(u'&junk;')
     399        self.assertEquals('&amp;junk;', (html | HTMLSanitizer()).render())
     400
     401    def test_sanitize_remove_script_elem(self):
     402        html = HTML(u'<script>alert("Foo")</script>')
     403        self.assertEquals('', (html | HTMLSanitizer()).render())
     404        html = HTML(u'<SCRIPT SRC="http://example.com/"></SCRIPT>')
     405        self.assertEquals('', (html | HTMLSanitizer()).render())
     406        self.assertRaises(ParseError, HTML, u'<SCR\0IPT>alert("foo")</SCR\0IPT>')
     407        self.assertRaises(ParseError, HTML,
     408                          u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
     409
     410    def test_sanitize_remove_onclick_attr(self):
     411        html = HTML(u'<div onclick=\'alert("foo")\' />')
     412        self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
     413
     414    def test_sanitize_remove_input_password(self):
     415        html = HTML(u'<form><input type="password" /></form>')
     416        self.assertEquals('<form/>', (html | HTMLSanitizer()).render())
     417
     418    def test_sanitize_remove_comments(self):
     419        html = HTML(u'''<div><!-- conditional comment crap --></div>''')
     420        self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
     421
     422    def test_sanitize_remove_style_scripts(self):
     423        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
     424        # Inline style with url() using javascript: scheme
     425        html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
     426        self.assertEquals('<div/>', (html | sanitizer).render())
     427        # Inline style with url() using javascript: scheme, using control char
     428        html = HTML(u'<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
     429        self.assertEquals('<div/>', (html | sanitizer).render())
     430        # Inline style with url() using javascript: scheme, in quotes
     431        html = HTML(u'<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
     432        self.assertEquals('<div/>', (html | sanitizer).render())
     433        # IE expressions in CSS not allowed
     434        html = HTML(u'<DIV STYLE=\'width: expression(alert("foo"));\'>')
     435        self.assertEquals('<div/>', (html | sanitizer).render())
     436        html = HTML(u'<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>')
     437        self.assertEquals('<div/>', (html | sanitizer).render())
     438        html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"));'
     439                                 'color: #fff\'>')
     440        self.assertEquals('<div style="color: #fff"/>',
     441                          (html | sanitizer).render())
     442        # Inline style with url() using javascript: scheme, using unicode
     443        # escapes
     444        html = HTML(u'<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>')
     445        self.assertEquals('<div/>', (html | sanitizer).render())
     446        html = HTML(u'<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>')
     447        self.assertEquals('<div/>', (html | sanitizer).render())
     448        html = HTML(u'<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>')
     449        self.assertEquals('<div/>', (html | sanitizer).render())
     450        html = HTML(u'<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>')
     451        self.assertEquals('<div/>', (html | sanitizer).render())
     452        html = HTML(u'<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>')
     453        self.assertEquals('<div/>', (html | sanitizer).render())
     454
     455    def test_sanitize_remove_style_phishing(self):
     456        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
     457        # The position property is not allowed
     458        html = HTML(u'<div style="position:absolute;top:0"></div>')
     459        self.assertEquals('<div style="top:0"/>', (html | sanitizer).render())
     460        # Normal margins get passed through
     461        html = HTML(u'<div style="margin:10px 20px"></div>')
     462        self.assertEquals('<div style="margin:10px 20px"/>',
     463                          (html | sanitizer).render())
     464        # But not negative margins
     465        html = HTML(u'<div style="margin:-1000px 0 0"></div>')
     466        self.assertEquals('<div/>', (html | sanitizer).render())
     467        html = HTML(u'<div style="margin-left:-2000px 0 0"></div>')
     468        self.assertEquals('<div/>', (html | sanitizer).render())
     469        html = HTML(u'<div style="margin-left:1em 1em 1em -4000px"></div>')
     470        self.assertEquals('<div/>', (html | sanitizer).render())
     471
     472    def test_sanitize_remove_src_javascript(self):
     473        html = HTML(u'<img src=\'javascript:alert("foo")\'>')
     474        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
     475        # Case-insensitive protocol matching
     476        html = HTML(u'<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
     477        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
     478        # Grave accents (not parsed)
     479        self.assertRaises(ParseError, HTML,
     480                          u'<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
     481        # Protocol encoded using UTF-8 numeric entities
     482        html = HTML(u'<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
     483                    '&#112;&#116;&#58;alert("foo")\'>')
     484        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
     485        # Protocol encoded using UTF-8 numeric entities without a semicolon
     486        # (which is allowed because the max number of digits is used)
     487        html = HTML(u'<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
     488                    '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
     489                    '&#0000058alert("foo")\'>')
     490        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
     491        # Protocol encoded using UTF-8 numeric hex entities without a semicolon
     492        # (which is allowed because the max number of digits is used)
     493        html = HTML(u'<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
     494                    '&#x70&#x74&#x3A;alert("foo")\'>')
     495        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
     496        # Embedded tab character in protocol
     497        html = HTML(u'<IMG SRC=\'jav\tascript:alert("foo");\'>')
     498        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
     499        # Embedded tab character in protocol, but encoded this time
     500        html = HTML(u'<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
     501        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
     502
     503
     504def suite():
     505    suite = unittest.TestSuite()
     506    suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
     507    suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test'))
     508    suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test'))
     509    return suite
     510
     511
     512if __name__ == '__main__':
     513    unittest.main(defaultTest='suite')
  • genshi/filters/tests/transform.py

    diff -r 4bbd2b021cb5 genshi/filters/tests/transform.py
    a b  
    4848
    4949def _transform(html, transformer, with_attrs=False):
    5050    """Apply transformation returning simplified marked stream."""
    51     if isinstance(html, basestring):
    52         html = HTML(html)
     51    if isinstance(html, basestring) and not isinstance(html, unicode):
     52        html = HTML(html, encoding='utf-8')
     53    elif isinstance(html, unicode):
     54        html = HTML(html, encoding='utf-8')
    5355    stream = transformer(html, keep_marks=True)
    5456    return _simplify(stream, with_attrs)
    5557
     
    5759class SelectTest(unittest.TestCase):
    5860    """Test .select()"""
    5961    def _select(self, select):
    60         html = HTML(FOOBAR)
     62        html = HTML(FOOBAR, encoding='utf-8')
    6163        if isinstance(select, basestring):
    6264            select = [select]
    6365        transformer = Transformer(select[0])
     
    138140
    139141    def test_select_text_context(self):
    140142        self.assertEqual(
    141             list(Transformer('.')(HTML('foo'), keep_marks=True)),
     143            list(Transformer('.')(HTML(u'foo'), keep_marks=True)),
    142144            [('OUTSIDE', ('TEXT', u'foo', (None, 1, 0)))],
    143145            )
    144146
     
    205207
    206208    def test_invert_text_context(self):
    207209        self.assertEqual(
    208             _simplify(Transformer('.').invert()(HTML('foo'), keep_marks=True)),
     210            _simplify(Transformer('.').invert()(HTML(u'foo'), keep_marks=True)),
    209211            [(None, 'TEXT', u'foo')],
    210212            )
    211213
     
    271273
    272274    def test_empty_text_context(self):
    273275        self.assertEqual(
    274             _simplify(Transformer('.')(HTML('foo'), keep_marks=True)),
     276            _simplify(Transformer('.')(HTML(u'foo'), keep_marks=True)),
    275277            [(OUTSIDE, TEXT, u'foo')],
    276278            )
    277279
     
    656658
    657659            def __iter__(self):
    658660                self.count += 1
    659                 return iter(HTML('CONTENT %i' % self.count))
     661                return iter(HTML(u'CONTENT %i' % self.count))
    660662
    661         if isinstance(html, basestring):
     663        if isinstance(html, basestring) and not isinstance(html, unicode):
     664            html = HTML(html, encoding='utf-8')
     665        else:
    662666            html = HTML(html)
    663667        if content is None:
    664668            content = Injector()
  • genshi/filters/transform.py

    diff -r 4bbd2b021cb5 genshi/filters/transform.py
    a b  
    3131...  <body>
    3232...    Some <em>body</em> text.
    3333...  </body>
    34 ... </html>''')
     34... </html>''',
     35... encoding='utf-8')
    3536>>> print(html | Transformer('body/em').map(unicode.upper, TEXT)
    3637...                                    .unwrap().wrap(tag.u))
    3738<html>
     
    136137    mark.
    137138
    138139    >>> html = HTML('<html><head><title>Some Title</title></head>'
    139     ...             '<body>Some <em>body</em> text.</body></html>')
     140    ...             '<body>Some <em>body</em> text.</body></html>',
     141    ...             encoding='utf-8')
    140142
    141143    Transformations act on selected stream events matching an XPath expression.
    142144    Here's an example of removing some markup (the title, in this case)
     
    215217        ...             yield mark, (kind, data.upper(), pos)
    216218        ...         else:
    217219        ...             yield mark, (kind, data, pos)
    218         >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
     220        >>> short_stream = HTML('<body>Some <em>test</em> text</body>',
     221        ...                      encoding='utf-8')
    219222        >>> print(short_stream | Transformer('.//em/text()').apply(upper))
    220223        <body>Some <em>TEST</em> text</body>
    221224        """
     
    233236        """Mark events matching the given XPath expression, within the current
    234237        selection.
    235238
    236         >>> html = HTML('<body>Some <em>test</em> text</body>')
     239        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
    237240        >>> print(html | Transformer().select('.//em').trace())
    238241        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
    239242        (None, ('TEXT', u'Some ', (None, 1, 6)))
     
    257260        Specificaly, all marks are converted to null marks, and all null marks
    258261        are converted to OUTSIDE marks.
    259262
    260         >>> html = HTML('<body>Some <em>test</em> text</body>')
     263        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
    261264        >>> print(html | Transformer('//em').invert().trace())
    262265        ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
    263266        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
     
    277280
    278281        Example:
    279282
    280         >>> html = HTML('<body>Some <em>test</em> text</body>')
     283        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
    281284        >>> print(html | Transformer('//em').end().trace())
    282285        ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
    283286        ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
     
    301304        Example:
    302305
    303306        >>> html = HTML('<html><head><title>Some Title</title></head>'
    304         ...             '<body>Some <em>body</em> text.</body></html>')
     307        ...             '<body>Some <em>body</em> text.</body></html>',
     308        ...             encoding='utf-8')
    305309        >>> print(html | Transformer('.//em').empty())
    306310        <html><head><title>Some Title</title></head><body>Some <em/>
    307311        text.</body></html>
     
    316320        Example:
    317321
    318322        >>> html = HTML('<html><head><title>Some Title</title></head>'
    319         ...             '<body>Some <em>body</em> text.</body></html>')
     323        ...             '<body>Some <em>body</em> text.</body></html>',
     324        ...             encoding='utf-8')
    320325        >>> print(html | Transformer('.//em').remove())
    321326        <html><head><title>Some Title</title></head><body>Some
    322327        text.</body></html>
     
    333338        Example:
    334339
    335340        >>> html = HTML('<html><head><title>Some Title</title></head>'
    336         ...             '<body>Some <em>body</em> text.</body></html>')
     341        ...             '<body>Some <em>body</em> text.</body></html>',
     342        ...             encoding='utf-8')
    337343        >>> print(html | Transformer('.//em').unwrap())
    338344        <html><head><title>Some Title</title></head><body>Some body
    339345        text.</body></html>
     
    346352        """Wrap selection in an element.
    347353
    348354        >>> html = HTML('<html><head><title>Some Title</title></head>'
    349         ...             '<body>Some <em>body</em> text.</body></html>')
     355        ...             '<body>Some <em>body</em> text.</body></html>',
     356        ...             encoding='utf-8')
    350357        >>> print(html | Transformer('.//em').wrap('strong'))
    351358        <html><head><title>Some Title</title></head><body>Some
    352359        <strong><em>body</em></strong> text.</body></html>
     
    362369        """Replace selection with content.
    363370
    364371        >>> html = HTML('<html><head><title>Some Title</title></head>'
    365         ...             '<body>Some <em>body</em> text.</body></html>')
     372        ...             '<body>Some <em>body</em> text.</body></html>',
     373        ...             encoding='utf-8')
    366374        >>> print(html | Transformer('.//title/text()').replace('New Title'))
    367375        <html><head><title>New Title</title></head><body>Some <em>body</em>
    368376        text.</body></html>
     
    380388        tag:
    381389
    382390        >>> html = HTML('<html><head><title>Some Title</title></head>'
    383         ...             '<body>Some <em>body</em> text.</body></html>')
     391        ...             '<body>Some <em>body</em> text.</body></html>',
     392        ...             encoding='utf-8')
    384393        >>> print(html | Transformer('.//em').before('emphasised '))
    385394        <html><head><title>Some Title</title></head><body>Some emphasised
    386395        <em>body</em> text.</body></html>
     
    397406        Here, we insert some text after the </em> closing tag:
    398407
    399408        >>> html = HTML('<html><head><title>Some Title</title></head>'
    400         ...             '<body>Some <em>body</em> text.</body></html>')
     409        ...             '<body>Some <em>body</em> text.</body></html>',
     410        ...             encoding='utf-8')
    401411        >>> print(html | Transformer('.//em').after(' rock'))
    402412        <html><head><title>Some Title</title></head><body>Some <em>body</em>
    403413        rock text.</body></html>
     
    414424        Inserting some new text at the start of the <body>:
    415425
    416426        >>> html = HTML('<html><head><title>Some Title</title></head>'
    417         ...             '<body>Some <em>body</em> text.</body></html>')
     427        ...             '<body>Some <em>body</em> text.</body></html>',
     428        ...             encoding='utf-8')
    418429        >>> print(html | Transformer('.//body').prepend('Some new body text. '))
    419430        <html><head><title>Some Title</title></head><body>Some new body text.
    420431        Some <em>body</em> text.</body></html>
     
    429440        """Insert content before the END event of the selection.
    430441
    431442        >>> html = HTML('<html><head><title>Some Title</title></head>'
    432         ...             '<body>Some <em>body</em> text.</body></html>')
     443        ...             '<body>Some <em>body</em> text.</body></html>',
     444        ...             encoding='utf-8')
    433445        >>> print(html | Transformer('.//body').append(' Some new body text.'))
    434446        <html><head><title>Some Title</title></head><body>Some <em>body</em>
    435447        text. Some new body text.</body></html>
     
    450462
    451463        >>> html = HTML('<html><head><title>Some Title</title></head>'
    452464        ...             '<body>Some <em class="before">body</em> <em>text</em>.</body>'
    453         ...             '</html>')
     465        ...             '</html>', encoding='utf-8')
    454466        >>> print(html | Transformer('body/em').attr('class', None))
    455467        <html><head><title>Some Title</title></head><body>Some <em>body</em>
    456468        <em>text</em>.</body></html>
     
    493505        >>> from genshi.builder import tag
    494506        >>> buffer = StreamBuffer()
    495507        >>> html = HTML('<html><head><title>Some Title</title></head>'
    496         ...             '<body>Some <em>body</em> text.</body></html>')
     508        ...             '<body>Some <em>body</em> text.</body></html>',
     509        ...             encoding='utf-8')
    497510        >>> print(html | Transformer('head/title/text()').copy(buffer)
    498511        ...     .end().select('body').prepend(tag.h1(buffer)))
    499512        <html><head><title>Some Title</title></head><body><h1>Some
     
    514527
    515528        >>> html = HTML('<html><head><title>Some Title</title></head>'
    516529        ...             '<body><em>Some</em> <em class="before">body</em>'
    517         ...             '<em>text</em>.</body></html>')
     530        ...             '<em>text</em>.</body></html>',
     531        ...             encoding='utf-8')
    518532        >>> buffer = StreamBuffer()
    519533        >>> def apply_attr(name, entry):
    520534        ...     return list(buffer)[0][1][1].get('class')
     
    546560        >>> from genshi.builder import tag
    547561        >>> buffer = StreamBuffer()
    548562        >>> html = HTML('<html><head><title>Some Title</title></head>'
    549         ...             '<body>Some <em>body</em> text.</body></html>')
     563        ...             '<body>Some <em>body</em> text.</body></html>',
     564        ...             encoding='utf-8')
    550565        >>> print(html | Transformer('.//em/text()').cut(buffer)
    551566        ...     .end().select('.//em').after(tag.h1(buffer)))
    552567        <html><head><title>Some Title</title></head><body>Some
     
    577592        top of the document:
    578593
    579594        >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
    580         ...            'text <note>two</note>.</body></doc>')
     595        ...            'text <note>two</note>.</body></doc>',
     596        ...             encoding='utf-8')
    581597        >>> buffer = StreamBuffer()
    582598        >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
    583599        ...     .end().buffer().select('notes').prepend(buffer))
     
    595611
    596612        >>> from genshi.filters.html import HTMLSanitizer
    597613        >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
    598         ...             '</script> and some more text</body></html>')
     614        ...             '</script> and some more text</body></html>',
     615        ...             encoding='utf-8')
    599616        >>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
    600617        <html><body>Some text and some more text</body></html>
    601618
     
    609626        the selection.
    610627
    611628        >>> html = HTML('<html><head><title>Some Title</title></head>'
    612         ...               '<body>Some <em>body</em> text.</body></html>')
     629        ...               '<body>Some <em>body</em> text.</body></html>',
     630        ...             encoding='utf-8')
    613631        >>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
    614632        <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
    615633        text.</body></html>
     
    627645
    628646        >>> html = HTML('<html><body>Some text, some more text and '
    629647        ...             '<b>some bold text</b>\\n'
    630         ...             '<i>some italicised text</i></body></html>')
     648        ...             '<i>some italicised text</i></body></html>',
     649        ...             encoding='utf-8')
    631650        >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
    632651        <html><body>Some text, some more text and <b>SOME bold text</b>
    633652        <i>some italicised text</i></body></html>
     
    649668        """Rename matching elements.
    650669
    651670        >>> html = HTML('<html><body>Some text, some more text and '
    652         ...             '<b>some bold text</b></body></html>')
     671        ...             '<b>some bold text</b></body></html>',
     672        ...             encoding='utf-8')
    653673        >>> print(html | Transformer('body/b').rename('strong'))
    654674        <html><body>Some text, some more text and <strong>some bold text</strong></body></html>
    655675        """
     
    658678    def trace(self, prefix='', fileobj=None):
    659679        """Print events as they pass through the transform.
    660680
    661         >>> html = HTML('<body>Some <em>test</em> text</body>')
     681        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
    662682        >>> print(html | Transformer('em').trace())
    663683        (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
    664684        (None, ('TEXT', u'Some ', (None, 1, 6)))
     
    10241044    ...             yield event
    10251045    ...         for event in stream:
    10261046    ...             yield event
    1027     >>> html = HTML('<body>Some <em>test</em> text</body>')
     1047    >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
    10281048    >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
    10291049    Prefix <body>Some <em>test</em> text</body>
    10301050    """
  • genshi/input.py

    diff -r 4bbd2b021cb5 genshi/input.py
    a b  
    1818from itertools import chain
    1919import htmlentitydefs as entities
    2020import HTMLParser as html
    21 from StringIO import StringIO
    2221from xml.parsers import expat
    2322
    2423from genshi.core import Attrs, QName, Stream, stripentities
    2524from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, \
    2625                        END_NS, START_CDATA, END_CDATA, PI, COMMENT
     26from genshi.compat import StringIO, BytesIO
     27
    2728
    2829__all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML']
    2930__docformat__ = 'restructuredtext en'
     
    9091
    9192    _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
    9293                   entities.name2codepoint.items()]
    93     _external_dtd = '\n'.join(_entitydefs)
     94    _external_dtd = u'\n'.join(_entitydefs).encode('utf-8')
    9495
    9596    def __init__(self, source, filename=None, encoding=None):
    9697        """Initialize the parser for the given XML input.
     
    108109        # Setup the Expat parser
    109110        parser = expat.ParserCreate(encoding, '}')
    110111        parser.buffer_text = True
    111         parser.returns_unicode = True
     112        # Python 3 does not have returns_unicode
     113        if hasattr(parser, 'returns_unicode'):
     114            parser.returns_unicode = True
    112115        parser.ordered_attributes = True
    113116
    114117        parser.StartElementHandler = self._handle_start
     
    146149                while 1:
    147150                    while not done and len(self._queue) == 0:
    148151                        data = self.source.read(bufsize)
    149                         if data == '': # end of data
     152                        if not data: # end of data
    150153                            if hasattr(self, 'expat'):
    151154                                self.expat.Parse('', True)
    152155                                del self.expat # get rid of circular references
     
    170173
    171174    def _build_foreign(self, context, base, sysid, pubid):
    172175        parser = self.expat.ExternalEntityParserCreate(context)
    173         parser.ParseFile(StringIO(self._external_dtd))
     176        parser.ParseFile(BytesIO(self._external_dtd))
    174177        return 1
    175178
    176179    def _enqueue(self, kind, data=None, pos=None):
     
    279282   
    280283    The parsing is initiated by iterating over the parser object:
    281284   
    282     >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>'))
     285    >>> parser = HTMLParser(BytesIO(u'<UL compact><LI>Foo</UL>'.encode('utf-8')), encoding='utf-8')
    283286    >>> for kind, data, pos in parser:
    284287    ...     print('%s %s' % (kind, data))
    285288    START (QName('ul'), Attrs([(QName('compact'), u'compact')]))
     
    293296                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
    294297                              'param'])
    295298
    296     def __init__(self, source, filename=None, encoding='utf-8'):
     299    def __init__(self, source, filename=None, encoding=None):
    297300        """Initialize the parser for the given HTML input.
    298301       
    299302        :param source: the HTML text as a file-like object
     
    320323                while 1:
    321324                    while not done and len(self._queue) == 0:
    322325                        data = self.source.read(bufsize)
    323                         if data == '': # end of data
     326                        if not data: # end of data
    324327                            self.close()
    325328                            done = True
    326329                        else:
     330                            if not isinstance(data, unicode):
     331                                # bytes
     332                                if self.encoding:
     333                                    data = data.decode(self.encoding)
     334                                else:
     335                                    raise UnicodeError("source returned bytes, but no encoding specified")
    327336                            self.feed(data)
    328337                    for kind, data, pos in self._queue:
    329338                        yield kind, data, pos
     
    403412        self._enqueue(COMMENT, text)
    404413
    405414
    406 def HTML(text, encoding='utf-8'):
     415def HTML(text, encoding=None):
    407416    """Parse the given HTML source and return a markup stream.
    408417   
    409418    Unlike with `HTMLParser`, the returned stream is reusable, meaning it can be
    410419    iterated over multiple times:
    411420   
    412     >>> html = HTML('<body><h1>Foo</h1></body>')
     421    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    413422    >>> print(html)
    414423    <body><h1>Foo</h1></body>
    415424    >>> print(html.select('h1'))
     
    422431    :raises ParseError: if the HTML text is not well-formed, and error recovery
    423432                        fails
    424433    """
    425     return Stream(list(HTMLParser(StringIO(text), encoding=encoding)))
     434    if isinstance(text, unicode):
     435        return Stream(list(HTMLParser(StringIO(text), encoding=encoding)))
     436    return Stream(list(HTMLParser(BytesIO(text), encoding=encoding)))
    426437
    427438
    428439def _coalesce(stream):
  • genshi/output.py

    diff -r 4bbd2b021cb5 genshi/output.py
    a b  
    2727__docformat__ = 'restructuredtext en'
    2828
    2929
    30 def encode(iterator, method='xml', encoding='utf-8', out=None):
     30def encode(iterator, method='xml', encoding=None, out=None):
    3131    """Encode serializer output into a string.
    3232   
    3333    :param iterator: the iterator returned from serializing a stream (basically
  • genshi/template/astutil.py

    diff -r 4bbd2b021cb5 genshi/template/astutil.py
    a b  
    2121    def parse(source, mode):
    2222        return compile(source, '', mode, _ast.PyCF_ONLY_AST)
    2323
     24from genshi.compat import IS_PYTHON2
    2425
    2526__docformat__ = 'restructuredtext en'
    2627
     
    129130                first = False
    130131            self._write('**' + node.kwarg)
    131132
     133    if not IS_PYTHON2:
     134        # In Python 3 arguments get a special node
     135        def visit_arg(self, node):
     136            self._write(node.arg)
     137
    132138    # FunctionDef(identifier name, arguments args,
    133139    #                           stmt* body, expr* decorator_list)
    134140    def visit_FunctionDef(self, node):
     
    289295        self._change_indent(-1)
    290296
    291297
    292     # Raise(expr? type, expr? inst, expr? tback)
    293     def visit_Raise(self, node):
    294         self._new_line()
    295         self._write('raise')
    296         if not node.type:
    297             return
    298         self._write(' ')
    299         self.visit(node.type)
    300         if not node.inst:
    301             return
    302         self._write(', ')
    303         self.visit(node.inst)
    304         if not node.tback:
    305             return
    306         self._write(', ')
    307         self.visit(node.tback)
     298    if IS_PYTHON2:
     299        # Raise(expr? type, expr? inst, expr? tback)
     300        def visit_Raise(self, node):
     301            self._new_line()
     302            self._write('raise')
     303            if not node.type:
     304                return
     305            self._write(' ')
     306            self.visit(node.type)
     307            if not node.inst:
     308                return
     309            self._write(', ')
     310            self.visit(node.inst)
     311            if not node.tback:
     312                return
     313            self._write(', ')
     314            self.visit(node.tback)
     315    else:
     316        # Raise(expr? exc, expr? cause)
     317        def visit_Raise(self, node):
     318            self._new_line()
     319            self._write('raise')
     320            if not node.exc:
     321                return
     322            self._write(' ')
     323            self.visit(node.exc)
     324            if not node.cause:
     325                return
     326            self._write(' from ')
     327            self.visit(node.cause)
    308328
    309329    # TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
    310330    def visit_TryExcept(self, node):
     
    626646    def visit_Str(self, node):
    627647        self._write(repr(node.s))
    628648
     649    if not IS_PYTHON2:
     650        # Bytes(bytes s)
     651        def visit_Bytes(self, node):
     652            self._write(repr(node.s))
     653
    629654    # Attribute(expr value, identifier attr, expr_context ctx)
    630655    def visit_Attribute(self, node):
    631656        self.visit(node.value)
  • genshi/template/base.py

    diff -r 4bbd2b021cb5 genshi/template/base.py
    a b  
    1515
    1616from collections import deque
    1717import os
    18 from StringIO import StringIO
    1918import sys
    2019
     20from genshi.compat import StringIO, BytesIO
    2121from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
    2222from genshi.input import ParseError
    2323
     
    398398        self._init_loader()
    399399        self._prepared = False
    400400
    401         if isinstance(source, basestring):
    402             source = StringIO(source)
    403         else:
    404             source = source
     401        if not isinstance(source, Stream) and not hasattr(source, 'read'):
     402            if isinstance(source, unicode):
     403                source = StringIO(source)
     404            else:
     405                source = BytesIO(source)
    405406        try:
    406407            self._stream = self._parse(source, encoding)
    407408        except ParseError, e:
  • genshi/template/directives.py

    diff -r 4bbd2b021cb5 genshi/template/directives.py
    a b  
    622622        if not info:
    623623            raise TemplateRuntimeError('"when" directives can only be used '
    624624                                       'inside a "choose" directive',
    625                                        self.filename, *stream.next()[2][1:])
     625                                       self.filename, *(stream.next())[2][1:])
    626626        if info[0]:
    627627            return []
    628628        if not self.expr and not info[1]:
    629629            raise TemplateRuntimeError('either "choose" or "when" directive '
    630630                                       'must have a test expression',
    631                                        self.filename, *stream.next()[2][1:])
     631                                       self.filename, *(stream.next())[2][1:])
    632632        if info[1]:
    633633            value = info[2]
    634634            if self.expr:
     
    661661        if not info:
    662662            raise TemplateRuntimeError('an "otherwise" directive can only be '
    663663                                       'used inside a "choose" directive',
    664                                        self.filename, *stream.next()[2][1:])
     664                                       self.filename, *(stream.next())[2][1:])
    665665        if info[0]:
    666666            return []
    667667        info[0] = True
  • genshi/template/eval.py

    diff -r 4bbd2b021cb5 genshi/template/eval.py
    a b  
    2424from genshi.template.base import TemplateRuntimeError
    2525from genshi.util import flatten
    2626
     27from genshi.compat import get_code_params, build_code_chunk, IS_PYTHON2
     28
    2729__all__ = ['Code', 'Expression', 'Suite', 'LenientLookup', 'StrictLookup',
    2830           'Undefined', 'UndefinedError']
    2931__docformat__ = 'restructuredtext en'
     
    98100    def __getstate__(self):
    99101        state = {'source': self.source, 'ast': self.ast,
    100102                 'lookup': self._globals.im_self}
    101         c = self.code
    102         state['code'] = (c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code,
    103                          c.co_consts, c.co_names, c.co_varnames, c.co_filename,
    104                          c.co_name, c.co_firstlineno, c.co_lnotab, (), ())
     103        state['code'] = get_code_params(self.code)
    105104        return state
    106105
    107106    def __setstate__(self, state):
     
    236235    of that variable, will raise an exception that includes the name used to
    237236    reference that undefined variable.
    238237   
    239     >>> foo('bar')
    240     Traceback (most recent call last):
    241         ...
    242     UndefinedError: "foo" not defined
     238    >>> try:
     239    ...     foo('bar')
     240    ... except UndefinedError, e:
     241    ...     print e.msg
     242    "foo" not defined
    243243
    244     >>> foo.bar
    245     Traceback (most recent call last):
    246         ...
    247     UndefinedError: "foo" not defined
     244    >>> try:
     245    ...     foo.bar
     246    ... except UndefinedError, e:
     247    ...     print e.msg
     248    "foo" not defined
    248249   
    249250    :see: `LenientLookup`
    250251    """
     
    388389    raise an ``UndefinedError``:
    389390   
    390391    >>> expr = Expression('nothing', lookup='strict')
    391     >>> expr.evaluate({})
    392     Traceback (most recent call last):
    393         ...
    394     UndefinedError: "nothing" not defined
     392    >>> try:
     393    ...     expr.evaluate({})
     394    ... except UndefinedError, e:
     395    ...     print e.msg
     396    "nothing" not defined
    395397   
    396398    The same happens when a non-existing attribute or item is accessed on an
    397399    existing object:
    398400   
    399401    >>> expr = Expression('something.nil', lookup='strict')
    400     >>> expr.evaluate({'something': dict()})
    401     Traceback (most recent call last):
    402         ...
    403     UndefinedError: {} has no member named "nil"
     402    >>> try:
     403    ...     expr.evaluate({'something': dict()})
     404    ... except UndefinedError, e:
     405    ...     print e.msg
     406    {} has no member named "nil"
    404407    """
    405408
    406409    @classmethod
     
    421424                rest = '\n'.join(['    %s' % line for line in rest.splitlines()])
    422425            source = '\n'.join([first, rest])
    423426    if isinstance(source, unicode):
    424         source = '\xef\xbb\xbf' + source.encode('utf-8')
     427        source = (u'\ufeff' + source).encode('utf-8')
    425428    return parse(source, mode)
    426429
    427430
    428431def _compile(node, source=None, mode='eval', filename=None, lineno=-1,
    429432             xform=None):
    430     if isinstance(filename, unicode):
    431         # unicode file names not allowed for code objects
    432         filename = filename.encode('utf-8', 'replace')
    433     elif not filename:
     433    if not filename:
    434434        filename = '<string>'
     435    if IS_PYTHON2:
     436        # Python 2 requires non-unicode filenames
     437        if isinstance(filename, unicode):
     438            filename = filename.encode('utf-8', 'replace')
     439    else:
     440        # Python 3 requires unicode filenames
     441        if not isinstance(filename, unicode):
     442            filename = filename.decode('utf-8', 'replace')
    435443    if lineno <= 0:
    436444        lineno = 1
    437445
     
    458466    try:
    459467        # We'd like to just set co_firstlineno, but it's readonly. So we need
    460468        # to clone the code object while adjusting the line number
    461         return CodeType(0, code.co_nlocals, code.co_stacksize,
    462                         code.co_flags | 0x0040, code.co_code, code.co_consts,
    463                         code.co_names, code.co_varnames, filename, name,
    464                         lineno, code.co_lnotab, (), ())
     469        return build_code_chunk(code, filename, name, lineno)
    465470    except RuntimeError:
    466471        return code
    467472
     
    493498    def _extract_names(self, node):
    494499        names = set()
    495500        def _process(node):
     501            if not IS_PYTHON2 and isinstance(node, _ast.arg):
     502                names.add(node.arg)
    496503            if isinstance(node, _ast.Name):
    497504                names.add(node.id)
    498505            elif isinstance(node, _ast.alias):
     
    513520        return names
    514521
    515522    def visit_Str(self, node):
    516         if isinstance(node.s, str):
     523        if not isinstance(node.s, unicode):
    517524            try: # If the string is ASCII, return a `str` object
    518525                node.s.decode('ascii')
    519526            except ValueError: # Otherwise return a `unicode` object
  • genshi/template/loader.py

    diff -r 4bbd2b021cb5 genshi/template/loader.py
    a b  
    4646   
    4747    >>> import tempfile
    4848    >>> fd, path = tempfile.mkstemp(suffix='.html', prefix='template')
    49     >>> os.write(fd, '<p>$var</p>')
     49    >>> os.write(fd, u'<p>$var</p>'.encode('utf-8'))
    5050    11
    5151    >>> os.close(fd)
    5252   
     
    283283        """
    284284        def _load_from_directory(filename):
    285285            filepath = os.path.join(path, filename)
    286             fileobj = open(filepath, 'U')
     286            fileobj = open(filepath, 'rbU')
    287287            mtime = os.path.getmtime(filepath)
    288288            def _uptodate():
    289289                return mtime == os.path.getmtime(filepath)
  • genshi/template/plugin.py

    diff -r 4bbd2b021cb5 genshi/template/plugin.py
    a b  
    4444            options = {}
    4545        self.options = options
    4646
    47         self.default_encoding = options.get('genshi.default_encoding', 'utf-8')
     47        self.default_encoding = options.get('genshi.default_encoding', None)
    4848        auto_reload = options.get('genshi.auto_reload', '1')
    4949        if isinstance(auto_reload, basestring):
    5050            auto_reload = auto_reload.lower() in ('1', 'on', 'yes', 'true')
  • genshi/template/tests/directives.py

    diff -r 4bbd2b021cb5 genshi/template/tests/directives.py
    a b  
    11371137          <py:with vars="x = x * 2; y = x / 2;">${x} ${y}</py:with>
    11381138        </div>""")
    11391139        self.assertEqual("""<div>
    1140           84 42
    1141         </div>""", tmpl.generate(x=42).render(encoding=None))
     1140          84 %s
     1141        </div>""" % (84 / 2), tmpl.generate(x=42).render(encoding=None))
    11421142
    11431143    def test_semicolon_escape(self):
    11441144        tmpl = MarkupTemplate("""<div xmlns:py="http://genshi.edgewall.org/">
  • genshi/template/tests/eval.py

    diff -r 4bbd2b021cb5 genshi/template/tests/eval.py
    a b  
    1414import doctest
    1515import os
    1616import pickle
    17 from StringIO import StringIO
    1817import sys
    1918from tempfile import mkstemp
    2019import unittest
     
    2322from genshi.template.base import Context
    2423from genshi.template.eval import Expression, Suite, Undefined, UndefinedError, \
    2524                                 UNDEFINED
     25from genshi.compat import BytesIO, IS_PYTHON2, wrapped_bytes
    2626
    2727
    2828class ExpressionTestCase(unittest.TestCase):
     
    3939
    4040    def test_pickle(self):
    4141        expr = Expression('1 < 2')
    42         buf = StringIO()
     42        buf = BytesIO()
    4343        pickle.dump(expr, buf, 2)
    4444        buf.seek(0)
    4545        unpickled = pickle.load(buf)
     
    5858    def test_str_literal(self):
    5959        self.assertEqual('foo', Expression('"foo"').evaluate({}))
    6060        self.assertEqual('foo', Expression('"""foo"""').evaluate({}))
    61         self.assertEqual('foo', Expression("'foo'").evaluate({}))
     61        self.assertEqual(u'foo'.encode('utf-8'),
     62                         Expression(wrapped_bytes("b'foo'")).evaluate({}))
    6263        self.assertEqual('foo', Expression("'''foo'''").evaluate({}))
    6364        self.assertEqual('foo', Expression("u'foo'").evaluate({}))
    6465        self.assertEqual('foo', Expression("r'foo'").evaluate({}))
     
    6869        self.assertEqual(u'ß', expr.evaluate({}))
    6970        expr = Expression("u'\xfe'")
    7071        self.assertEqual(u'ß', expr.evaluate({}))
    71         expr = Expression("'\xc3\xbe'")
    72         self.assertEqual(u'ß', expr.evaluate({}))
     72        # On Python 2, byte strings are converted to unicode if they
     73        # contain non-ASCII characters.
     74        # On Python 3 there is no need to do this, because non-prefixed
     75        # string literals are already unicode; only b'' literals are bytes.
     76        expr = Expression(wrapped_bytes(r"b'\xc3\xbe'"))
     77        if IS_PYTHON2:
     78            self.assertEqual(u'ß', expr.evaluate({}))
     79        else:
     80            self.assertEqual(u'ß'.encode('utf-8'), expr.evaluate({}))
    7381
    7482    def test_num_literal(self):
    7583        self.assertEqual(42, Expression("42").evaluate({}))
    76         self.assertEqual(42L, Expression("42L").evaluate({}))
     84        if IS_PYTHON2:
     85            self.assertEqual(42L, Expression("42L").evaluate({}))
    7786        self.assertEqual(.42, Expression(".42").evaluate({}))
    78         self.assertEqual(07, Expression("07").evaluate({}))
     87        if IS_PYTHON2:
     88            self.assertEqual(07, Expression("07").evaluate({}))
    7989        self.assertEqual(0xF2, Expression("0xF2").evaluate({}))
    8090        self.assertEqual(0XF2, Expression("0XF2").evaluate({}))
    8191
     
    246256    def test_lambda(self):
    247257        data = {'items': range(5)}
    248258        expr = Expression("filter(lambda x: x > 2, items)")
    249         self.assertEqual([3, 4], expr.evaluate(data))
     259        self.assertEqual([3, 4], list(expr.evaluate(data)))
    250260
    251261    def test_lambda_tuple_arg(self):
     262        # This syntax goes away in Python 3
     263        if not IS_PYTHON2:
     264            return
    252265        data = {'items': [(1, 2), (2, 1)]}
    253266        expr = Expression("filter(lambda (x, y): x > y, items)")
    254         self.assertEqual([(2, 1)], expr.evaluate(data))
     267        self.assertEqual([(2, 1)], list(expr.evaluate(data)))
    255268
    256269    def test_list_comprehension(self):
    257270        expr = Expression("[n for n in numbers if n < 2]")
     
    470483
    471484    def test_pickle(self):
    472485        suite = Suite('foo = 42')
    473         buf = StringIO()
     486        buf = BytesIO()
    474487        pickle.dump(suite, buf, 2)
    475488        buf.seek(0)
    476489        unpickled = pickle.load(buf)
     
    645658        assert 'plain' in data
    646659
    647660    def test_import(self):
    648         suite = Suite("from itertools import ifilter")
     661        suite = Suite("from itertools import repeat")
    649662        data = {}
    650663        suite.execute(data)
    651         assert 'ifilter' in data
     664        assert 'repeat' in data
    652665
    653666    def test_import_star(self):
    654667        suite = Suite("from itertools import *")
    655668        data = Context()
    656669        suite.execute(data)
    657         assert 'ifilter' in data
     670        assert 'repeat' in data
    658671
    659672    def test_import_in_def(self):
    660673        suite = Suite("""def fun():
    661     from itertools import ifilter
    662     return ifilter(None, range(3))
     674    from itertools import repeat
     675    return repeat(1, 3)
    663676""")
    664677        data = Context()
    665678        suite.execute(data)
    666         assert 'ifilter' not in data
    667         self.assertEqual([1, 2], list(data['fun']()))
     679        assert 'repeat' not in data
     680        self.assertEqual([1, 1, 1], list(data['fun']()))
    668681
    669682    def test_for(self):
    670683        suite = Suite("""x = []
     
    766779        self.assertEqual("foo", d["k"])
    767780
    768781    def test_exec(self):
    769         suite = Suite("x = 1; exec d['k']; assert x == 42, x")
     782        suite = Suite("x = 1; exec(d['k']); assert x == 42, x")
    770783        suite.execute({"d": {"k": "x = 42"}})
    771784
    772785    def test_return(self):
     
    828841
    829842        def test_yield_expression(self):
    830843            d = {}
    831             suite = Suite("""results = []
     844            suite = Suite("""from genshi.compat import next
     845results = []
    832846def counter(maximum):
    833847    i = 0
    834848    while i < maximum:
     
    838852        else:
    839853            i += 1
    840854it = counter(5)
    841 results.append(it.next())
     855results.append(next(it))
    842856results.append(it.send(3))
    843 results.append(it.next())
     857results.append(next(it))
    844858""")
    845859            suite.execute(d)
    846860            self.assertEqual([0, 3, 4], d['results'])
  • genshi/template/tests/loader.py

    diff -r 4bbd2b021cb5 genshi/template/tests/loader.py
    a b  
    347347        assert 'tmpl2.html' not in loader._cache
    348348
    349349    def test_load_with_default_encoding(self):
    350         f = open(os.path.join(self.dirname, 'tmpl.html'), 'w')
     350        f = open(os.path.join(self.dirname, 'tmpl.html'), 'wb')
    351351        try:
    352352            f.write(u'<div>\xf6</div>'.encode('iso-8859-1'))
    353353        finally:
     
    356356        loader.load('tmpl.html')
    357357
    358358    def test_load_with_explicit_encoding(self):
    359         f = open(os.path.join(self.dirname, 'tmpl.html'), 'w')
     359        f = open(os.path.join(self.dirname, 'tmpl.html'), 'wb')
    360360        try:
    361361            f.write(u'<div>\xf6</div>'.encode('iso-8859-1'))
    362362        finally:
  • genshi/template/tests/markup.py

    diff -r 4bbd2b021cb5 genshi/template/tests/markup.py
    a b  
    1515import os
    1616import pickle
    1717import shutil
    18 from StringIO import StringIO
    1918import sys
    2019import tempfile
    2120import unittest
    2221
     22from genshi.compat import BytesIO, StringIO
    2323from genshi.core import Markup
    2424from genshi.input import XML
    2525from genshi.template.base import BadDirectiveError, TemplateSyntaxError
     
    4343    def test_pickle(self):
    4444        stream = XML('<root>$var</root>')
    4545        tmpl = MarkupTemplate(stream)
    46         buf = StringIO()
     46        buf = BytesIO()
    4747        pickle.dump(tmpl, buf, 2)
    4848        buf.seek(0)
    4949        unpickled = pickle.load(buf)
  • genshi/template/tests/plugin.py

    diff -r 4bbd2b021cb5 genshi/template/tests/plugin.py
    a b  
    3030
    3131    def test_init_no_options(self):
    3232        plugin = MarkupTemplateEnginePlugin()
    33         self.assertEqual('utf-8', plugin.default_encoding)
     33        self.assertEqual(None, plugin.default_encoding)
    3434        self.assertEqual('html', plugin.default_format)
    3535        self.assertEqual(None, plugin.default_doctype)
    3636
     
    165165    def test_helper_functions(self):
    166166        plugin = MarkupTemplateEnginePlugin()
    167167        tmpl = plugin.load_template(PACKAGE + '.templates.functions')
    168         output = plugin.render({'snippet': '<b>Foo</b>'}, template=tmpl)
     168        output = plugin.render({'snippet': u'<b>Foo</b>'}, template=tmpl)
    169169        self.assertEqual("""<div>
    170170False
    171171bar
     
    178178
    179179    def test_init_no_options(self):
    180180        plugin = TextTemplateEnginePlugin()
    181         self.assertEqual('utf-8', plugin.default_encoding)
     181        self.assertEqual(None, plugin.default_encoding)
    182182        self.assertEqual('text', plugin.default_format)
    183183
    184184        self.assertEqual([], plugin.loader.search_path)
  • genshi/template/text.py

    diff -r 4bbd2b021cb5 genshi/template/text.py
    a b  
    162162        depth = 0
    163163
    164164        source = source.read()
    165         if isinstance(source, str):
     165        if not isinstance(source, unicode):
    166166            source = source.decode(encoding or 'utf-8', 'replace')
    167167        offset = 0
    168168        lineno = 1
     
    279279        depth = 0
    280280
    281281        source = source.read()
    282         if isinstance(source, str):
     282        if not isinstance(source, unicode):
    283283            source = source.decode(encoding or 'utf-8', 'replace')
    284284        offset = 0
    285285        lineno = 1
  • genshi/tests/core.py

    diff -r 4bbd2b021cb5 genshi/tests/core.py
    a b  
    1313
    1414import doctest
    1515import pickle
    16 from StringIO import StringIO
    17 try:
    18     from cStringIO import StringIO as cStringIO
    19 except ImportError:
    20     cStringIO = StringIO
    2116import unittest
    2217
    2318from genshi import core
    2419from genshi.core import Markup, Attrs, Namespace, QName, escape, unescape
    2520from genshi.input import XML, ParseError
     21from genshi.compat import StringIO, BytesIO
    2622
    2723
    2824class StreamTestCase(unittest.TestCase):
    2925
    3026    def test_render_utf8(self):
    3127        xml = XML('<li>Über uns</li>')
    32         self.assertEqual('<li>Über uns</li>', xml.render())
     28        self.assertEqual(u'<li>Über uns</li>'.encode('utf-8'), xml.render(encoding='utf-8'))
    3329
    3430    def test_render_unicode(self):
    3531        xml = XML('<li>Über uns</li>')
     32        self.assertEqual(u'<li>Über uns</li>', xml.render())
    3633        self.assertEqual(u'<li>Über uns</li>', xml.render(encoding=None))
    3734
    3835    def test_render_ascii(self):
    3936        xml = XML('<li>Über uns</li>')
    40         self.assertEqual('<li>&#220;ber uns</li>', xml.render(encoding='ascii'))
     37        self.assertEqual(u'<li>&#220;ber uns</li>'.encode('ascii'), xml.render(encoding='ascii'))
    4138
    4239    def test_render_output_stream_utf8(self):
    4340        xml = XML('<li>Über uns</li>')
    44         strio = cStringIO()
    45         self.assertEqual(None, xml.render(out=strio))
    46         self.assertEqual('<li>Über uns</li>', strio.getvalue())
     41        strio = BytesIO()
     42        self.assertEqual(None, xml.render(encoding='utf-8', out=strio))
     43        self.assertEqual(u'<li>Über uns</li>'.encode('utf-8'), strio.getvalue())
    4744
    4845    def test_render_output_stream_unicode(self):
    4946        xml = XML('<li>Über uns</li>')
     
    5350
    5451    def test_pickle(self):
    5552        xml = XML('<li>Foo</li>')
    56         buf = StringIO()
     53        buf = BytesIO()
    5754        pickle.dump(xml, buf, 2)
    5855        buf.seek(0)
    5956        xml = pickle.load(buf)
     
    6360class MarkupTestCase(unittest.TestCase):
    6461
    6562    def test_new_with_encoding(self):
    66         markup = Markup('Döner', encoding='utf-8')
    67         self.assertEquals("<Markup u'D\\xf6ner'>", repr(markup))
     63        markup = Markup(u'Döner'.encode('utf-8'), encoding='utf-8')
     64        # mimic Markup.__repr__ when constructing output for Python 2/3 compatibility
     65        self.assertEquals("<Markup %r>" % u'D\u00f6ner', repr(markup))
    6866
    6967    def test_repr(self):
    7068        markup = Markup('foo')
     
    158156
    159157    def test_pickle(self):
    160158        markup = Markup('foo')
    161         buf = StringIO()
     159        buf = BytesIO()
    162160        pickle.dump(markup, buf, 2)
    163161        buf.seek(0)
    164162        self.assertEquals("<Markup u'foo'>", repr(pickle.load(buf)))
     
    168166
    169167    def test_pickle(self):
    170168        attrs = Attrs([("attr1", "foo"), ("attr2", "bar")])
    171         buf = StringIO()
     169        buf = BytesIO()
    172170        pickle.dump(attrs, buf, 2)
    173171        buf.seek(0)
    174172        unpickled = pickle.load(buf)
     
    196194
    197195    def test_pickle(self):
    198196        ns = Namespace('http://www.example.org/namespace')
    199         buf = StringIO()
     197        buf = BytesIO()
    200198        pickle.dump(ns, buf, 2)
    201199        buf.seek(0)
    202200        unpickled = pickle.load(buf)
     
    209207
    210208    def test_pickle(self):
    211209        qname = QName('http://www.example.org/namespace}elem')
    212         buf = StringIO()
     210        buf = BytesIO()
    213211        pickle.dump(qname, buf, 2)
    214212        buf.seek(0)
    215213        unpickled = pickle.load(buf)
  • genshi/tests/input.py

    diff -r 4bbd2b021cb5 genshi/tests/input.py
    a b  
    1212# history and logs, available at http://genshi.edgewall.org/log/.
    1313
    1414import doctest
    15 from StringIO import StringIO
    1615import sys
    1716import unittest
    1817
    1918from genshi.core import Attrs, Stream
    2019from genshi.input import XMLParser, HTMLParser, ParseError
     20from genshi.compat import StringIO, BytesIO
    2121
    2222
    2323class XMLParserTestCase(unittest.TestCase):
     
    5959
    6060    def test_latin1_encoded(self):
    6161        text = u'<div>\xf6</div>'.encode('iso-8859-1')
    62         events = list(XMLParser(StringIO(text), encoding='iso-8859-1'))
     62        events = list(XMLParser(BytesIO(text), encoding='iso-8859-1'))
    6363        kind, data, pos = events[1]
    6464        self.assertEqual(Stream.TEXT, kind)
    6565        self.assertEqual(u'\xf6', data)
     
    6868        text = u"""<?xml version="1.0" encoding="iso-8859-1" ?>
    6969        <div>\xf6</div>
    7070        """.encode('iso-8859-1')
    71         events = list(XMLParser(StringIO(text)))
     71        events = list(XMLParser(BytesIO(text)))
    7272        kind, data, pos = events[2]
    7373        self.assertEqual(Stream.TEXT, kind)
    7474        self.assertEqual(u'\xf6', data)
     
    116116class HTMLParserTestCase(unittest.TestCase):
    117117
    118118    def test_text_node_pos_single_line(self):
    119         text = '<elem>foo bar</elem>'
     119        text = u'<elem>foo bar</elem>'
    120120        events = list(HTMLParser(StringIO(text)))
    121121        kind, data, pos = events[1]
    122122        self.assertEqual(Stream.TEXT, kind)
     
    124124        self.assertEqual((None, 1, 6), pos)
    125125
    126126    def test_text_node_pos_multi_line(self):
    127         text = '''<elem>foo
     127        text = u'''<elem>foo
    128128bar</elem>'''
    129129        events = list(HTMLParser(StringIO(text)))
    130130        kind, data, pos = events[1]
     
    134134
    135135    def test_input_encoding_text(self):
    136136        text = u'<div>\xf6</div>'.encode('iso-8859-1')
    137         events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
     137        events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1'))
    138138        kind, data, pos = events[1]
    139139        self.assertEqual(Stream.TEXT, kind)
    140140        self.assertEqual(u'\xf6', data)
    141141
    142142    def test_input_encoding_attribute(self):
    143143        text = u'<div title="\xf6"></div>'.encode('iso-8859-1')
    144         events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
     144        events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1'))
    145145        kind, (tag, attrib), pos = events[0]
    146146        self.assertEqual(Stream.START, kind)
    147147        self.assertEqual(u'\xf6', attrib.get('title'))
     
    154154        self.assertEqual(u'\u2013', data)
    155155
    156156    def test_html_entity_in_attribute(self):
    157         text = '<p title="&nbsp;"></p>'
     157        text = u'<p title="&nbsp;"></p>'
    158158        events = list(HTMLParser(StringIO(text)))
    159159        kind, data, pos = events[0]
    160160        self.assertEqual(Stream.START, kind)
     
    163163        self.assertEqual(Stream.END, kind)
    164164
    165165    def test_html_entity_in_text(self):
    166         text = '<p>&nbsp;</p>'
     166        text = u'<p>&nbsp;</p>'
    167167        events = list(HTMLParser(StringIO(text)))
    168168        kind, data, pos = events[1]
    169169        self.assertEqual(Stream.TEXT, kind)
    170170        self.assertEqual(u'\xa0', data)
    171171
    172172    def test_processing_instruction(self):
    173         text = '<?php echo "Foobar" ?>'
     173        text = u'<?php echo "Foobar" ?>'
    174174        events = list(HTMLParser(StringIO(text)))
    175175        kind, (target, data), pos = events[0]
    176176        self.assertEqual(Stream.PI, kind)
     
    205205        self.assertEqual(1, standalone)
    206206
    207207    def test_processing_instruction_trailing_qmark(self):
    208         text = '<?php echo "Foobar" ??>'
     208        text = u'<?php echo "Foobar" ??>'
    209209        events = list(HTMLParser(StringIO(text)))
    210210        kind, (target, data), pos = events[0]
    211211        self.assertEqual(Stream.PI, kind)
     
    213213        self.assertEqual('echo "Foobar" ?', data)
    214214
    215215    def test_out_of_order_tags1(self):
    216         text = '<span><b>Foobar</span></b>'
     216        text = u'<span><b>Foobar</span></b>'
    217217        events = list(HTMLParser(StringIO(text)))
    218218        self.assertEqual(5, len(events))
    219219        self.assertEqual((Stream.START, ('span', ())), events[0][:2])
     
    223223        self.assertEqual((Stream.END, 'span'), events[4][:2])
    224224
    225225    def test_out_of_order_tags2(self):
    226         text = '<span class="baz"><b><i>Foobar</span></b></i>'
    227         events = list(HTMLParser(StringIO(text)))
     226        text = u'<span class="baz"><b><i>Foobar</span></b></i>'.encode('utf-8')
     227        events = list(HTMLParser(BytesIO(text), encoding='utf-8'))
    228228        self.assertEqual(7, len(events))
    229229        self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))),
    230230                         events[0][:2])
     
    236236        self.assertEqual((Stream.END, 'span'), events[6][:2])
    237237
    238238    def test_out_of_order_tags3(self):
    239         text = '<span><b>Foobar</i>'
    240         events = list(HTMLParser(StringIO(text)))
     239        text = u'<span><b>Foobar</i>'.encode('utf-8')
     240        events = list(HTMLParser(BytesIO(text), encoding='utf-8'))
    241241        self.assertEqual(5, len(events))
    242242        self.assertEqual((Stream.START, ('span', ())), events[0][:2])
    243243        self.assertEqual((Stream.START, ('b', ())), events[1][:2])
     
    246246        self.assertEqual((Stream.END, 'span'), events[4][:2])
    247247
    248248    def test_hex_charref(self):
    249         text = '<span>&#x27;</span>'
     249        text = u'<span>&#x27;</span>'
    250250        events = list(HTMLParser(StringIO(text)))
    251251        self.assertEqual(3, len(events))
    252252        self.assertEqual((Stream.START, ('span', ())), events[0][:2])
  • genshi/tests/output.py

    diff -r 4bbd2b021cb5 genshi/tests/output.py
    a b  
    356356        </div>""", output)
    357357
    358358    def test_html5_doctype(self):
    359         stream = HTML('<html></html>')
     359        stream = HTML(u'<html></html>')
    360360        output = stream.render(XHTMLSerializer, doctype=DocType.HTML5,
    361361                               encoding=None)
    362362        self.assertEqual('<!DOCTYPE html>\n<html></html>', output)
     
    427427        </style>""", output)
    428428
    429429    def test_html5_doctype(self):
    430         stream = HTML('<html></html>')
     430        stream = HTML(u'<html></html>')
    431431        output = stream.render(HTMLSerializer, doctype=DocType.HTML5,
    432432                               encoding=None)
    433433        self.assertEqual('<!DOCTYPE html>\n<html></html>', output)
  • genshi/util.py

    diff -r 4bbd2b021cb5 genshi/util.py
    a b  
    1515
    1616import htmlentitydefs as entities
    1717import re
     18import sys
     19
     20from compat import any, all, stringrepr
    1821
    1922__docformat__ = 'restructuredtext en'
    2023
     
    246249    """
    247250    return _STRIPTAGS_RE.sub('', text)
    248251
    249 
    250 def stringrepr(string):
    251     ascii = string.encode('ascii', 'backslashreplace')
    252     quoted = "'" +  ascii.replace("'", "\\'") + "'"
    253     if len(ascii) > len(string):
    254         return 'u' + quoted
    255     return quoted
    256 
    257 
    258 # Compatibility fallback implementations for older Python versions
    259 
    260 try:
    261     all = all
    262     any = any
    263 except NameError:
    264     def any(S):
    265         for x in S:
    266             if x:
    267                return True
    268         return False
    269 
    270     def all(S):
    271         for x in S:
    272             if not x:
    273                return False
    274         return True
  • setup.py

    diff -r 4bbd2b021cb5 setup.py
    a b  
    4141    def run(self):
    4242        try:
    4343            build_ext.run(self)
    44         except DistutilsPlatformError, e:
     44        except DistutilsPlatformError:
     45            _etype, e, _tb = sys.exc_info()
    4546            self._unavailable(e)
    4647
    4748    def build_extension(self, ext):
     
    4950            build_ext.build_extension(self, ext)
    5051            global _speedup_available
    5152            _speedup_available = True
    52         except CCompilerError, e:
     53        except CCompilerError:
     54            _etype, e, _tb = sys.exc_info()
    5355            self._unavailable(e)
    5456
    5557    def _unavailable(self, exc):
     
    8688    cmdclass['bdist_egg'] = my_bdist_egg
    8789
    8890
     91# Use 2to3 if we're running under Python 3 (with Distribute)
     92extra = {}
     93if sys.version_info >= (3,):
     94    extra['use_2to3'] = True
     95    extra['convert_2to3_doctests'] = []
     96    extra['use_2to3_fixers'] = ['fixes']
     97    # include tests for python3 setup.py test
     98    packages = [
     99        'genshi', 'genshi.filters', 'genshi.template',
     100        'genshi.tests', 'genshi.filters.tests',
     101        'genshi.template.tests',
     102        'genshi.template.tests.templates',
     103    ]
     104    # Install genshi template tests
     105    extra['include_package_data'] = True
     106else:
     107    packages = ['genshi', 'genshi.filters', 'genshi.template']
     108
     109
    89110setup(
    90111    name = 'Genshi',
    91112    version = '0.7',
     
    114135        'Topic :: Text Processing :: Markup :: XML'
    115136    ],
    116137    keywords = ['python.templating.engines'],
    117     packages = ['genshi', 'genshi.filters', 'genshi.template'],
     138    packages = packages,
    118139    test_suite = 'genshi.tests.suite',
    119140
    120141    extras_require = {
     
    132153    """,
    133154
    134155    features = {'speedups': speedups},
    135     cmdclass = cmdclass
     156    cmdclass = cmdclass,
     157
     158    **extra
    136159)