Meeting20100904: genshi-py3k.diff
File genshi-py3k.diff, 129.2 KB (added 14 years ago) |
---|
-
new file .hgignore
diff -r 4bbd2b021cb5 .hgignore
- + 1 .*\.pyc 2 .*.egg-info/ 3 .*~ 4 build/ -
MANIFEST.in
diff -r 4bbd2b021cb5 MANIFEST.in
a b 2 2 recursive-exclude doc/logo.lineform * 3 3 include doc/api/*.* 4 4 include doc/*.html 5 recursive-include genshi/template/tests/templates *.html *.txt -
README.txt
diff -r 4bbd2b021cb5 README.txt
a b 10 10 directory, and visit the Genshi web site: 11 11 12 12 <http://genshi.edgewall.org/> 13 14 About this repository 15 ===================== 16 17 This is a work area for porting Genshi to Python 3. -
doc/common/doctools.py
diff -r 4bbd2b021cb5 doc/common/doctools.py
a b 62 62 code_block.content = 1 63 63 rst.directives.register_directive('code-block', code_block) 64 64 except ImportError: 65 print 'Pygments not installed, syntax highlighting disabled'65 print('Pygments not installed, syntax highlighting disabled') 66 66 67 67 loader = TemplateLoader(['doc', 'doc/common'], variable_lookup='strict') 68 68 for source in glob('doc/*.txt'): 69 69 dest = os.path.splitext(source)[0] + '.html' 70 70 if self.force or not os.path.exists(dest) or \ 71 71 os.path.getmtime(dest) < os.path.getmtime(source): 72 print 'building documentation file %s' % dest72 print('building documentation file %s' % dest) 73 73 publish_cmdline(writer_name='html', 74 74 argv=['--config=%s' % docutils_conf, source, 75 75 dest]) … … 104 104 sys.argv[1:] = old_argv 105 105 106 106 except ImportError: 107 print 'epydoc not installed, skipping API documentation.'107 print('epydoc not installed, skipping API documentation.') 108 108 109 109 110 110 class test_doc(Command): … … 119 119 120 120 def run(self): 121 121 for filename in glob('doc/*.txt'): 122 print 'testing documentation file %s' % filename122 print('testing documentation file %s' % filename) 123 123 doctest.testfile(filename, False, optionflags=doctest.ELLIPSIS) -
new file examples_to_py3k.sh
diff -r 4bbd2b021cb5 examples_to_py3k.sh
- + 1 #!/bin/sh 2 # 3 # Script to run 2to3 on files not covered by setup.py 4 # 5 export PYTHONIOENCODING=utf8 6 7 # General 2to3 run 8 2to3 -w --no-diffs examples/ -
new file fixes/fix_unicode_in_strings.py
diff -r 4bbd2b021cb5 fixes/fix_unicode_in_strings.py
- + 1 """Fixer that changes expressions inside strings literals from u"..." to "...". 2 3 """ 4 5 import re 6 from lib2to3 import fixer_base 7 8 _literal_re = re.compile(r"(.+?)\b[uU]([rR]?[\'\"])") 9 10 class FixUnicodeInStrings(fixer_base.BaseFix): 11 12 PATTERN = "STRING" 13 14 def transform(self, node, results): 15 new = node.clone() 16 new.value = _literal_re.sub(r"\1\2", new.value) 17 return new -
genshi/_speedups.c
diff -r 4bbd2b021cb5 genshi/_speedups.c
a b 14 14 #include <Python.h> 15 15 #include <structmember.h> 16 16 17 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) 18 typedef int Py_ssize_t; 19 #define PY_SSIZE_T_MAX INT_MAX 20 #define PY_SSIZE_T_MIN INT_MIN 17 #if PY_MAJOR_VERSION > 2 18 # define IS_PY3K 19 #elif PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) 20 typedef int Py_ssize_t; 21 # define PY_SSIZE_T_MAX INT_MAX 22 # define PY_SSIZE_T_MIN INT_MIN 23 #endif 24 25 /* We only use Unicode Strings in this module */ 26 #ifndef IS_PY3K 27 # define PyObject_Str PyObject_Unicode 21 28 #endif 22 29 23 30 static PyObject *amp1, *amp2, *lt1, *lt2, *gt1, *gt2, *qt1, *qt2; … … 73 80 Py_DECREF(args); 74 81 return ret; 75 82 } 76 in = (PyUnicodeObject *) PyObject_ Unicode(text);83 in = (PyUnicodeObject *) PyObject_Str(text); 77 84 if (in == NULL) { 78 85 return NULL; 79 86 } … … 390 397 PyObject *unicode, *result, *args; 391 398 392 399 if (PyObject_TypeCheck(self, &MarkupType)) { 393 unicode = PyObject_ Unicode(self);400 unicode = PyObject_Str(self); 394 401 if (unicode == NULL) return NULL; 395 402 result = PyNumber_Multiply(unicode, num); 396 403 } else { // __rmul__ 397 unicode = PyObject_ Unicode(num);404 unicode = PyObject_Str(num); 398 405 if (unicode == NULL) return NULL; 399 406 result = PyNumber_Multiply(unicode, self); 400 407 } … … 418 425 { 419 426 PyObject *format, *result, *args; 420 427 428 #ifdef IS_PY3K 429 format = PyUnicode_FromString("<Markup %r>"); 430 #else 421 431 format = PyString_FromString("<Markup %r>"); 432 #endif 422 433 if (format == NULL) return NULL; 423 result = PyObject_ Unicode(self);434 result = PyObject_Str(self); 424 435 if (result == NULL) { 425 436 Py_DECREF(format); 426 437 return NULL; … … 432 443 return NULL; 433 444 } 434 445 PyTuple_SET_ITEM(args, 0, result); 446 #ifdef IS_PY3K 447 result = PyUnicode_Format(format, args); 448 #else 435 449 result = PyString_Format(format, args); 450 #endif 436 451 Py_DECREF(format); 437 452 Py_DECREF(args); 438 453 
return result; … … 553 568 Markup_add, /*nb_add*/ 554 569 0, /*nb_subtract*/ 555 570 Markup_mul, /*nb_multiply*/ 571 #ifndef IS_PY3K 556 572 0, /*nb_divide*/ 573 #endif 557 574 Markup_mod, /*nb_remainder*/ 558 575 }; 559 576 560 577 PyTypeObject MarkupType = { 578 #ifdef IS_PY3K 579 PyVarObject_HEAD_INIT(NULL, 0) 580 #else 561 581 PyObject_HEAD_INIT(NULL) 562 582 0, 583 #endif 563 584 "genshi._speedups.Markup", 564 585 sizeof(MarkupObject), 565 586 0, … … 567 588 0, /*tp_print*/ 568 589 0, /*tp_getattr*/ 569 590 0, /*tp_setattr*/ 591 #ifdef IS_PY3K 592 0, /*tp_reserved*/ 593 #else 570 594 0, /*tp_compare*/ 595 #endif 571 596 Markup_repr, /*tp_repr*/ 572 597 &Markup_as_number, /*tp_as_number*/ 573 598 0, /*tp_as_sequence*/ … … 580 605 0, /*tp_setattro*/ 581 606 0, /*tp_as_buffer*/ 582 607 608 #ifdef IS_PY3K 609 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_UNICODE_SUBCLASS, /*tp_flags*/ 610 #elif defined(Py_TPFLAGS_UNICODE_SUBCLASS) 611 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES | Py_TPFLAGS_UNICODE_SUBCLASS, /*tp_flags*/ 612 #else 583 613 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES, /*tp_flags*/ 614 #endif 615 584 616 Markup__doc__,/*tp_doc*/ 585 617 586 618 0, /*tp_traverse*/ … … 616 648 0 /*tp_weaklist*/ 617 649 }; 618 650 651 #ifdef IS_PY3K 652 struct PyModuleDef module_def = { 653 PyModuleDef_HEAD_INIT, /*m_base*/ 654 "_speedups", /*m_name*/ 655 NULL, /*m_doc*/ 656 -1, /*m_size*/ 657 NULL, /*m_methods*/ 658 NULL, /*m_reload*/ 659 NULL, /*m_traverse*/ 660 NULL, /*m_clear*/ 661 NULL /*m_free*/ 662 }; 663 664 PyObject * 665 PyInit__speedups(void) 666 #else 619 667 PyMODINIT_FUNC 620 668 init_speedups(void) 669 #endif 621 670 { 622 671 PyObject *module; 623 672 … … 626 675 MarkupType.tp_base = &PyUnicode_Type; 627 676 628 677 if (PyType_Ready(&MarkupType) < 0) 678 #ifdef IS_PY3K 679 return NULL; 680 #else 629 681 return; 682 #endif 630 683 631 684 init_constants(); 632 685 686 #ifdef IS_PY3K 687 module = 
PyModule_Create(&module_def); 688 #else 633 689 module = Py_InitModule("_speedups", NULL); 690 #endif 634 691 Py_INCREF(&MarkupType); 635 692 PyModule_AddObject(module, "Markup", (PyObject *) &MarkupType); 693 694 #ifdef IS_PY3K 695 return module; 696 #endif 636 697 } -
new file genshi/compat.py
diff -r 4bbd2b021cb5 genshi/compat.py
- + 1 # -*- coding: utf-8 -*- 2 # 3 # Copyright (C) 2006-2009 Edgewall Software 4 # All rights reserved. 5 # 6 # This software is licensed as described in the file COPYING, which 7 # you should have received as part of this distribution. The terms 8 # are also available at http://genshi.edgewall.org/wiki/License. 9 # 10 # This software consists of voluntary contributions made by many 11 # individuals. For the exact contribution history, see the revision 12 # history and logs, available at http://genshi.edgewall.org/log/. 13 14 """Various Python version compatibility classes and functions.""" 15 16 import sys 17 from types import CodeType 18 19 20 IS_PYTHON2 = (sys.version_info[0] == 2) 21 22 23 # This function should only be called in Python 2, and will fail in Python 3 24 25 if IS_PYTHON2: 26 def stringrepr(string): 27 ascii = string.encode('ascii', 'backslashreplace') 28 quoted = "'" + ascii.replace("'", "\\'") + "'" 29 if len(ascii) > len(string): 30 return 'u' + quoted 31 return quoted 32 else: 33 def stringrepr(string): 34 raise RuntimeError( 35 'Python 2 compatibility function. Not usable in Python 3.') 36 37 38 # We need to differentiate between StringIO and BytesIO in places 39 40 if IS_PYTHON2: 41 from StringIO import StringIO 42 try: 43 from cStringIO import StringIO as BytesIO 44 except ImportError: 45 BytesIO = StringIO 46 else: 47 from io import StringIO, BytesIO 48 49 50 # We want to test bytestring input to some stuff. 
51 52 if IS_PYTHON2: 53 def wrapped_bytes(bstr): 54 assert bstr.startswith('b') 55 return bstr[1:] 56 else: 57 def wrapped_bytes(bstr): 58 assert bstr.startswith('b') 59 return bstr 60 61 62 # We do some scary stuff with CodeType() in template/eval.py 63 64 if IS_PYTHON2: 65 def get_code_params(code): 66 return (code.co_nlocals, code.co_stacksize, code.co_flags, 67 code.co_code, code.co_consts, code.co_names, code.co_varnames, 68 code.co_filename, code.co_name, code.co_firstlineno, 69 code.co_lnotab, (), ()) 70 71 def build_code_chunk(code, filename, name, lineno): 72 return CodeType(0, code.co_nlocals, code.co_stacksize, 73 code.co_flags | 0x0040, code.co_code, code.co_consts, 74 code.co_names, code.co_varnames, filename, name, 75 lineno, code.co_lnotab, (), ()) 76 else: 77 def get_code_params(code): 78 return (code.co_nlocals, code.co_kwonlyargcount, code.co_stacksize, 79 code.co_flags, code.co_code, code.co_consts, code.co_names, 80 code.co_varnames, code.co_filename, code.co_name, 81 code.co_firstlineno, code.co_lnotab, (), ()) 82 83 def build_code_chunk(code, filename, name, lineno): 84 return CodeType(0, code.co_nlocals, code.co_kwonlyargcount, 85 code.co_stacksize, code.co_flags | 0x0040, 86 code.co_code, code.co_consts, code.co_names, 87 code.co_varnames, filename, name, lineno, 88 code.co_lnotab, (), ()) 89 90 # Compatibility fallback implementations for Python < 2.6 91 92 try: 93 next = next 94 except NameError: 95 def next(iterator): 96 return iterator.next() 97 98 # Compatibility fallback implementations for Python < 2.5 99 100 try: 101 all = all 102 any = any 103 except NameError: 104 def any(S): 105 for x in S: 106 if x: 107 return True 108 return False 109 110 def all(S): 111 for x in S: 112 if not x: 113 return False 114 return True 115 -
genshi/core.py
diff -r 4bbd2b021cb5 genshi/core.py
a b 17 17 reduce # builtin in Python < 3 18 18 except NameError: 19 19 from functools import reduce 20 import sys 20 21 from itertools import chain 21 22 import operator 22 23 … … 92 93 Assume the following stream produced by the `HTML` function: 93 94 94 95 >>> from genshi.input import HTML 95 >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''' )96 >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''', encoding='utf-8') 96 97 >>> print(html) 97 98 <p onclick="alert('Whoa')">Hello, world!</p> 98 99 … … 153 154 """ 154 155 return reduce(operator.or_, (self,) + filters) 155 156 156 def render(self, method=None, encoding= 'utf-8', out=None, **kwargs):157 def render(self, method=None, encoding=None, out=None, **kwargs): 157 158 """Return a string representation of the stream. 158 159 159 160 Any additional keyword arguments are passed to the serializer, and thus … … 187 188 XPath expression. 188 189 189 190 >>> from genshi import HTML 190 >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>' )191 >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>', encoding='utf-8') 191 192 >>> print(stream.select('elem')) 192 193 <elem>foo</elem><elem>bar</elem> 193 194 >>> print(stream.select('elem/text()')) … … 667 668 def __hash__(self): 668 669 return hash(self.uri) 669 670 670 def __repr__(self): 671 return '%s(%s)' % (type(self).__name__, stringrepr(self.uri)) 671 if sys.version_info[0] == 2: 672 # Only use stringrepr in python 2 673 def __repr__(self): 674 return '%s(%s)' % (type(self).__name__, stringrepr(self.uri)) 675 else: 676 def __repr__(self): 677 return '%s(%r)' % (type(self).__name__, self.uri) 672 678 673 679 def __str__(self): 674 680 return self.uri.encode('utf-8') … … 728 734 def __getnewargs__(self): 729 735 return (self.lstrip('{'),) 730 736 731 def __repr__(self): 732 return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{'))) 737 if sys.version_info[0] == 2: 738 # Only use stringrepr in python 2 739 
def __repr__(self): 740 return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{'))) 741 else: 742 def __repr__(self): 743 return '%s(%r)' % (type(self).__name__, self.lstrip('{')) -
genshi/filters/html.py
diff -r 4bbd2b021cb5 genshi/filters/html.py
a b 32 32 >>> from genshi.input import HTML 33 33 >>> html = HTML('''<form> 34 34 ... <p><input type="text" name="foo" /></p> 35 ... </form>''' )35 ... </form>''', encoding='utf-8') 36 36 >>> filler = HTMLFormFiller(data={'foo': 'bar'}) 37 37 >>> print(html | filler) 38 38 <form> … … 199 199 from the stream. 200 200 201 201 >>> from genshi import HTML 202 >>> html = HTML('<div><script>alert(document.cookie)</script></div>' )202 >>> html = HTML('<div><script>alert(document.cookie)</script></div>', encoding='utf-8') 203 203 >>> print(html | HTMLSanitizer()) 204 204 <div/> 205 205 … … 207 207 is instantiated. For example, to allow inline ``style`` attributes, the 208 208 following instantation would work: 209 209 210 >>> html = HTML('<div style="background: #000"></div>' )210 >>> html = HTML('<div style="background: #000"></div>', encoding='utf-8') 211 211 >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) 212 212 >>> print(html | sanitizer) 213 213 <div style="background: #000"/> … … 215 215 Note that even in this case, the filter *does* attempt to remove dangerous 216 216 constructs from style attributes: 217 217 218 >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>' )218 >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>', encoding='utf-8') 219 219 >>> print(html | sanitizer) 220 220 <div style="color: #000"/> 221 221 -
genshi/filters/i18n.py
diff -r 4bbd2b021cb5 genshi/filters/i18n.py
a b 33 33 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives 34 34 from genshi.template.directives import Directive, StripDirective 35 35 from genshi.template.markup import MarkupTemplate, EXEC 36 from genshi.compat import IS_PYTHON2 36 37 37 38 __all__ = ['Translator', 'extract'] 38 39 __docformat__ = 'restructuredtext en' … … 288 289 also need to pass a name for those parameters. Consider the following 289 290 examples: 290 291 291 >>> tmpl = MarkupTemplate('''\ 292 <html xmlns:i18n="http://genshi.edgewall.org/i18n"> 292 >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> 293 293 ... <div i18n:choose="num; num"> 294 294 ... <p i18n:singular="">There is $num coin</p> 295 295 ... <p i18n:plural="">There are $num coins</p> … … 301 301 [(2, 'ngettext', (u'There is %(num)s coin', 302 302 u'There are %(num)s coins'), [])] 303 303 304 >>> tmpl = MarkupTemplate('''\ 305 <html xmlns:i18n="http://genshi.edgewall.org/i18n"> 304 >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> 306 305 ... <div i18n:choose="num; num"> 307 306 ... <p i18n:singular="">There is $num coin</p> 308 307 ... <p i18n:plural="">There are $num coins</p> … … 324 323 325 324 When used as a element and not as an attribute: 326 325 327 >>> tmpl = MarkupTemplate('''\ 328 <html xmlns:i18n="http://genshi.edgewall.org/i18n"> 326 >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> 329 327 ... <i18n:choose numeral="num" params="num"> 330 328 ... <p i18n:singular="">There is $num coin</p> 331 329 ... <p i18n:plural="">There are $num coins</p> … … 492 490 another i18n domain(catalog) to translate from. 493 491 494 492 >>> from genshi.filters.tests.i18n import DummyTranslations 495 >>> tmpl = MarkupTemplate('''\ 496 <html xmlns:i18n="http://genshi.edgewall.org/i18n"> 493 >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n"> 497 494 ... <p i18n:msg="">Bar</p> 498 495 ... 
<div i18n:domain="foo"> 499 496 ... <p i18n:msg="">FooBar</p> … … 663 660 if ctxt: 664 661 ctxt['_i18n.gettext'] = gettext 665 662 else: 666 gettext = self.translate.ugettext 667 ngettext = self.translate.ungettext 663 if IS_PYTHON2: 664 gettext = self.translate.ugettext 665 ngettext = self.translate.ungettext 666 else: 667 gettext = self.translate.gettext 668 ngettext = self.translate.ngettext 668 669 try: 669 dgettext = self.translate.dugettext 670 dngettext = self.translate.dungettext 670 if IS_PYTHON2: 671 dgettext = self.translate.dugettext 672 dngettext = self.translate.dungettext 673 else: 674 dgettext = self.translate.dgettext 675 dngettext = self.translate.dngettext 671 676 except AttributeError: 672 677 dgettext = lambda _, y: gettext(y) 673 678 dngettext = lambda _, s, p, n: ngettext(s, p, n) … … 678 683 ctxt['_i18n.dngettext'] = dngettext 679 684 680 685 if ctxt and ctxt.get('_i18n.domain'): 686 # TODO: This can cause infinite recursion if dgettext is defined 687 # via the AttributeError case above! 681 688 gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg) 682 689 683 690 for kind, data, pos in stream: … … 1168 1175 and node.func.id in gettext_functions: 1169 1176 strings = [] 1170 1177 def _add(arg): 1171 if isinstance(arg, _ast.Str) and isinstance(arg.s, basestring): 1178 if isinstance(arg, _ast.Str) and isinstance(arg.s, unicode): 1179 strings.append(arg.s) 1180 elif isinstance(arg, _ast.Str): 1172 1181 strings.append(unicode(arg.s, 'utf-8')) 1173 1182 elif arg: 1174 1183 strings.append(None) -
genshi/filters/tests/__init__.py
diff -r 4bbd2b021cb5 genshi/filters/tests/__init__.py
a b 15 15 import unittest 16 16 17 17 def suite(): 18 from genshi.filters.tests import html, i18n, transform18 from genshi.filters.tests import test_html, i18n, transform 19 19 suite = unittest.TestSuite() 20 suite.addTest( html.suite())20 suite.addTest(test_html.suite()) 21 21 suite.addTest(i18n.suite()) 22 22 if hasattr(doctest, 'NORMALIZE_WHITESPACE'): 23 23 suite.addTest(transform.suite()) -
deleted file genshi/filters/tests/html.py
diff -r 4bbd2b021cb5 genshi/filters/tests/html.py
+ - 1 # -*- coding: utf-8 -*-2 #3 # Copyright (C) 2006-2009 Edgewall Software4 # All rights reserved.5 #6 # This software is licensed as described in the file COPYING, which7 # you should have received as part of this distribution. The terms8 # are also available at http://genshi.edgewall.org/wiki/License.9 #10 # This software consists of voluntary contributions made by many11 # individuals. For the exact contribution history, see the revision12 # history and logs, available at http://genshi.edgewall.org/log/.13 14 import doctest15 import unittest16 17 from genshi.input import HTML, ParseError18 from genshi.filters.html import HTMLFormFiller, HTMLSanitizer19 from genshi.template import MarkupTemplate20 21 class HTMLFormFillerTestCase(unittest.TestCase):22 23 def test_fill_input_text_no_value(self):24 html = HTML("""<form><p>25 <input type="text" name="foo" />26 </p></form>""") | HTMLFormFiller()27 self.assertEquals("""<form><p>28 <input type="text" name="foo"/>29 </p></form>""", html.render())30 31 def test_fill_input_text_single_value(self):32 html = HTML("""<form><p>33 <input type="text" name="foo" />34 </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})35 self.assertEquals("""<form><p>36 <input type="text" name="foo" value="bar"/>37 </p></form>""", html.render())38 39 def test_fill_input_text_multi_value(self):40 html = HTML("""<form><p>41 <input type="text" name="foo" />42 </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})43 self.assertEquals("""<form><p>44 <input type="text" name="foo" value="bar"/>45 </p></form>""", html.render())46 47 def test_fill_input_hidden_no_value(self):48 html = HTML("""<form><p>49 <input type="hidden" name="foo" />50 </p></form>""") | HTMLFormFiller()51 self.assertEquals("""<form><p>52 <input type="hidden" name="foo"/>53 </p></form>""", html.render())54 55 def test_fill_input_hidden_single_value(self):56 html = HTML("""<form><p>57 <input type="hidden" name="foo" />58 </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})59 
self.assertEquals("""<form><p>60 <input type="hidden" name="foo" value="bar"/>61 </p></form>""", html.render())62 63 def test_fill_input_hidden_multi_value(self):64 html = HTML("""<form><p>65 <input type="hidden" name="foo" />66 </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})67 self.assertEquals("""<form><p>68 <input type="hidden" name="foo" value="bar"/>69 </p></form>""", html.render())70 71 def test_fill_textarea_no_value(self):72 html = HTML("""<form><p>73 <textarea name="foo"></textarea>74 </p></form>""") | HTMLFormFiller()75 self.assertEquals("""<form><p>76 <textarea name="foo"/>77 </p></form>""", html.render())78 79 def test_fill_textarea_single_value(self):80 html = HTML("""<form><p>81 <textarea name="foo"></textarea>82 </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})83 self.assertEquals("""<form><p>84 <textarea name="foo">bar</textarea>85 </p></form>""", html.render())86 87 def test_fill_textarea_multi_value(self):88 html = HTML("""<form><p>89 <textarea name="foo"></textarea>90 </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})91 self.assertEquals("""<form><p>92 <textarea name="foo">bar</textarea>93 </p></form>""", html.render())94 95 def test_fill_textarea_multiple(self):96 # Ensure that the subsequent textarea doesn't get the data from the97 # first98 html = HTML("""<form><p>99 <textarea name="foo"></textarea>100 <textarea name="bar"></textarea>101 </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})102 self.assertEquals("""<form><p>103 <textarea name="foo">Some text</textarea>104 <textarea name="bar"/>105 </p></form>""", html.render())106 107 def test_fill_textarea_preserve_original(self):108 html = HTML("""<form><p>109 <textarea name="foo"></textarea>110 <textarea name="bar">Original value</textarea>111 </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})112 self.assertEquals("""<form><p>113 <textarea name="foo">Some text</textarea>114 <textarea name="bar">Original value</textarea>115 </p></form>""", html.render())116 117 
def test_fill_input_checkbox_single_value_auto_no_value(self):118 html = HTML("""<form><p>119 <input type="checkbox" name="foo" />120 </p></form>""") | HTMLFormFiller()121 self.assertEquals("""<form><p>122 <input type="checkbox" name="foo"/>123 </p></form>""", html.render())124 125 def test_fill_input_checkbox_single_value_auto(self):126 html = HTML("""<form><p>127 <input type="checkbox" name="foo" />128 </p></form>""")129 self.assertEquals("""<form><p>130 <input type="checkbox" name="foo"/>131 </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())132 self.assertEquals("""<form><p>133 <input type="checkbox" name="foo" checked="checked"/>134 </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render())135 136 def test_fill_input_checkbox_single_value_defined(self):137 html = HTML("""<form><p>138 <input type="checkbox" name="foo" value="1" />139 </p></form>""")140 self.assertEquals("""<form><p>141 <input type="checkbox" name="foo" value="1" checked="checked"/>142 </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())143 self.assertEquals("""<form><p>144 <input type="checkbox" name="foo" value="1"/>145 </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())146 147 def test_fill_input_checkbox_multi_value_auto(self):148 html = HTML("""<form><p>149 <input type="checkbox" name="foo" />150 </p></form>""")151 self.assertEquals("""<form><p>152 <input type="checkbox" name="foo"/>153 </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render())154 self.assertEquals("""<form><p>155 <input type="checkbox" name="foo" checked="checked"/>156 </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render())157 158 def test_fill_input_checkbox_multi_value_defined(self):159 html = HTML("""<form><p>160 <input type="checkbox" name="foo" value="1" />161 </p></form>""")162 self.assertEquals("""<form><p>163 <input type="checkbox" name="foo" value="1" checked="checked"/>164 </p></form>""", (html | HTMLFormFiller(data={'foo': 
['1']})).render())165 self.assertEquals("""<form><p>166 <input type="checkbox" name="foo" value="1"/>167 </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())168 169 def test_fill_input_radio_no_value(self):170 html = HTML("""<form><p>171 <input type="radio" name="foo" />172 </p></form>""") | HTMLFormFiller()173 self.assertEquals("""<form><p>174 <input type="radio" name="foo"/>175 </p></form>""", html.render())176 177 def test_fill_input_radio_single_value(self):178 html = HTML("""<form><p>179 <input type="radio" name="foo" value="1" />180 </p></form>""")181 self.assertEquals("""<form><p>182 <input type="radio" name="foo" value="1" checked="checked"/>183 </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())184 self.assertEquals("""<form><p>185 <input type="radio" name="foo" value="1"/>186 </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())187 188 def test_fill_input_radio_multi_value(self):189 html = HTML("""<form><p>190 <input type="radio" name="foo" value="1" />191 </p></form>""")192 self.assertEquals("""<form><p>193 <input type="radio" name="foo" value="1" checked="checked"/>194 </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())195 self.assertEquals("""<form><p>196 <input type="radio" name="foo" value="1"/>197 </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())198 199 def test_fill_input_radio_empty_string(self):200 html = HTML("""<form><p>201 <input type="radio" name="foo" value="" />202 </p></form>""")203 self.assertEquals("""<form><p>204 <input type="radio" name="foo" value="" checked="checked"/>205 </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())206 207 def test_fill_input_radio_multi_empty_string(self):208 html = HTML("""<form><p>209 <input type="radio" name="foo" value="" />210 </p></form>""")211 self.assertEquals("""<form><p>212 <input type="radio" name="foo" value="" checked="checked"/>213 </p></form>""", (html | HTMLFormFiller(data={'foo': 
['']})).render())214 215 def test_fill_select_no_value_auto(self):216 html = HTML("""<form><p>217 <select name="foo">218 <option>1</option>219 <option>2</option>220 <option>3</option>221 </select>222 </p></form>""") | HTMLFormFiller()223 self.assertEquals("""<form><p>224 <select name="foo">225 <option>1</option>226 <option>2</option>227 <option>3</option>228 </select>229 </p></form>""", html.render())230 231 def test_fill_select_no_value_defined(self):232 html = HTML("""<form><p>233 <select name="foo">234 <option value="1">1</option>235 <option value="2">2</option>236 <option value="3">3</option>237 </select>238 </p></form>""") | HTMLFormFiller()239 self.assertEquals("""<form><p>240 <select name="foo">241 <option value="1">1</option>242 <option value="2">2</option>243 <option value="3">3</option>244 </select>245 </p></form>""", html.render())246 247 def test_fill_select_single_value_auto(self):248 html = HTML("""<form><p>249 <select name="foo">250 <option>1</option>251 <option>2</option>252 <option>3</option>253 </select>254 </p></form>""") | HTMLFormFiller(data={'foo': '1'})255 self.assertEquals("""<form><p>256 <select name="foo">257 <option selected="selected">1</option>258 <option>2</option>259 <option>3</option>260 </select>261 </p></form>""", html.render())262 263 def test_fill_select_single_value_defined(self):264 html = HTML("""<form><p>265 <select name="foo">266 <option value="1">1</option>267 <option value="2">2</option>268 <option value="3">3</option>269 </select>270 </p></form>""") | HTMLFormFiller(data={'foo': '1'})271 self.assertEquals("""<form><p>272 <select name="foo">273 <option value="1" selected="selected">1</option>274 <option value="2">2</option>275 <option value="3">3</option>276 </select>277 </p></form>""", html.render())278 279 def test_fill_select_multi_value_auto(self):280 html = HTML("""<form><p>281 <select name="foo" multiple>282 <option>1</option>283 <option>2</option>284 <option>3</option>285 </select>286 </p></form>""") | 
HTMLFormFiller(data={'foo': ['1', '3']})287 self.assertEquals("""<form><p>288 <select name="foo" multiple="multiple">289 <option selected="selected">1</option>290 <option>2</option>291 <option selected="selected">3</option>292 </select>293 </p></form>""", html.render())294 295 def test_fill_select_multi_value_defined(self):296 html = HTML("""<form><p>297 <select name="foo" multiple>298 <option value="1">1</option>299 <option value="2">2</option>300 <option value="3">3</option>301 </select>302 </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})303 self.assertEquals("""<form><p>304 <select name="foo" multiple="multiple">305 <option value="1" selected="selected">1</option>306 <option value="2">2</option>307 <option value="3" selected="selected">3</option>308 </select>309 </p></form>""", html.render())310 311 def test_fill_option_segmented_text(self):312 html = MarkupTemplate("""<form>313 <select name="foo">314 <option value="1">foo $x</option>315 </select>316 </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'})317 self.assertEquals("""<form>318 <select name="foo">319 <option value="1" selected="selected">foo 1</option>320 </select>321 </form>""", html.render())322 323 def test_fill_option_segmented_text_no_value(self):324 html = MarkupTemplate("""<form>325 <select name="foo">326 <option>foo $x bar</option>327 </select>328 </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'})329 self.assertEquals("""<form>330 <select name="foo">331 <option selected="selected">foo 1 bar</option>332 </select>333 </form>""", html.render())334 335 def test_fill_option_unicode_value(self):336 html = HTML("""<form>337 <select name="foo">338 <option value="ö">foo</option>339 </select>340 </form>""") | HTMLFormFiller(data={'foo': u'ö'})341 self.assertEquals(u"""<form>342 <select name="foo">343 <option value="ö" selected="selected">foo</option>344 </select>345 </form>""", html.render(encoding=None))346 347 def test_fill_input_password_disabled(self):348 html = 
HTML("""<form><p>349 <input type="password" name="pass" />350 </p></form>""") | HTMLFormFiller(data={'pass': 'bar'})351 self.assertEquals("""<form><p>352 <input type="password" name="pass"/>353 </p></form>""", html.render())354 355 def test_fill_input_password_enabled(self):356 html = HTML("""<form><p>357 <input type="password" name="pass" />358 </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)359 self.assertEquals("""<form><p>360 <input type="password" name="pass" value="1234"/>361 </p></form>""", html.render())362 363 364 class HTMLSanitizerTestCase(unittest.TestCase):365 366 def test_sanitize_unchanged(self):367 html = HTML('<a href="#">fo<br />o</a>')368 self.assertEquals('<a href="#">fo<br/>o</a>',369 (html | HTMLSanitizer()).render())370 html = HTML('<a href="#with:colon">foo</a>')371 self.assertEquals('<a href="#with:colon">foo</a>',372 (html | HTMLSanitizer()).render())373 374 def test_sanitize_escape_text(self):375 html = HTML('<a href="#">fo&</a>')376 self.assertEquals('<a href="#">fo&</a>',377 (html | HTMLSanitizer()).render())378 html = HTML('<a href="#"><foo></a>')379 self.assertEquals('<a href="#"><foo></a>',380 (html | HTMLSanitizer()).render())381 382 def test_sanitize_entityref_text(self):383 html = HTML('<a href="#">foö</a>')384 self.assertEquals(u'<a href="#">foö</a>',385 (html | HTMLSanitizer()).render(encoding=None))386 387 def test_sanitize_escape_attr(self):388 html = HTML('<div title="<foo>"></div>')389 self.assertEquals('<div title="<foo>"/>',390 (html | HTMLSanitizer()).render())391 392 def test_sanitize_close_empty_tag(self):393 html = HTML('<a href="#">fo<br>o</a>')394 self.assertEquals('<a href="#">fo<br/>o</a>',395 (html | HTMLSanitizer()).render())396 397 def test_sanitize_invalid_entity(self):398 html = HTML('&junk;')399 self.assertEquals('&junk;', (html | HTMLSanitizer()).render())400 401 def test_sanitize_remove_script_elem(self):402 html = HTML('<script>alert("Foo")</script>')403 self.assertEquals('', (html | 
HTMLSanitizer()).render())404 html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>')405 self.assertEquals('', (html | HTMLSanitizer()).render())406 self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>')407 self.assertRaises(ParseError, HTML,408 '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')409 410 def test_sanitize_remove_onclick_attr(self):411 html = HTML('<div onclick=\'alert("foo")\' />')412 self.assertEquals('<div/>', (html | HTMLSanitizer()).render())413 414 def test_sanitize_remove_input_password(self):415 html = HTML('<form><input type="password" /></form>')416 self.assertEquals('<form/>', (html | HTMLSanitizer()).render())417 418 def test_sanitize_remove_comments(self):419 html = HTML('''<div><!-- conditional comment crap --></div>''')420 self.assertEquals('<div/>', (html | HTMLSanitizer()).render())421 422 def test_sanitize_remove_style_scripts(self):423 sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))424 # Inline style with url() using javascript: scheme425 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')426 self.assertEquals('<div/>', (html | sanitizer).render())427 # Inline style with url() using javascript: scheme, using control char428 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')429 self.assertEquals('<div/>', (html | sanitizer).render())430 # Inline style with url() using javascript: scheme, in quotes431 html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')432 self.assertEquals('<div/>', (html | sanitizer).render())433 # IE expressions in CSS not allowed434 html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>')435 self.assertEquals('<div/>', (html | sanitizer).render())436 html = HTML('<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>')437 self.assertEquals('<div/>', (html | sanitizer).render())438 html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));'439 'color: #fff\'>')440 self.assertEquals('<div 
style="color: #fff"/>',441 (html | sanitizer).render())442 # Inline style with url() using javascript: scheme, using unicode443 # escapes444 html = HTML('<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>')445 self.assertEquals('<div/>', (html | sanitizer).render())446 html = HTML('<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>')447 self.assertEquals('<div/>', (html | sanitizer).render())448 html = HTML('<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>')449 self.assertEquals('<div/>', (html | sanitizer).render())450 html = HTML('<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>')451 self.assertEquals('<div/>', (html | sanitizer).render())452 html = HTML('<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>')453 self.assertEquals('<div/>', (html | sanitizer).render())454 455 def test_sanitize_remove_style_phishing(self):456 sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))457 # The position property is not allowed458 html = HTML('<div style="position:absolute;top:0"></div>')459 self.assertEquals('<div style="top:0"/>', (html | sanitizer).render())460 # Normal margins get passed through461 html = HTML('<div style="margin:10px 20px"></div>')462 self.assertEquals('<div style="margin:10px 20px"/>',463 (html | sanitizer).render())464 # But not negative margins465 html = HTML('<div style="margin:-1000px 0 0"></div>')466 self.assertEquals('<div/>', (html | sanitizer).render())467 html = HTML('<div style="margin-left:-2000px 0 0"></div>')468 self.assertEquals('<div/>', (html | sanitizer).render())469 html = HTML('<div style="margin-left:1em 1em 1em -4000px"></div>')470 self.assertEquals('<div/>', (html | sanitizer).render())471 472 def test_sanitize_remove_src_javascript(self):473 html = HTML('<img src=\'javascript:alert("foo")\'>')474 self.assertEquals('<img/>', (html | HTMLSanitizer()).render())475 # Case-insensitive protocol matching476 html = HTML('<IMG 
SRC=\'JaVaScRiPt:alert("foo")\'>')477 self.assertEquals('<img/>', (html | HTMLSanitizer()).render())478 # Grave accents (not parsed)479 self.assertRaises(ParseError, HTML,480 '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')481 # Protocol encoded using UTF-8 numeric entities482 html = HTML('<IMG SRC=\'javascri'483 'pt:alert("foo")\'>')484 self.assertEquals('<img/>', (html | HTMLSanitizer()).render())485 # Protocol encoded using UTF-8 numeric entities without a semicolon486 # (which is allowed because the max number of digits is used)487 html = HTML('<IMG SRC=\'java'488 'script'489 ':alert("foo")\'>')490 self.assertEquals('<img/>', (html | HTMLSanitizer()).render())491 # Protocol encoded using UTF-8 numeric hex entities without a semicolon492 # (which is allowed because the max number of digits is used)493 html = HTML('<IMG SRC=\'javascri'494 'pt:alert("foo")\'>')495 self.assertEquals('<img/>', (html | HTMLSanitizer()).render())496 # Embedded tab character in protocol497 html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>')498 self.assertEquals('<img/>', (html | HTMLSanitizer()).render())499 # Embedded tab character in protocol, but encoded this time500 html = HTML('<IMG SRC=\'jav	ascript:alert("foo");\'>')501 self.assertEquals('<img/>', (html | HTMLSanitizer()).render())502 503 504 def suite():505 suite = unittest.TestSuite()506 suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))507 suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test'))508 suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test'))509 return suite510 511 512 if __name__ == '__main__':513 unittest.main(defaultTest='suite') -
genshi/filters/tests/i18n.py
diff -r 4bbd2b021cb5 genshi/filters/tests/i18n.py
a b 14 14 from datetime import datetime 15 15 import doctest 16 16 from gettext import NullTranslations 17 from StringIO import StringIO18 17 import unittest 19 18 20 19 from genshi.core import Attrs 21 20 from genshi.template import MarkupTemplate, Context 22 21 from genshi.filters.i18n import Translator, extract 23 22 from genshi.input import HTML 23 from genshi.compat import IS_PYTHON2, StringIO 24 24 25 25 26 26 class DummyTranslations(NullTranslations): … … 39 39 def _domain_call(self, func, domain, *args, **kwargs): 40 40 return getattr(self._domains.get(domain, self), func)(*args, **kwargs) 41 41 42 def ugettext(self, message): 43 missing = object() 44 tmsg = self._catalog.get(message, missing) 45 if tmsg is missing: 46 if self._fallback: 47 return self._fallback.ugettext(message) 48 return unicode(message) 49 return tmsg 42 if IS_PYTHON2: 43 def ugettext(self, message): 44 missing = object() 45 tmsg = self._catalog.get(message, missing) 46 if tmsg is missing: 47 if self._fallback: 48 return self._fallback.ugettext(message) 49 return unicode(message) 50 return tmsg 51 else: 52 def gettext(self, message): 53 missing = object() 54 tmsg = self._catalog.get(message, missing) 55 if tmsg is missing: 56 if self._fallback: 57 return self._fallback.gettext(message) 58 return unicode(message) 59 return tmsg 50 60 51 def dugettext(self, domain, message): 52 return self._domain_call('ugettext', domain, message) 61 if IS_PYTHON2: 62 def dugettext(self, domain, message): 63 return self._domain_call('ugettext', domain, message) 64 else: 65 def dgettext(self, domain, message): 66 return self._domain_call('gettext', domain, message) 53 67 54 68 def ungettext(self, msgid1, msgid2, n): 55 69 try: … … 62 76 else: 63 77 return msgid2 64 78 65 def dungettext(self, domain, singular, plural, numeral): 66 return self._domain_call('ungettext', domain, singular, plural, numeral) 79 if not IS_PYTHON2: 80 ngettext = ungettext 81 del ungettext 82 83 if IS_PYTHON2: 84 def dungettext(self, 
domain, singular, plural, numeral): 85 return self._domain_call('ungettext', domain, singular, plural, numeral) 86 else: 87 def dngettext(self, domain, singular, plural, numeral): 88 return self._domain_call('ngettext', domain, singular, plural, numeral) 67 89 68 90 69 91 class TranslatorTestCase(unittest.TestCase): … … 72 94 """ 73 95 Verify that translated attributes end up in a proper `Attrs` instance. 74 96 """ 75 html = HTML( """<html>97 html = HTML(u"""<html> 76 98 <span title="Foo"></span> 77 99 </html>""") 78 100 translator = Translator(lambda s: u"Voh") … … 218 240 gettext = lambda s: u"FÃŒr Details siehe bitte [1:Hilfe]." 219 241 translator = Translator(gettext) 220 242 translator.setup(tmpl) 221 self.assertEqual( """<html>243 self.assertEqual(u"""<html> 222 244 <p>FÃŒr Details siehe bitte <a href="help.html">Hilfe</a>.</p> 223 </html>""" , tmpl.generate().render())245 </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8')) 224 246 225 247 def test_extract_i18n_msg_nonewline(self): 226 248 tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/" … … 241 263 gettext = lambda s: u"FÃŒr Details siehe bitte [1:Hilfe]" 242 264 translator = Translator(gettext) 243 265 translator.setup(tmpl) 244 self.assertEqual( """<html>266 self.assertEqual(u"""<html> 245 267 <p>FÃŒr Details siehe bitte <a href="help.html">Hilfe</a></p> 246 268 </html>""", tmpl.generate().render()) 247 269 … … 264 286 gettext = lambda s: u"FÃŒr Details siehe bitte [1:Hilfe]" 265 287 translator = Translator(gettext) 266 288 translator.setup(tmpl) 267 self.assertEqual( """<html>289 self.assertEqual(u"""<html> 268 290 FÃŒr Details siehe bitte <a href="help.html">Hilfe</a> 269 </html>""" , tmpl.generate().render())291 </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8')) 270 292 271 293 def test_extract_i18n_msg_with_attributes(self): 272 294 tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/" … … 394 416 gettext = lambda s: u"FÃŒr 
Details siehe bitte [1:[2:Hilfeseite]]." 395 417 translator = Translator(gettext) 396 418 translator.setup(tmpl) 397 self.assertEqual( """<html>419 self.assertEqual(u"""<html> 398 420 <p>FÃŒr Details siehe bitte <a href="help.html"><em>Hilfeseite</em></a>.</p> 399 421 </html>""", tmpl.generate().render()) 400 422 … … 449 471 gettext = lambda s: u"[1:] EintrÀge pro Seite anzeigen." 450 472 translator = Translator(gettext) 451 473 translator.setup(tmpl) 452 self.assertEqual( """<html>474 self.assertEqual(u"""<html> 453 475 <p><input type="text" name="num"/> EintrÀge pro Seite anzeigen.</p> 454 476 </html>""", tmpl.generate().render()) 455 477 … … 476 498 gettext = lambda s: u"FÃŒr [2:Details] siehe bitte [1:Hilfe]." 477 499 translator = Translator(gettext) 478 500 translator.setup(tmpl) 479 self.assertEqual( """<html>501 self.assertEqual(u"""<html> 480 502 <p>FÃŒr <em>Details</em> siehe bitte <a href="help.html">Hilfe</a>.</p> 481 503 </html>""", tmpl.generate().render()) 482 504 … … 500 522 <p i18n:msg=""> 501 523 Show me <input type="text" name="num" /> entries per page, starting at page <input type="text" name="num" />. 502 524 </p> 503 </html>""" )525 </html>""", encoding='utf-8') 504 526 gettext = lambda s: u"[1:] EintrÀge pro Seite, beginnend auf Seite [2:]." 505 527 translator = Translator(gettext) 506 528 translator.setup(tmpl) 507 self.assertEqual( """<html>508 <p><input type="text" name="num"/> Eintr\ xc3\xa4ge pro Seite, beginnend auf Seite <input type="text" name="num"/>.</p>509 </html>""" , tmpl.generate().render())529 self.assertEqual(u"""<html> 530 <p><input type="text" name="num"/> Eintr\u00E4ge pro Seite, beginnend auf Seite <input type="text" name="num"/>.</p> 531 </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8')) 510 532 511 533 def test_extract_i18n_msg_with_param(self): 512 534 tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/" … … 545 567 gettext = lambda s: u"%(name)s, sei gegrÃŒÃt!" 
546 568 translator = Translator(gettext) 547 569 translator.setup(tmpl) 548 self.assertEqual( """<html>570 self.assertEqual(u"""<html> 549 571 <p>Jim, sei gegrÃŒÃt!</p> 550 572 </html>""", tmpl.generate(user=dict(name='Jim')).render()) 551 573 … … 559 581 gettext = lambda s: u"Sei gegrÃŒÃt, [1:Alter]!" 560 582 translator = Translator(gettext) 561 583 translator.setup(tmpl) 562 self.assertEqual( """<html>584 self.assertEqual(u"""<html> 563 585 <p>Sei gegrÃŒÃt, <a href="#42">Alter</a>!</p> 564 586 </html>""", tmpl.generate(anchor='42').render()) 565 587 … … 617 639 gettext = lambda s: u"[1:] EintrÀge pro Seite anzeigen." 618 640 translator = Translator(gettext) 619 641 translator.setup(tmpl) 620 self.assertEqual( """<html>642 self.assertEqual(u"""<html> 621 643 <p><input type="text" name="num" value="x"/> EintrÀge pro Seite anzeigen.</p> 622 644 </html>""", tmpl.generate().render()) 623 645 … … 676 698 })) 677 699 tmpl.filters.insert(0, translator) 678 700 tmpl.add_directives(Translator.NAMESPACE, translator) 679 self.assertEqual( """<html>701 self.assertEqual(u"""<html> 680 702 <p title="Voh bÀr">Voh</p> 681 703 </html>""", tmpl.generate().render()) 682 704 … … 720 742 }) 721 743 translator = Translator(translations) 722 744 translator.setup(tmpl) 723 self.assertEqual( """<html>745 self.assertEqual(u"""<html> 724 746 Modificado à um dia por Pedro 725 </html>""" , tmpl.generate(date='um dia', author="Pedro").render())747 </html>""".encode('utf-8'), tmpl.generate(date='um dia', author="Pedro").render(encoding='utf-8')) 726 748 727 749 728 750 def test_i18n_msg_ticket_251_extract(self): … … 749 771 }) 750 772 translator = Translator(translations) 751 773 translator.setup(tmpl) 752 self.assertEqual( """<html>774 self.assertEqual(u"""<html> 753 775 <p><tt><b>Trandução[ 0 ]</b>: <em>Uma moeda</em></tt></p> 754 </html>""" , tmpl.generate().render())776 </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8')) 755 777 756 778 def 
test_extract_i18n_msg_with_other_directives_nested(self): 757 779 tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/" … … 811 833 self.assertEqual(1, len(messages)) 812 834 ctx = Context() 813 835 ctx.push({'trac': {'homepage': 'http://trac.edgewall.org/'}}) 814 self.assertEqual( """<html>836 self.assertEqual(u"""<html> 815 837 <p>Antes de o fazer, porém, 816 838 <strong>por favor tente <a href="http://trac.edgewall.org/search?ticket=yes&noquickjump=1&q=q">procurar</a> 817 839 por problemas semelhantes</strong>, uma vez que é muito provável que este problema … … 846 868 '[2:[3:trac.ini]]\n and cannot be edited on this page.', 847 869 messages[0][2] 848 870 ) 849 self.assertEqual( """<html>871 self.assertEqual(u"""<html> 850 872 <p class="hint"><strong>Nota:</strong> Este repositório está definido em 851 873 <code><a href="href.wiki(TracIni)">trac.ini</a></code> 852 874 e não pode ser editado nesta página.</p> 853 </html>""" , tmpl.generate(editable=False).render())875 </html>""".encode('utf-8'), tmpl.generate(editable=False).render(encoding='utf-8')) 854 876 855 877 def test_extract_i18n_msg_with_py_strip(self): 856 878 tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/" … … 1771 1793 loader = TemplateLoader([dirname], callback=callback) 1772 1794 tmpl = loader.load('tmpl10.html') 1773 1795 1796 if IS_PYTHON2: 1797 dgettext = translations.dugettext 1798 else: 1799 dgettext = translations.dgettext 1800 1774 1801 self.assertEqual("""<html> 1775 1802 <div>Included tmpl0</div> 1776 1803 <p title="foo_Bar 0">foo_Bar 0</p> … … 1797 1824 <p title="Voh">Voh 3</p> 1798 1825 <p title="Voh">Voh 3</p> 1799 1826 </html>""", tmpl.generate(idx=-1, 1800 dg= translations.dugettext).render())1827 dg=dgettext).render()) 1801 1828 finally: 1802 1829 shutil.rmtree(dirname) 1803 1830 -
new file genshi/filters/tests/test_html.py
diff -r 4bbd2b021cb5 genshi/filters/tests/test_html.py
- + 1 # -*- coding: utf-8 -*- 2 # 3 # Copyright (C) 2006-2009 Edgewall Software 4 # All rights reserved. 5 # 6 # This software is licensed as described in the file COPYING, which 7 # you should have received as part of this distribution. The terms 8 # are also available at http://genshi.edgewall.org/wiki/License. 9 # 10 # This software consists of voluntary contributions made by many 11 # individuals. For the exact contribution history, see the revision 12 # history and logs, available at http://genshi.edgewall.org/log/. 13 14 import doctest 15 import unittest 16 17 from genshi.input import HTML, ParseError 18 from genshi.filters.html import HTMLFormFiller, HTMLSanitizer 19 from genshi.template import MarkupTemplate 20 21 class HTMLFormFillerTestCase(unittest.TestCase): 22 23 def test_fill_input_text_no_value(self): 24 html = HTML(u"""<form><p> 25 <input type="text" name="foo" /> 26 </p></form>""") | HTMLFormFiller() 27 self.assertEquals("""<form><p> 28 <input type="text" name="foo"/> 29 </p></form>""", html.render()) 30 31 def test_fill_input_text_single_value(self): 32 html = HTML(u"""<form><p> 33 <input type="text" name="foo" /> 34 </p></form>""") | HTMLFormFiller(data={'foo': 'bar'}) 35 self.assertEquals("""<form><p> 36 <input type="text" name="foo" value="bar"/> 37 </p></form>""", html.render()) 38 39 def test_fill_input_text_multi_value(self): 40 html = HTML(u"""<form><p> 41 <input type="text" name="foo" /> 42 </p></form>""") | HTMLFormFiller(data={'foo': ['bar']}) 43 self.assertEquals("""<form><p> 44 <input type="text" name="foo" value="bar"/> 45 </p></form>""", html.render()) 46 47 def test_fill_input_hidden_no_value(self): 48 html = HTML(u"""<form><p> 49 <input type="hidden" name="foo" /> 50 </p></form>""") | HTMLFormFiller() 51 self.assertEquals("""<form><p> 52 <input type="hidden" name="foo"/> 53 </p></form>""", html.render()) 54 55 def test_fill_input_hidden_single_value(self): 56 html = HTML(u"""<form><p> 57 <input type="hidden" name="foo" /> 58 
</p></form>""") | HTMLFormFiller(data={'foo': 'bar'}) 59 self.assertEquals("""<form><p> 60 <input type="hidden" name="foo" value="bar"/> 61 </p></form>""", html.render()) 62 63 def test_fill_input_hidden_multi_value(self): 64 html = HTML(u"""<form><p> 65 <input type="hidden" name="foo" /> 66 </p></form>""") | HTMLFormFiller(data={'foo': ['bar']}) 67 self.assertEquals("""<form><p> 68 <input type="hidden" name="foo" value="bar"/> 69 </p></form>""", html.render()) 70 71 def test_fill_textarea_no_value(self): 72 html = HTML(u"""<form><p> 73 <textarea name="foo"></textarea> 74 </p></form>""") | HTMLFormFiller() 75 self.assertEquals("""<form><p> 76 <textarea name="foo"/> 77 </p></form>""", html.render()) 78 79 def test_fill_textarea_single_value(self): 80 html = HTML(u"""<form><p> 81 <textarea name="foo"></textarea> 82 </p></form>""") | HTMLFormFiller(data={'foo': 'bar'}) 83 self.assertEquals("""<form><p> 84 <textarea name="foo">bar</textarea> 85 </p></form>""", html.render()) 86 87 def test_fill_textarea_multi_value(self): 88 html = HTML(u"""<form><p> 89 <textarea name="foo"></textarea> 90 </p></form>""") | HTMLFormFiller(data={'foo': ['bar']}) 91 self.assertEquals("""<form><p> 92 <textarea name="foo">bar</textarea> 93 </p></form>""", html.render()) 94 95 def test_fill_textarea_multiple(self): 96 # Ensure that the subsequent textarea doesn't get the data from the 97 # first 98 html = HTML(u"""<form><p> 99 <textarea name="foo"></textarea> 100 <textarea name="bar"></textarea> 101 </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'}) 102 self.assertEquals("""<form><p> 103 <textarea name="foo">Some text</textarea> 104 <textarea name="bar"/> 105 </p></form>""", html.render()) 106 107 def test_fill_textarea_preserve_original(self): 108 html = HTML(u"""<form><p> 109 <textarea name="foo"></textarea> 110 <textarea name="bar">Original value</textarea> 111 </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'}) 112 self.assertEquals("""<form><p> 113 <textarea 
name="foo">Some text</textarea> 114 <textarea name="bar">Original value</textarea> 115 </p></form>""", html.render()) 116 117 def test_fill_input_checkbox_single_value_auto_no_value(self): 118 html = HTML(u"""<form><p> 119 <input type="checkbox" name="foo" /> 120 </p></form>""") | HTMLFormFiller() 121 self.assertEquals("""<form><p> 122 <input type="checkbox" name="foo"/> 123 </p></form>""", html.render()) 124 125 def test_fill_input_checkbox_single_value_auto(self): 126 html = HTML(u"""<form><p> 127 <input type="checkbox" name="foo" /> 128 </p></form>""") 129 self.assertEquals("""<form><p> 130 <input type="checkbox" name="foo"/> 131 </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render()) 132 self.assertEquals("""<form><p> 133 <input type="checkbox" name="foo" checked="checked"/> 134 </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render()) 135 136 def test_fill_input_checkbox_single_value_defined(self): 137 html = HTML("""<form><p> 138 <input type="checkbox" name="foo" value="1" /> 139 </p></form>""", encoding='ascii') 140 self.assertEquals("""<form><p> 141 <input type="checkbox" name="foo" value="1" checked="checked"/> 142 </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render()) 143 self.assertEquals("""<form><p> 144 <input type="checkbox" name="foo" value="1"/> 145 </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render()) 146 147 def test_fill_input_checkbox_multi_value_auto(self): 148 html = HTML("""<form><p> 149 <input type="checkbox" name="foo" /> 150 </p></form>""", encoding='ascii') 151 self.assertEquals("""<form><p> 152 <input type="checkbox" name="foo"/> 153 </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render()) 154 self.assertEquals("""<form><p> 155 <input type="checkbox" name="foo" checked="checked"/> 156 </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render()) 157 158 def test_fill_input_checkbox_multi_value_defined(self): 159 html = HTML(u"""<form><p> 160 <input type="checkbox" 
name="foo" value="1" /> 161 </p></form>""") 162 self.assertEquals("""<form><p> 163 <input type="checkbox" name="foo" value="1" checked="checked"/> 164 </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render()) 165 self.assertEquals("""<form><p> 166 <input type="checkbox" name="foo" value="1"/> 167 </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render()) 168 169 def test_fill_input_radio_no_value(self): 170 html = HTML(u"""<form><p> 171 <input type="radio" name="foo" /> 172 </p></form>""") | HTMLFormFiller() 173 self.assertEquals("""<form><p> 174 <input type="radio" name="foo"/> 175 </p></form>""", html.render()) 176 177 def test_fill_input_radio_single_value(self): 178 html = HTML(u"""<form><p> 179 <input type="radio" name="foo" value="1" /> 180 </p></form>""") 181 self.assertEquals("""<form><p> 182 <input type="radio" name="foo" value="1" checked="checked"/> 183 </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render()) 184 self.assertEquals("""<form><p> 185 <input type="radio" name="foo" value="1"/> 186 </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render()) 187 188 def test_fill_input_radio_multi_value(self): 189 html = HTML(u"""<form><p> 190 <input type="radio" name="foo" value="1" /> 191 </p></form>""") 192 self.assertEquals("""<form><p> 193 <input type="radio" name="foo" value="1" checked="checked"/> 194 </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render()) 195 self.assertEquals("""<form><p> 196 <input type="radio" name="foo" value="1"/> 197 </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render()) 198 199 def test_fill_input_radio_empty_string(self): 200 html = HTML(u"""<form><p> 201 <input type="radio" name="foo" value="" /> 202 </p></form>""") 203 self.assertEquals("""<form><p> 204 <input type="radio" name="foo" value="" checked="checked"/> 205 </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render()) 206 207 def test_fill_input_radio_multi_empty_string(self): 208 html 
= HTML(u"""<form><p> 209 <input type="radio" name="foo" value="" /> 210 </p></form>""") 211 self.assertEquals("""<form><p> 212 <input type="radio" name="foo" value="" checked="checked"/> 213 </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render()) 214 215 def test_fill_select_no_value_auto(self): 216 html = HTML(u"""<form><p> 217 <select name="foo"> 218 <option>1</option> 219 <option>2</option> 220 <option>3</option> 221 </select> 222 </p></form>""") | HTMLFormFiller() 223 self.assertEquals("""<form><p> 224 <select name="foo"> 225 <option>1</option> 226 <option>2</option> 227 <option>3</option> 228 </select> 229 </p></form>""", html.render()) 230 231 def test_fill_select_no_value_defined(self): 232 html = HTML(u"""<form><p> 233 <select name="foo"> 234 <option value="1">1</option> 235 <option value="2">2</option> 236 <option value="3">3</option> 237 </select> 238 </p></form>""") | HTMLFormFiller() 239 self.assertEquals("""<form><p> 240 <select name="foo"> 241 <option value="1">1</option> 242 <option value="2">2</option> 243 <option value="3">3</option> 244 </select> 245 </p></form>""", html.render()) 246 247 def test_fill_select_single_value_auto(self): 248 html = HTML(u"""<form><p> 249 <select name="foo"> 250 <option>1</option> 251 <option>2</option> 252 <option>3</option> 253 </select> 254 </p></form>""") | HTMLFormFiller(data={'foo': '1'}) 255 self.assertEquals("""<form><p> 256 <select name="foo"> 257 <option selected="selected">1</option> 258 <option>2</option> 259 <option>3</option> 260 </select> 261 </p></form>""", html.render()) 262 263 def test_fill_select_single_value_defined(self): 264 html = HTML(u"""<form><p> 265 <select name="foo"> 266 <option value="1">1</option> 267 <option value="2">2</option> 268 <option value="3">3</option> 269 </select> 270 </p></form>""") | HTMLFormFiller(data={'foo': '1'}) 271 self.assertEquals("""<form><p> 272 <select name="foo"> 273 <option value="1" selected="selected">1</option> 274 <option value="2">2</option> 
275 <option value="3">3</option> 276 </select> 277 </p></form>""", html.render()) 278 279 def test_fill_select_multi_value_auto(self): 280 html = HTML(u"""<form><p> 281 <select name="foo" multiple> 282 <option>1</option> 283 <option>2</option> 284 <option>3</option> 285 </select> 286 </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']}) 287 self.assertEquals("""<form><p> 288 <select name="foo" multiple="multiple"> 289 <option selected="selected">1</option> 290 <option>2</option> 291 <option selected="selected">3</option> 292 </select> 293 </p></form>""", html.render()) 294 295 def test_fill_select_multi_value_defined(self): 296 html = HTML(u"""<form><p> 297 <select name="foo" multiple> 298 <option value="1">1</option> 299 <option value="2">2</option> 300 <option value="3">3</option> 301 </select> 302 </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']}) 303 self.assertEquals("""<form><p> 304 <select name="foo" multiple="multiple"> 305 <option value="1" selected="selected">1</option> 306 <option value="2">2</option> 307 <option value="3" selected="selected">3</option> 308 </select> 309 </p></form>""", html.render()) 310 311 def test_fill_option_segmented_text(self): 312 html = MarkupTemplate(u"""<form> 313 <select name="foo"> 314 <option value="1">foo $x</option> 315 </select> 316 </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'}) 317 self.assertEquals(u"""<form> 318 <select name="foo"> 319 <option value="1" selected="selected">foo 1</option> 320 </select> 321 </form>""", html.render()) 322 323 def test_fill_option_segmented_text_no_value(self): 324 html = MarkupTemplate("""<form> 325 <select name="foo"> 326 <option>foo $x bar</option> 327 </select> 328 </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'}) 329 self.assertEquals("""<form> 330 <select name="foo"> 331 <option selected="selected">foo 1 bar</option> 332 </select> 333 </form>""", html.render()) 334 335 def test_fill_option_unicode_value(self): 336 html = 
HTML(u"""<form> 337 <select name="foo"> 338 <option value="ö">foo</option> 339 </select> 340 </form>""") | HTMLFormFiller(data={'foo': u'ö'}) 341 self.assertEquals(u"""<form> 342 <select name="foo"> 343 <option value="ö" selected="selected">foo</option> 344 </select> 345 </form>""", html.render(encoding=None)) 346 347 def test_fill_input_password_disabled(self): 348 html = HTML(u"""<form><p> 349 <input type="password" name="pass" /> 350 </p></form>""") | HTMLFormFiller(data={'pass': 'bar'}) 351 self.assertEquals("""<form><p> 352 <input type="password" name="pass"/> 353 </p></form>""", html.render()) 354 355 def test_fill_input_password_enabled(self): 356 html = HTML(u"""<form><p> 357 <input type="password" name="pass" /> 358 </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True) 359 self.assertEquals("""<form><p> 360 <input type="password" name="pass" value="1234"/> 361 </p></form>""", html.render()) 362 363 364 class HTMLSanitizerTestCase(unittest.TestCase): 365 366 def test_sanitize_unchanged(self): 367 html = HTML(u'<a href="#">fo<br />o</a>') 368 self.assertEquals('<a href="#">fo<br/>o</a>', 369 (html | HTMLSanitizer()).render()) 370 html = HTML(u'<a href="#with:colon">foo</a>') 371 self.assertEquals('<a href="#with:colon">foo</a>', 372 (html | HTMLSanitizer()).render()) 373 374 def test_sanitize_escape_text(self): 375 html = HTML(u'<a href="#">fo&</a>') 376 self.assertEquals('<a href="#">fo&</a>', 377 (html | HTMLSanitizer()).render()) 378 html = HTML(u'<a href="#"><foo></a>') 379 self.assertEquals('<a href="#"><foo></a>', 380 (html | HTMLSanitizer()).render()) 381 382 def test_sanitize_entityref_text(self): 383 html = HTML(u'<a href="#">foö</a>') 384 self.assertEquals(u'<a href="#">foö</a>', 385 (html | HTMLSanitizer()).render(encoding=None)) 386 387 def test_sanitize_escape_attr(self): 388 html = HTML(u'<div title="<foo>"></div>') 389 self.assertEquals('<div title="<foo>"/>', 390 (html | HTMLSanitizer()).render()) 391 392 def 
test_sanitize_close_empty_tag(self): 393 html = HTML(u'<a href="#">fo<br>o</a>') 394 self.assertEquals('<a href="#">fo<br/>o</a>', 395 (html | HTMLSanitizer()).render()) 396 397 def test_sanitize_invalid_entity(self): 398 html = HTML(u'&junk;') 399 self.assertEquals('&junk;', (html | HTMLSanitizer()).render()) 400 401 def test_sanitize_remove_script_elem(self): 402 html = HTML(u'<script>alert("Foo")</script>') 403 self.assertEquals('', (html | HTMLSanitizer()).render()) 404 html = HTML(u'<SCRIPT SRC="http://example.com/"></SCRIPT>') 405 self.assertEquals('', (html | HTMLSanitizer()).render()) 406 self.assertRaises(ParseError, HTML, u'<SCR\0IPT>alert("foo")</SCR\0IPT>') 407 self.assertRaises(ParseError, HTML, 408 u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>') 409 410 def test_sanitize_remove_onclick_attr(self): 411 html = HTML(u'<div onclick=\'alert("foo")\' />') 412 self.assertEquals('<div/>', (html | HTMLSanitizer()).render()) 413 414 def test_sanitize_remove_input_password(self): 415 html = HTML(u'<form><input type="password" /></form>') 416 self.assertEquals('<form/>', (html | HTMLSanitizer()).render()) 417 418 def test_sanitize_remove_comments(self): 419 html = HTML(u'''<div><!-- conditional comment crap --></div>''') 420 self.assertEquals('<div/>', (html | HTMLSanitizer()).render()) 421 422 def test_sanitize_remove_style_scripts(self): 423 sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) 424 # Inline style with url() using javascript: scheme 425 html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') 426 self.assertEquals('<div/>', (html | sanitizer).render()) 427 # Inline style with url() using javascript: scheme, using control char 428 html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') 429 self.assertEquals('<div/>', (html | sanitizer).render()) 430 # Inline style with url() using javascript: scheme, in quotes 431 html = HTML(u'<DIV STYLE=\'background: 
url("javascript:alert(foo)")\'>') 432 self.assertEquals('<div/>', (html | sanitizer).render()) 433 # IE expressions in CSS not allowed 434 html = HTML(u'<DIV STYLE=\'width: expression(alert("foo"));\'>') 435 self.assertEquals('<div/>', (html | sanitizer).render()) 436 html = HTML(u'<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>') 437 self.assertEquals('<div/>', (html | sanitizer).render()) 438 html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"));' 439 'color: #fff\'>') 440 self.assertEquals('<div style="color: #fff"/>', 441 (html | sanitizer).render()) 442 # Inline style with url() using javascript: scheme, using unicode 443 # escapes 444 html = HTML(u'<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>') 445 self.assertEquals('<div/>', (html | sanitizer).render()) 446 html = HTML(u'<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>') 447 self.assertEquals('<div/>', (html | sanitizer).render()) 448 html = HTML(u'<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>') 449 self.assertEquals('<div/>', (html | sanitizer).render()) 450 html = HTML(u'<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>') 451 self.assertEquals('<div/>', (html | sanitizer).render()) 452 html = HTML(u'<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>') 453 self.assertEquals('<div/>', (html | sanitizer).render()) 454 455 def test_sanitize_remove_style_phishing(self): 456 sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])) 457 # The position property is not allowed 458 html = HTML(u'<div style="position:absolute;top:0"></div>') 459 self.assertEquals('<div style="top:0"/>', (html | sanitizer).render()) 460 # Normal margins get passed through 461 html = HTML(u'<div style="margin:10px 20px"></div>') 462 self.assertEquals('<div style="margin:10px 20px"/>', 463 (html | sanitizer).render()) 464 # But not negative margins 465 html = HTML(u'<div style="margin:-1000px 0 0"></div>') 466 
self.assertEquals('<div/>', (html | sanitizer).render()) 467 html = HTML(u'<div style="margin-left:-2000px 0 0"></div>') 468 self.assertEquals('<div/>', (html | sanitizer).render()) 469 html = HTML(u'<div style="margin-left:1em 1em 1em -4000px"></div>') 470 self.assertEquals('<div/>', (html | sanitizer).render()) 471 472 def test_sanitize_remove_src_javascript(self): 473 html = HTML(u'<img src=\'javascript:alert("foo")\'>') 474 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 475 # Case-insensitive protocol matching 476 html = HTML(u'<IMG SRC=\'JaVaScRiPt:alert("foo")\'>') 477 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 478 # Grave accents (not parsed) 479 self.assertRaises(ParseError, HTML, 480 u'<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>') 481 # Protocol encoded using UTF-8 numeric entities 482 html = HTML(u'<IMG SRC=\'javascri' 483 'pt:alert("foo")\'>') 484 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 485 # Protocol encoded using UTF-8 numeric entities without a semicolon 486 # (which is allowed because the max number of digits is used) 487 html = HTML(u'<IMG SRC=\'java' 488 'script' 489 ':alert("foo")\'>') 490 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 491 # Protocol encoded using UTF-8 numeric hex entities without a semicolon 492 # (which is allowed because the max number of digits is used) 493 html = HTML(u'<IMG SRC=\'javascri' 494 'pt:alert("foo")\'>') 495 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 496 # Embedded tab character in protocol 497 html = HTML(u'<IMG SRC=\'jav\tascript:alert("foo");\'>') 498 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 499 # Embedded tab character in protocol, but encoded this time 500 html = HTML(u'<IMG SRC=\'jav	ascript:alert("foo");\'>') 501 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 502 503 504 def suite(): 505 suite = unittest.TestSuite() 506 
suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__)) 507 suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test')) 508 suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test')) 509 return suite 510 511 512 if __name__ == '__main__': 513 unittest.main(defaultTest='suite') -
genshi/filters/tests/transform.py
diff -r 4bbd2b021cb5 genshi/filters/tests/transform.py
a b 48 48 49 49 def _transform(html, transformer, with_attrs=False): 50 50 """Apply transformation returning simplified marked stream.""" 51 if isinstance(html, basestring): 52 html = HTML(html) 51 if isinstance(html, basestring) and not isinstance(html, unicode): 52 html = HTML(html, encoding='utf-8') 53 elif isinstance(html, unicode): 54 html = HTML(html, encoding='utf-8') 53 55 stream = transformer(html, keep_marks=True) 54 56 return _simplify(stream, with_attrs) 55 57 … … 57 59 class SelectTest(unittest.TestCase): 58 60 """Test .select()""" 59 61 def _select(self, select): 60 html = HTML(FOOBAR )62 html = HTML(FOOBAR, encoding='utf-8') 61 63 if isinstance(select, basestring): 62 64 select = [select] 63 65 transformer = Transformer(select[0]) … … 138 140 139 141 def test_select_text_context(self): 140 142 self.assertEqual( 141 list(Transformer('.')(HTML( 'foo'), keep_marks=True)),143 list(Transformer('.')(HTML(u'foo'), keep_marks=True)), 142 144 [('OUTSIDE', ('TEXT', u'foo', (None, 1, 0)))], 143 145 ) 144 146 … … 205 207 206 208 def test_invert_text_context(self): 207 209 self.assertEqual( 208 _simplify(Transformer('.').invert()(HTML( 'foo'), keep_marks=True)),210 _simplify(Transformer('.').invert()(HTML(u'foo'), keep_marks=True)), 209 211 [(None, 'TEXT', u'foo')], 210 212 ) 211 213 … … 271 273 272 274 def test_empty_text_context(self): 273 275 self.assertEqual( 274 _simplify(Transformer('.')(HTML( 'foo'), keep_marks=True)),276 _simplify(Transformer('.')(HTML(u'foo'), keep_marks=True)), 275 277 [(OUTSIDE, TEXT, u'foo')], 276 278 ) 277 279 … … 656 658 657 659 def __iter__(self): 658 660 self.count += 1 659 return iter(HTML( 'CONTENT %i' % self.count))661 return iter(HTML(u'CONTENT %i' % self.count)) 660 662 661 if isinstance(html, basestring): 663 if isinstance(html, basestring) and not isinstance(html, unicode): 664 html = HTML(html, encoding='utf-8') 665 else: 662 666 html = HTML(html) 663 667 if content is None: 664 668 content = Injector() -
genshi/filters/transform.py
diff -r 4bbd2b021cb5 genshi/filters/transform.py
a b 31 31 ... <body> 32 32 ... Some <em>body</em> text. 33 33 ... </body> 34 ... </html>''') 34 ... </html>''', 35 ... encoding='utf-8') 35 36 >>> print(html | Transformer('body/em').map(unicode.upper, TEXT) 36 37 ... .unwrap().wrap(tag.u)) 37 38 <html> … … 136 137 mark. 137 138 138 139 >>> html = HTML('<html><head><title>Some Title</title></head>' 139 ... '<body>Some <em>body</em> text.</body></html>') 140 ... '<body>Some <em>body</em> text.</body></html>', 141 ... encoding='utf-8') 140 142 141 143 Transformations act on selected stream events matching an XPath expression. 142 144 Here's an example of removing some markup (the title, in this case) … … 215 217 ... yield mark, (kind, data.upper(), pos) 216 218 ... else: 217 219 ... yield mark, (kind, data, pos) 218 >>> short_stream = HTML('<body>Some <em>test</em> text</body>') 220 >>> short_stream = HTML('<body>Some <em>test</em> text</body>', 221 ... encoding='utf-8') 219 222 >>> print(short_stream | Transformer('.//em/text()').apply(upper)) 220 223 <body>Some <em>TEST</em> text</body> 221 224 """ … … 233 236 """Mark events matching the given XPath expression, within the current 234 237 selection. 235 238 236 >>> html = HTML('<body>Some <em>test</em> text</body>' )239 >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') 237 240 >>> print(html | Transformer().select('.//em').trace()) 238 241 (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) 239 242 (None, ('TEXT', u'Some ', (None, 1, 6))) … … 257 260 Specificaly, all marks are converted to null marks, and all null marks 258 261 are converted to OUTSIDE marks. 
259 262 260 >>> html = HTML('<body>Some <em>test</em> text</body>' )263 >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') 261 264 >>> print(html | Transformer('//em').invert().trace()) 262 265 ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) 263 266 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) … … 277 280 278 281 Example: 279 282 280 >>> html = HTML('<body>Some <em>test</em> text</body>' )283 >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') 281 284 >>> print(html | Transformer('//em').end().trace()) 282 285 ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0))) 283 286 ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6))) … … 301 304 Example: 302 305 303 306 >>> html = HTML('<html><head><title>Some Title</title></head>' 304 ... '<body>Some <em>body</em> text.</body></html>') 307 ... '<body>Some <em>body</em> text.</body></html>', 308 ... encoding='utf-8') 305 309 >>> print(html | Transformer('.//em').empty()) 306 310 <html><head><title>Some Title</title></head><body>Some <em/> 307 311 text.</body></html> … … 316 320 Example: 317 321 318 322 >>> html = HTML('<html><head><title>Some Title</title></head>' 319 ... '<body>Some <em>body</em> text.</body></html>') 323 ... '<body>Some <em>body</em> text.</body></html>', 324 ... encoding='utf-8') 320 325 >>> print(html | Transformer('.//em').remove()) 321 326 <html><head><title>Some Title</title></head><body>Some 322 327 text.</body></html> … … 333 338 Example: 334 339 335 340 >>> html = HTML('<html><head><title>Some Title</title></head>' 336 ... '<body>Some <em>body</em> text.</body></html>') 341 ... '<body>Some <em>body</em> text.</body></html>', 342 ... encoding='utf-8') 337 343 >>> print(html | Transformer('.//em').unwrap()) 338 344 <html><head><title>Some Title</title></head><body>Some body 339 345 text.</body></html> … … 346 352 """Wrap selection in an element. 347 353 348 354 >>> html = HTML('<html><head><title>Some Title</title></head>' 349 ... 
'<body>Some <em>body</em> text.</body></html>') 355 ... '<body>Some <em>body</em> text.</body></html>', 356 ... encoding='utf-8') 350 357 >>> print(html | Transformer('.//em').wrap('strong')) 351 358 <html><head><title>Some Title</title></head><body>Some 352 359 <strong><em>body</em></strong> text.</body></html> … … 362 369 """Replace selection with content. 363 370 364 371 >>> html = HTML('<html><head><title>Some Title</title></head>' 365 ... '<body>Some <em>body</em> text.</body></html>') 372 ... '<body>Some <em>body</em> text.</body></html>', 373 ... encoding='utf-8') 366 374 >>> print(html | Transformer('.//title/text()').replace('New Title')) 367 375 <html><head><title>New Title</title></head><body>Some <em>body</em> 368 376 text.</body></html> … … 380 388 tag: 381 389 382 390 >>> html = HTML('<html><head><title>Some Title</title></head>' 383 ... '<body>Some <em>body</em> text.</body></html>') 391 ... '<body>Some <em>body</em> text.</body></html>', 392 ... encoding='utf-8') 384 393 >>> print(html | Transformer('.//em').before('emphasised ')) 385 394 <html><head><title>Some Title</title></head><body>Some emphasised 386 395 <em>body</em> text.</body></html> … … 397 406 Here, we insert some text after the </em> closing tag: 398 407 399 408 >>> html = HTML('<html><head><title>Some Title</title></head>' 400 ... '<body>Some <em>body</em> text.</body></html>') 409 ... '<body>Some <em>body</em> text.</body></html>', 410 ... encoding='utf-8') 401 411 >>> print(html | Transformer('.//em').after(' rock')) 402 412 <html><head><title>Some Title</title></head><body>Some <em>body</em> 403 413 rock text.</body></html> … … 414 424 Inserting some new text at the start of the <body>: 415 425 416 426 >>> html = HTML('<html><head><title>Some Title</title></head>' 417 ... '<body>Some <em>body</em> text.</body></html>') 427 ... '<body>Some <em>body</em> text.</body></html>', 428 ... encoding='utf-8') 418 429 >>> print(html | Transformer('.//body').prepend('Some new body text. 
')) 419 430 <html><head><title>Some Title</title></head><body>Some new body text. 420 431 Some <em>body</em> text.</body></html> … … 429 440 """Insert content before the END event of the selection. 430 441 431 442 >>> html = HTML('<html><head><title>Some Title</title></head>' 432 ... '<body>Some <em>body</em> text.</body></html>') 443 ... '<body>Some <em>body</em> text.</body></html>', 444 ... encoding='utf-8') 433 445 >>> print(html | Transformer('.//body').append(' Some new body text.')) 434 446 <html><head><title>Some Title</title></head><body>Some <em>body</em> 435 447 text. Some new body text.</body></html> … … 450 462 451 463 >>> html = HTML('<html><head><title>Some Title</title></head>' 452 464 ... '<body>Some <em class="before">body</em> <em>text</em>.</body>' 453 ... '</html>' )465 ... '</html>', encoding='utf-8') 454 466 >>> print(html | Transformer('body/em').attr('class', None)) 455 467 <html><head><title>Some Title</title></head><body>Some <em>body</em> 456 468 <em>text</em>.</body></html> … … 493 505 >>> from genshi.builder import tag 494 506 >>> buffer = StreamBuffer() 495 507 >>> html = HTML('<html><head><title>Some Title</title></head>' 496 ... '<body>Some <em>body</em> text.</body></html>') 508 ... '<body>Some <em>body</em> text.</body></html>', 509 ... encoding='utf-8') 497 510 >>> print(html | Transformer('head/title/text()').copy(buffer) 498 511 ... .end().select('body').prepend(tag.h1(buffer))) 499 512 <html><head><title>Some Title</title></head><body><h1>Some … … 514 527 515 528 >>> html = HTML('<html><head><title>Some Title</title></head>' 516 529 ... '<body><em>Some</em> <em class="before">body</em>' 517 ... '<em>text</em>.</body></html>') 530 ... '<em>text</em>.</body></html>', 531 ... encoding='utf-8') 518 532 >>> buffer = StreamBuffer() 519 533 >>> def apply_attr(name, entry): 520 534 ... 
return list(buffer)[0][1][1].get('class') … … 546 560 >>> from genshi.builder import tag 547 561 >>> buffer = StreamBuffer() 548 562 >>> html = HTML('<html><head><title>Some Title</title></head>' 549 ... '<body>Some <em>body</em> text.</body></html>') 563 ... '<body>Some <em>body</em> text.</body></html>', 564 ... encoding='utf-8') 550 565 >>> print(html | Transformer('.//em/text()').cut(buffer) 551 566 ... .end().select('.//em').after(tag.h1(buffer))) 552 567 <html><head><title>Some Title</title></head><body>Some … … 577 592 top of the document: 578 593 579 594 >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> ' 580 ... 'text <note>two</note>.</body></doc>') 595 ... 'text <note>two</note>.</body></doc>', 596 ... encoding='utf-8') 581 597 >>> buffer = StreamBuffer() 582 598 >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True) 583 599 ... .end().buffer().select('notes').prepend(buffer)) … … 595 611 596 612 >>> from genshi.filters.html import HTMLSanitizer 597 613 >>> html = HTML('<html><body>Some text<script>alert(document.cookie)' 598 ... '</script> and some more text</body></html>') 614 ... '</script> and some more text</body></html>', 615 ... encoding='utf-8') 599 616 >>> print(html | Transformer('body/*').filter(HTMLSanitizer())) 600 617 <html><body>Some text and some more text</body></html> 601 618 … … 609 626 the selection. 610 627 611 628 >>> html = HTML('<html><head><title>Some Title</title></head>' 612 ... '<body>Some <em>body</em> text.</body></html>') 629 ... '<body>Some <em>body</em> text.</body></html>', 630 ... encoding='utf-8') 613 631 >>> print(html | Transformer('head/title').map(unicode.upper, TEXT)) 614 632 <html><head><title>SOME TITLE</title></head><body>Some <em>body</em> 615 633 text.</body></html> … … 627 645 628 646 >>> html = HTML('<html><body>Some text, some more text and ' 629 647 ... '<b>some bold text</b>\\n' 630 ... '<i>some italicised text</i></body></html>') 648 ... 
'<i>some italicised text</i></body></html>', 649 ... encoding='utf-8') 631 650 >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME')) 632 651 <html><body>Some text, some more text and <b>SOME bold text</b> 633 652 <i>some italicised text</i></body></html> … … 649 668 """Rename matching elements. 650 669 651 670 >>> html = HTML('<html><body>Some text, some more text and ' 652 ... '<b>some bold text</b></body></html>') 671 ... '<b>some bold text</b></body></html>', 672 ... encoding='utf-8') 653 673 >>> print(html | Transformer('body/b').rename('strong')) 654 674 <html><body>Some text, some more text and <strong>some bold text</strong></body></html> 655 675 """ … … 658 678 def trace(self, prefix='', fileobj=None): 659 679 """Print events as they pass through the transform. 660 680 661 >>> html = HTML('<body>Some <em>test</em> text</body>' )681 >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') 662 682 >>> print(html | Transformer('em').trace()) 663 683 (None, ('START', (QName('body'), Attrs()), (None, 1, 0))) 664 684 (None, ('TEXT', u'Some ', (None, 1, 6))) … … 1024 1044 ... yield event 1025 1045 ... for event in stream: 1026 1046 ... yield event 1027 >>> html = HTML('<body>Some <em>test</em> text</body>' )1047 >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8') 1028 1048 >>> print(html | Transformer('.//em').apply(Top('Prefix '))) 1029 1049 Prefix <body>Some <em>test</em> text</body> 1030 1050 """ -
genshi/input.py
diff -r 4bbd2b021cb5 genshi/input.py
a b 18 18 from itertools import chain 19 19 import htmlentitydefs as entities 20 20 import HTMLParser as html 21 from StringIO import StringIO22 21 from xml.parsers import expat 23 22 24 23 from genshi.core import Attrs, QName, Stream, stripentities 25 24 from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, \ 26 25 END_NS, START_CDATA, END_CDATA, PI, COMMENT 26 from genshi.compat import StringIO, BytesIO 27 27 28 28 29 __all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML'] 29 30 __docformat__ = 'restructuredtext en' … … 90 91 91 92 _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in 92 93 entities.name2codepoint.items()] 93 _external_dtd = '\n'.join(_entitydefs)94 _external_dtd = u'\n'.join(_entitydefs).encode('utf-8') 94 95 95 96 def __init__(self, source, filename=None, encoding=None): 96 97 """Initialize the parser for the given XML input. … … 108 109 # Setup the Expat parser 109 110 parser = expat.ParserCreate(encoding, '}') 110 111 parser.buffer_text = True 111 parser.returns_unicode = True 112 # Python 3 does not have returns_unicode 113 if hasattr(parser, 'returns_unicode'): 114 parser.returns_unicode = True 112 115 parser.ordered_attributes = True 113 116 114 117 parser.StartElementHandler = self._handle_start … … 146 149 while 1: 147 150 while not done and len(self._queue) == 0: 148 151 data = self.source.read(bufsize) 149 if data == '': # end of data152 if not data: # end of data 150 153 if hasattr(self, 'expat'): 151 154 self.expat.Parse('', True) 152 155 del self.expat # get rid of circular references … … 170 173 171 174 def _build_foreign(self, context, base, sysid, pubid): 172 175 parser = self.expat.ExternalEntityParserCreate(context) 173 parser.ParseFile( StringIO(self._external_dtd))176 parser.ParseFile(BytesIO(self._external_dtd)) 174 177 return 1 175 178 176 179 def _enqueue(self, kind, data=None, pos=None): … … 279 282 280 283 The parsing is initiated by iterating over the parser object: 
281 284 282 >>> parser = HTMLParser( StringIO('<UL compact><LI>Foo</UL>'))285 >>> parser = HTMLParser(BytesIO(u'<UL compact><LI>Foo</UL>'.encode('utf-8')), encoding='utf-8') 283 286 >>> for kind, data, pos in parser: 284 287 ... print('%s %s' % (kind, data)) 285 288 START (QName('ul'), Attrs([(QName('compact'), u'compact')])) … … 293 296 'hr', 'img', 'input', 'isindex', 'link', 'meta', 294 297 'param']) 295 298 296 def __init__(self, source, filename=None, encoding= 'utf-8'):299 def __init__(self, source, filename=None, encoding=None): 297 300 """Initialize the parser for the given HTML input. 298 301 299 302 :param source: the HTML text as a file-like object … … 320 323 while 1: 321 324 while not done and len(self._queue) == 0: 322 325 data = self.source.read(bufsize) 323 if data == '': # end of data326 if not data: # end of data 324 327 self.close() 325 328 done = True 326 329 else: 330 if not isinstance(data, unicode): 331 # bytes 332 if self.encoding: 333 data = data.decode(self.encoding) 334 else: 335 raise UnicodeError("source returned bytes, but no encoding specified") 327 336 self.feed(data) 328 337 for kind, data, pos in self._queue: 329 338 yield kind, data, pos … … 403 412 self._enqueue(COMMENT, text) 404 413 405 414 406 def HTML(text, encoding= 'utf-8'):415 def HTML(text, encoding=None): 407 416 """Parse the given HTML source and return a markup stream. 
408 417 409 418 Unlike with `HTMLParser`, the returned stream is reusable, meaning it can be 410 419 iterated over multiple times: 411 420 412 >>> html = HTML('<body><h1>Foo</h1></body>' )421 >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8') 413 422 >>> print(html) 414 423 <body><h1>Foo</h1></body> 415 424 >>> print(html.select('h1')) … … 422 431 :raises ParseError: if the HTML text is not well-formed, and error recovery 423 432 fails 424 433 """ 425 return Stream(list(HTMLParser(StringIO(text), encoding=encoding))) 434 if isinstance(text, unicode): 435 return Stream(list(HTMLParser(StringIO(text), encoding=encoding))) 436 return Stream(list(HTMLParser(BytesIO(text), encoding=encoding))) 426 437 427 438 428 439 def _coalesce(stream): -
genshi/output.py
diff -r 4bbd2b021cb5 genshi/output.py
a b 27 27 __docformat__ = 'restructuredtext en' 28 28 29 29 30 def encode(iterator, method='xml', encoding= 'utf-8', out=None):30 def encode(iterator, method='xml', encoding=None, out=None): 31 31 """Encode serializer output into a string. 32 32 33 33 :param iterator: the iterator returned from serializing a stream (basically -
genshi/template/astutil.py
diff -r 4bbd2b021cb5 genshi/template/astutil.py
a b 21 21 def parse(source, mode): 22 22 return compile(source, '', mode, _ast.PyCF_ONLY_AST) 23 23 24 from genshi.compat import IS_PYTHON2 24 25 25 26 __docformat__ = 'restructuredtext en' 26 27 … … 129 130 first = False 130 131 self._write('**' + node.kwarg) 131 132 133 if not IS_PYTHON2: 134 # In Python 3 arguments get a special node 135 def visit_arg(self, node): 136 self._write(node.arg) 137 132 138 # FunctionDef(identifier name, arguments args, 133 139 # stmt* body, expr* decorator_list) 134 140 def visit_FunctionDef(self, node): … … 289 295 self._change_indent(-1) 290 296 291 297 292 # Raise(expr? type, expr? inst, expr? tback) 293 def visit_Raise(self, node): 294 self._new_line() 295 self._write('raise') 296 if not node.type: 297 return 298 self._write(' ') 299 self.visit(node.type) 300 if not node.inst: 301 return 302 self._write(', ') 303 self.visit(node.inst) 304 if not node.tback: 305 return 306 self._write(', ') 307 self.visit(node.tback) 298 if IS_PYTHON2: 299 # Raise(expr? type, expr? inst, expr? tback) 300 def visit_Raise(self, node): 301 self._new_line() 302 self._write('raise') 303 if not node.type: 304 return 305 self._write(' ') 306 self.visit(node.type) 307 if not node.inst: 308 return 309 self._write(', ') 310 self.visit(node.inst) 311 if not node.tback: 312 return 313 self._write(', ') 314 self.visit(node.tback) 315 else: 316 # Raise(expr? exc from expr? 
cause) 317 def visit_Raise(self, node): 318 self._new_line() 319 self._write('raise') 320 if not node.exc: 321 return 322 self._write(' ') 323 self.visit(node.exc) 324 if not node.cause: 325 return 326 self._write(' from ') 327 self.visit(node.cause) 308 328 309 329 # TryExcept(stmt* body, excepthandler* handlers, stmt* orelse) 310 330 def visit_TryExcept(self, node): … … 626 646 def visit_Str(self, node): 627 647 self._write(repr(node.s)) 628 648 649 if not IS_PYTHON2: 650 # Bytes(bytes s) 651 def visit_Bytes(self, node): 652 self._write(repr(node.s)) 653 629 654 # Attribute(expr value, identifier attr, expr_context ctx) 630 655 def visit_Attribute(self, node): 631 656 self.visit(node.value) -
genshi/template/base.py
diff -r 4bbd2b021cb5 genshi/template/base.py
a b 15 15 16 16 from collections import deque 17 17 import os 18 from StringIO import StringIO19 18 import sys 20 19 20 from genshi.compat import StringIO, BytesIO 21 21 from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure 22 22 from genshi.input import ParseError 23 23 … … 398 398 self._init_loader() 399 399 self._prepared = False 400 400 401 if isinstance(source, basestring): 402 source = StringIO(source) 403 else: 404 source = source 401 if not isinstance(source, Stream) and not hasattr(source, 'read'): 402 if isinstance(source, unicode): 403 source = StringIO(source) 404 else: 405 source = BytesIO(source) 405 406 try: 406 407 self._stream = self._parse(source, encoding) 407 408 except ParseError, e: -
genshi/template/directives.py
diff -r 4bbd2b021cb5 genshi/template/directives.py
a b 622 622 if not info: 623 623 raise TemplateRuntimeError('"when" directives can only be used ' 624 624 'inside a "choose" directive', 625 self.filename, * stream.next()[2][1:])625 self.filename, *(stream.next())[2][1:]) 626 626 if info[0]: 627 627 return [] 628 628 if not self.expr and not info[1]: 629 629 raise TemplateRuntimeError('either "choose" or "when" directive ' 630 630 'must have a test expression', 631 self.filename, * stream.next()[2][1:])631 self.filename, *(stream.next())[2][1:]) 632 632 if info[1]: 633 633 value = info[2] 634 634 if self.expr: … … 661 661 if not info: 662 662 raise TemplateRuntimeError('an "otherwise" directive can only be ' 663 663 'used inside a "choose" directive', 664 self.filename, * stream.next()[2][1:])664 self.filename, *(stream.next())[2][1:]) 665 665 if info[0]: 666 666 return [] 667 667 info[0] = True -
genshi/template/eval.py
diff -r 4bbd2b021cb5 genshi/template/eval.py
a b 24 24 from genshi.template.base import TemplateRuntimeError 25 25 from genshi.util import flatten 26 26 27 from genshi.compat import get_code_params, build_code_chunk, IS_PYTHON2 28 27 29 __all__ = ['Code', 'Expression', 'Suite', 'LenientLookup', 'StrictLookup', 28 30 'Undefined', 'UndefinedError'] 29 31 __docformat__ = 'restructuredtext en' … … 98 100 def __getstate__(self): 99 101 state = {'source': self.source, 'ast': self.ast, 100 102 'lookup': self._globals.im_self} 101 c = self.code 102 state['code'] = (c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code, 103 c.co_consts, c.co_names, c.co_varnames, c.co_filename, 104 c.co_name, c.co_firstlineno, c.co_lnotab, (), ()) 103 state['code'] = get_code_params(self.code) 105 104 return state 106 105 107 106 def __setstate__(self, state): … … 236 235 of that variable, will raise an exception that includes the name used to 237 236 reference that undefined variable. 238 237 239 >>> foo('bar') 240 Traceback (most recent call last): 241 ... 242 UndefinedError: "foo" not defined 238 >>> try: 239 ... foo('bar') 240 ... except UndefinedError, e: 241 ... print e.msg 242 "foo" not defined 243 243 244 >>> foo.bar 245 Traceback (most recent call last): 246 ... 247 UndefinedError: "foo" not defined 244 >>> try: 245 ... foo.bar 246 ... except UndefinedError, e: 247 ... print e.msg 248 "foo" not defined 248 249 249 250 :see: `LenientLookup` 250 251 """ … … 388 389 raise an ``UndefinedError``: 389 390 390 391 >>> expr = Expression('nothing', lookup='strict') 391 >>> expr.evaluate({}) 392 Traceback (most recent call last): 393 ... 394 UndefinedError: "nothing" not defined 392 >>> try: 393 ... expr.evaluate({}) 394 ... except UndefinedError, e: 395 ... 
print e.msg 396 "nothing" not defined 395 397 396 398 The same happens when a non-existing attribute or item is accessed on an 397 399 existing object: 398 400 399 401 >>> expr = Expression('something.nil', lookup='strict') 400 >>> expr.evaluate({'something': dict()}) 401 Traceback (most recent call last): 402 ... 403 UndefinedError: {} has no member named "nil" 402 >>> try: 403 ... expr.evaluate({'something': dict()}) 404 ... except UndefinedError, e: 405 ... print e.msg 406 {} has no member named "nil" 404 407 """ 405 408 406 409 @classmethod … … 421 424 rest = '\n'.join([' %s' % line for line in rest.splitlines()]) 422 425 source = '\n'.join([first, rest]) 423 426 if isinstance(source, unicode): 424 source = '\xef\xbb\xbf' + source.encode('utf-8')427 source = (u'\ufeff' + source).encode('utf-8') 425 428 return parse(source, mode) 426 429 427 430 428 431 def _compile(node, source=None, mode='eval', filename=None, lineno=-1, 429 432 xform=None): 430 if isinstance(filename, unicode): 431 # unicode file names not allowed for code objects 432 filename = filename.encode('utf-8', 'replace') 433 elif not filename: 433 if not filename: 434 434 filename = '<string>' 435 if IS_PYTHON2: 436 # Python 2 requires non-unicode filenames 437 if isinstance(filename, unicode): 438 filename = filename.encode('utf-8', 'replace') 439 else: 440 # Python 3 requires unicode filenames 441 if not isinstance(filename, unicode): 442 filename = filename.decode('utf-8', 'replace') 435 443 if lineno <= 0: 436 444 lineno = 1 437 445 … … 458 466 try: 459 467 # We'd like to just set co_firstlineno, but it's readonly. 
So we need 460 468 # to clone the code object while adjusting the line number 461 return CodeType(0, code.co_nlocals, code.co_stacksize, 462 code.co_flags | 0x0040, code.co_code, code.co_consts, 463 code.co_names, code.co_varnames, filename, name, 464 lineno, code.co_lnotab, (), ()) 469 return build_code_chunk(code, filename, name, lineno) 465 470 except RuntimeError: 466 471 return code 467 472 … … 493 498 def _extract_names(self, node): 494 499 names = set() 495 500 def _process(node): 501 if not IS_PYTHON2 and isinstance(node, _ast.arg): 502 names.add(node.arg) 496 503 if isinstance(node, _ast.Name): 497 504 names.add(node.id) 498 505 elif isinstance(node, _ast.alias): … … 513 520 return names 514 521 515 522 def visit_Str(self, node): 516 if isinstance(node.s, str):523 if not isinstance(node.s, unicode): 517 524 try: # If the string is ASCII, return a `str` object 518 525 node.s.decode('ascii') 519 526 except ValueError: # Otherwise return a `unicode` object -
genshi/template/loader.py
diff -r 4bbd2b021cb5 genshi/template/loader.py
a b 46 46 47 47 >>> import tempfile 48 48 >>> fd, path = tempfile.mkstemp(suffix='.html', prefix='template') 49 >>> os.write(fd, '<p>$var</p>')49 >>> os.write(fd, u'<p>$var</p>'.encode('utf-8')) 50 50 11 51 51 >>> os.close(fd) 52 52 … … 283 283 """ 284 284 def _load_from_directory(filename): 285 285 filepath = os.path.join(path, filename) 286 fileobj = open(filepath, ' U')286 fileobj = open(filepath, 'rbU') 287 287 mtime = os.path.getmtime(filepath) 288 288 def _uptodate(): 289 289 return mtime == os.path.getmtime(filepath) -
genshi/template/plugin.py
diff -r 4bbd2b021cb5 genshi/template/plugin.py
a b 44 44 options = {} 45 45 self.options = options 46 46 47 self.default_encoding = options.get('genshi.default_encoding', 'utf-8')47 self.default_encoding = options.get('genshi.default_encoding', None) 48 48 auto_reload = options.get('genshi.auto_reload', '1') 49 49 if isinstance(auto_reload, basestring): 50 50 auto_reload = auto_reload.lower() in ('1', 'on', 'yes', 'true') -
genshi/template/tests/directives.py
diff -r 4bbd2b021cb5 genshi/template/tests/directives.py
a b 1137 1137 <py:with vars="x = x * 2; y = x / 2;">${x} ${y}</py:with> 1138 1138 </div>""") 1139 1139 self.assertEqual("""<div> 1140 84 421141 </div>""" , tmpl.generate(x=42).render(encoding=None))1140 84 %s 1141 </div>""" % (84 / 2), tmpl.generate(x=42).render(encoding=None)) 1142 1142 1143 1143 def test_semicolon_escape(self): 1144 1144 tmpl = MarkupTemplate("""<div xmlns:py="http://genshi.edgewall.org/"> -
genshi/template/tests/eval.py
diff -r 4bbd2b021cb5 genshi/template/tests/eval.py
a b 14 14 import doctest 15 15 import os 16 16 import pickle 17 from StringIO import StringIO18 17 import sys 19 18 from tempfile import mkstemp 20 19 import unittest … … 23 22 from genshi.template.base import Context 24 23 from genshi.template.eval import Expression, Suite, Undefined, UndefinedError, \ 25 24 UNDEFINED 25 from genshi.compat import BytesIO, IS_PYTHON2, wrapped_bytes 26 26 27 27 28 28 class ExpressionTestCase(unittest.TestCase): … … 39 39 40 40 def test_pickle(self): 41 41 expr = Expression('1 < 2') 42 buf = StringIO()42 buf = BytesIO() 43 43 pickle.dump(expr, buf, 2) 44 44 buf.seek(0) 45 45 unpickled = pickle.load(buf) … … 58 58 def test_str_literal(self): 59 59 self.assertEqual('foo', Expression('"foo"').evaluate({})) 60 60 self.assertEqual('foo', Expression('"""foo"""').evaluate({})) 61 self.assertEqual('foo', Expression("'foo'").evaluate({})) 61 self.assertEqual(u'foo'.encode('utf-8'), 62 Expression(wrapped_bytes("b'foo'")).evaluate({})) 62 63 self.assertEqual('foo', Expression("'''foo'''").evaluate({})) 63 64 self.assertEqual('foo', Expression("u'foo'").evaluate({})) 64 65 self.assertEqual('foo', Expression("r'foo'").evaluate({})) … … 68 69 self.assertEqual(u'ß', expr.evaluate({})) 69 70 expr = Expression("u'\xfe'") 70 71 self.assertEqual(u'ß', expr.evaluate({})) 71 expr = Expression("'\xc3\xbe'") 72 self.assertEqual(u'ß', expr.evaluate({})) 72 # On Python2 strings are converted to unicode if they contained 73 # non-ASCII characters. 74 # On Py3k, we have no need to do this as non-prefixed strings aren't 75 # raw. 
76 expr = Expression(wrapped_bytes(r"b'\xc3\xbe'")) 77 if IS_PYTHON2: 78 self.assertEqual(u'ß', expr.evaluate({})) 79 else: 80 self.assertEqual(u'ß'.encode('utf-8'), expr.evaluate({})) 73 81 74 82 def test_num_literal(self): 75 83 self.assertEqual(42, Expression("42").evaluate({})) 76 self.assertEqual(42L, Expression("42L").evaluate({})) 84 if IS_PYTHON2: 85 self.assertEqual(42L, Expression("42L").evaluate({})) 77 86 self.assertEqual(.42, Expression(".42").evaluate({})) 78 self.assertEqual(07, Expression("07").evaluate({})) 87 if IS_PYTHON2: 88 self.assertEqual(07, Expression("07").evaluate({})) 79 89 self.assertEqual(0xF2, Expression("0xF2").evaluate({})) 80 90 self.assertEqual(0XF2, Expression("0XF2").evaluate({})) 81 91 … … 246 256 def test_lambda(self): 247 257 data = {'items': range(5)} 248 258 expr = Expression("filter(lambda x: x > 2, items)") 249 self.assertEqual([3, 4], expr.evaluate(data))259 self.assertEqual([3, 4], list(expr.evaluate(data))) 250 260 251 261 def test_lambda_tuple_arg(self): 262 # This syntax goes away in Python 3 263 if not IS_PYTHON2: 264 return 252 265 data = {'items': [(1, 2), (2, 1)]} 253 266 expr = Expression("filter(lambda (x, y): x > y, items)") 254 self.assertEqual([(2, 1)], expr.evaluate(data))267 self.assertEqual([(2, 1)], list(expr.evaluate(data))) 255 268 256 269 def test_list_comprehension(self): 257 270 expr = Expression("[n for n in numbers if n < 2]") … … 470 483 471 484 def test_pickle(self): 472 485 suite = Suite('foo = 42') 473 buf = StringIO()486 buf = BytesIO() 474 487 pickle.dump(suite, buf, 2) 475 488 buf.seek(0) 476 489 unpickled = pickle.load(buf) … … 645 658 assert 'plain' in data 646 659 647 660 def test_import(self): 648 suite = Suite("from itertools import ifilter")661 suite = Suite("from itertools import repeat") 649 662 data = {} 650 663 suite.execute(data) 651 assert ' ifilter' in data664 assert 'repeat' in data 652 665 653 666 def test_import_star(self): 654 667 suite = Suite("from itertools import *") 
655 668 data = Context() 656 669 suite.execute(data) 657 assert ' ifilter' in data670 assert 'repeat' in data 658 671 659 672 def test_import_in_def(self): 660 673 suite = Suite("""def fun(): 661 from itertools import ifilter662 return ifilter(None, range(3))674 from itertools import repeat 675 return repeat(1, 3) 663 676 """) 664 677 data = Context() 665 678 suite.execute(data) 666 assert ' ifilter' not in data667 self.assertEqual([1, 2], list(data['fun']()))679 assert 'repeat' not in data 680 self.assertEqual([1, 1, 1], list(data['fun']())) 668 681 669 682 def test_for(self): 670 683 suite = Suite("""x = [] … … 766 779 self.assertEqual("foo", d["k"]) 767 780 768 781 def test_exec(self): 769 suite = Suite("x = 1; exec d['k']; assert x == 42, x")782 suite = Suite("x = 1; exec(d['k']); assert x == 42, x") 770 783 suite.execute({"d": {"k": "x = 42"}}) 771 784 772 785 def test_return(self): … … 828 841 829 842 def test_yield_expression(self): 830 843 d = {} 831 suite = Suite("""results = [] 844 suite = Suite("""from genshi.compat import next 845 results = [] 832 846 def counter(maximum): 833 847 i = 0 834 848 while i < maximum: … … 838 852 else: 839 853 i += 1 840 854 it = counter(5) 841 results.append( it.next())855 results.append(next(it)) 842 856 results.append(it.send(3)) 843 results.append( it.next())857 results.append(next(it)) 844 858 """) 845 859 suite.execute(d) 846 860 self.assertEqual([0, 3, 4], d['results']) -
genshi/template/tests/loader.py
diff -r 4bbd2b021cb5 genshi/template/tests/loader.py
a b 347 347 assert 'tmpl2.html' not in loader._cache 348 348 349 349 def test_load_with_default_encoding(self): 350 f = open(os.path.join(self.dirname, 'tmpl.html'), 'w ')350 f = open(os.path.join(self.dirname, 'tmpl.html'), 'wb') 351 351 try: 352 352 f.write(u'<div>\xf6</div>'.encode('iso-8859-1')) 353 353 finally: … … 356 356 loader.load('tmpl.html') 357 357 358 358 def test_load_with_explicit_encoding(self): 359 f = open(os.path.join(self.dirname, 'tmpl.html'), 'w ')359 f = open(os.path.join(self.dirname, 'tmpl.html'), 'wb') 360 360 try: 361 361 f.write(u'<div>\xf6</div>'.encode('iso-8859-1')) 362 362 finally: -
genshi/template/tests/markup.py
diff -r 4bbd2b021cb5 genshi/template/tests/markup.py
a b 15 15 import os 16 16 import pickle 17 17 import shutil 18 from StringIO import StringIO19 18 import sys 20 19 import tempfile 21 20 import unittest 22 21 22 from genshi.compat import BytesIO, StringIO 23 23 from genshi.core import Markup 24 24 from genshi.input import XML 25 25 from genshi.template.base import BadDirectiveError, TemplateSyntaxError … … 43 43 def test_pickle(self): 44 44 stream = XML('<root>$var</root>') 45 45 tmpl = MarkupTemplate(stream) 46 buf = StringIO()46 buf = BytesIO() 47 47 pickle.dump(tmpl, buf, 2) 48 48 buf.seek(0) 49 49 unpickled = pickle.load(buf) -
genshi/template/tests/plugin.py
diff -r 4bbd2b021cb5 genshi/template/tests/plugin.py
a b 30 30 31 31 def test_init_no_options(self): 32 32 plugin = MarkupTemplateEnginePlugin() 33 self.assertEqual( 'utf-8', plugin.default_encoding)33 self.assertEqual(None, plugin.default_encoding) 34 34 self.assertEqual('html', plugin.default_format) 35 35 self.assertEqual(None, plugin.default_doctype) 36 36 … … 165 165 def test_helper_functions(self): 166 166 plugin = MarkupTemplateEnginePlugin() 167 167 tmpl = plugin.load_template(PACKAGE + '.templates.functions') 168 output = plugin.render({'snippet': '<b>Foo</b>'}, template=tmpl)168 output = plugin.render({'snippet': u'<b>Foo</b>'}, template=tmpl) 169 169 self.assertEqual("""<div> 170 170 False 171 171 bar … … 178 178 179 179 def test_init_no_options(self): 180 180 plugin = TextTemplateEnginePlugin() 181 self.assertEqual( 'utf-8', plugin.default_encoding)181 self.assertEqual(None, plugin.default_encoding) 182 182 self.assertEqual('text', plugin.default_format) 183 183 184 184 self.assertEqual([], plugin.loader.search_path) -
genshi/template/text.py
diff -r 4bbd2b021cb5 genshi/template/text.py
a b 162 162 depth = 0 163 163 164 164 source = source.read() 165 if isinstance(source, str):165 if not isinstance(source, unicode): 166 166 source = source.decode(encoding or 'utf-8', 'replace') 167 167 offset = 0 168 168 lineno = 1 … … 279 279 depth = 0 280 280 281 281 source = source.read() 282 if isinstance(source, str):282 if not isinstance(source, unicode): 283 283 source = source.decode(encoding or 'utf-8', 'replace') 284 284 offset = 0 285 285 lineno = 1 -
genshi/tests/core.py
diff -r 4bbd2b021cb5 genshi/tests/core.py
a b 13 13 14 14 import doctest 15 15 import pickle 16 from StringIO import StringIO17 try:18 from cStringIO import StringIO as cStringIO19 except ImportError:20 cStringIO = StringIO21 16 import unittest 22 17 23 18 from genshi import core 24 19 from genshi.core import Markup, Attrs, Namespace, QName, escape, unescape 25 20 from genshi.input import XML, ParseError 21 from genshi.compat import StringIO, BytesIO 26 22 27 23 28 24 class StreamTestCase(unittest.TestCase): 29 25 30 26 def test_render_utf8(self): 31 27 xml = XML('<li>Ãber uns</li>') 32 self.assertEqual( '<li>Ãber uns</li>', xml.render())28 self.assertEqual(u'<li>Ãber uns</li>'.encode('utf-8'), xml.render(encoding='utf-8')) 33 29 34 30 def test_render_unicode(self): 35 31 xml = XML('<li>Ãber uns</li>') 32 self.assertEqual(u'<li>Ãber uns</li>', xml.render()) 36 33 self.assertEqual(u'<li>Ãber uns</li>', xml.render(encoding=None)) 37 34 38 35 def test_render_ascii(self): 39 36 xml = XML('<li>Ãber uns</li>') 40 self.assertEqual( '<li>Über uns</li>', xml.render(encoding='ascii'))37 self.assertEqual(u'<li>Über uns</li>'.encode('ascii'), xml.render(encoding='ascii')) 41 38 42 39 def test_render_output_stream_utf8(self): 43 40 xml = XML('<li>Ãber uns</li>') 44 strio = cStringIO()45 self.assertEqual(None, xml.render( out=strio))46 self.assertEqual( '<li>Ãber uns</li>', strio.getvalue())41 strio = BytesIO() 42 self.assertEqual(None, xml.render(encoding='utf-8', out=strio)) 43 self.assertEqual(u'<li>Ãber uns</li>'.encode('utf-8'), strio.getvalue()) 47 44 48 45 def test_render_output_stream_unicode(self): 49 46 xml = XML('<li>Ãber uns</li>') … … 53 50 54 51 def test_pickle(self): 55 52 xml = XML('<li>Foo</li>') 56 buf = StringIO()53 buf = BytesIO() 57 54 pickle.dump(xml, buf, 2) 58 55 buf.seek(0) 59 56 xml = pickle.load(buf) … … 63 60 class MarkupTestCase(unittest.TestCase): 64 61 65 62 def test_new_with_encoding(self): 66 markup = Markup('Döner', encoding='utf-8') 67 self.assertEquals("<Markup u'D\\xf6ner'>", 
repr(markup)) 63 markup = Markup(u'Döner'.encode('utf-8'), encoding='utf-8') 64 # mimic Markup.__repr__ when constructing output for Python 2/3 compatibility 65 self.assertEquals("<Markup %r>" % u'D\u00f6ner', repr(markup)) 68 66 69 67 def test_repr(self): 70 68 markup = Markup('foo') … … 158 156 159 157 def test_pickle(self): 160 158 markup = Markup('foo') 161 buf = StringIO()159 buf = BytesIO() 162 160 pickle.dump(markup, buf, 2) 163 161 buf.seek(0) 164 162 self.assertEquals("<Markup u'foo'>", repr(pickle.load(buf))) … … 168 166 169 167 def test_pickle(self): 170 168 attrs = Attrs([("attr1", "foo"), ("attr2", "bar")]) 171 buf = StringIO()169 buf = BytesIO() 172 170 pickle.dump(attrs, buf, 2) 173 171 buf.seek(0) 174 172 unpickled = pickle.load(buf) … … 196 194 197 195 def test_pickle(self): 198 196 ns = Namespace('http://www.example.org/namespace') 199 buf = StringIO()197 buf = BytesIO() 200 198 pickle.dump(ns, buf, 2) 201 199 buf.seek(0) 202 200 unpickled = pickle.load(buf) … … 209 207 210 208 def test_pickle(self): 211 209 qname = QName('http://www.example.org/namespace}elem') 212 buf = StringIO()210 buf = BytesIO() 213 211 pickle.dump(qname, buf, 2) 214 212 buf.seek(0) 215 213 unpickled = pickle.load(buf) -
genshi/tests/input.py
diff -r 4bbd2b021cb5 genshi/tests/input.py
a b 12 12 # history and logs, available at http://genshi.edgewall.org/log/. 13 13 14 14 import doctest 15 from StringIO import StringIO16 15 import sys 17 16 import unittest 18 17 19 18 from genshi.core import Attrs, Stream 20 19 from genshi.input import XMLParser, HTMLParser, ParseError 20 from genshi.compat import StringIO, BytesIO 21 21 22 22 23 23 class XMLParserTestCase(unittest.TestCase): … … 59 59 60 60 def test_latin1_encoded(self): 61 61 text = u'<div>\xf6</div>'.encode('iso-8859-1') 62 events = list(XMLParser( StringIO(text), encoding='iso-8859-1'))62 events = list(XMLParser(BytesIO(text), encoding='iso-8859-1')) 63 63 kind, data, pos = events[1] 64 64 self.assertEqual(Stream.TEXT, kind) 65 65 self.assertEqual(u'\xf6', data) … … 68 68 text = u"""<?xml version="1.0" encoding="iso-8859-1" ?> 69 69 <div>\xf6</div> 70 70 """.encode('iso-8859-1') 71 events = list(XMLParser( StringIO(text)))71 events = list(XMLParser(BytesIO(text))) 72 72 kind, data, pos = events[2] 73 73 self.assertEqual(Stream.TEXT, kind) 74 74 self.assertEqual(u'\xf6', data) … … 116 116 class HTMLParserTestCase(unittest.TestCase): 117 117 118 118 def test_text_node_pos_single_line(self): 119 text = '<elem>foo bar</elem>'119 text = u'<elem>foo bar</elem>' 120 120 events = list(HTMLParser(StringIO(text))) 121 121 kind, data, pos = events[1] 122 122 self.assertEqual(Stream.TEXT, kind) … … 124 124 self.assertEqual((None, 1, 6), pos) 125 125 126 126 def test_text_node_pos_multi_line(self): 127 text = '''<elem>foo127 text = u'''<elem>foo 128 128 bar</elem>''' 129 129 events = list(HTMLParser(StringIO(text))) 130 130 kind, data, pos = events[1] … … 134 134 135 135 def test_input_encoding_text(self): 136 136 text = u'<div>\xf6</div>'.encode('iso-8859-1') 137 events = list(HTMLParser( StringIO(text), encoding='iso-8859-1'))137 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1')) 138 138 kind, data, pos = events[1] 139 139 self.assertEqual(Stream.TEXT, kind) 140 140 
self.assertEqual(u'\xf6', data) 141 141 142 142 def test_input_encoding_attribute(self): 143 143 text = u'<div title="\xf6"></div>'.encode('iso-8859-1') 144 events = list(HTMLParser( StringIO(text), encoding='iso-8859-1'))144 events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1')) 145 145 kind, (tag, attrib), pos = events[0] 146 146 self.assertEqual(Stream.START, kind) 147 147 self.assertEqual(u'\xf6', attrib.get('title')) … … 154 154 self.assertEqual(u'\u2013', data) 155 155 156 156 def test_html_entity_in_attribute(self): 157 text = '<p title=" "></p>'157 text = u'<p title=" "></p>' 158 158 events = list(HTMLParser(StringIO(text))) 159 159 kind, data, pos = events[0] 160 160 self.assertEqual(Stream.START, kind) … … 163 163 self.assertEqual(Stream.END, kind) 164 164 165 165 def test_html_entity_in_text(self): 166 text = '<p> </p>'166 text = u'<p> </p>' 167 167 events = list(HTMLParser(StringIO(text))) 168 168 kind, data, pos = events[1] 169 169 self.assertEqual(Stream.TEXT, kind) 170 170 self.assertEqual(u'\xa0', data) 171 171 172 172 def test_processing_instruction(self): 173 text = '<?php echo "Foobar" ?>'173 text = u'<?php echo "Foobar" ?>' 174 174 events = list(HTMLParser(StringIO(text))) 175 175 kind, (target, data), pos = events[0] 176 176 self.assertEqual(Stream.PI, kind) … … 205 205 self.assertEqual(1, standalone) 206 206 207 207 def test_processing_instruction_trailing_qmark(self): 208 text = '<?php echo "Foobar" ??>'208 text = u'<?php echo "Foobar" ??>' 209 209 events = list(HTMLParser(StringIO(text))) 210 210 kind, (target, data), pos = events[0] 211 211 self.assertEqual(Stream.PI, kind) … … 213 213 self.assertEqual('echo "Foobar" ?', data) 214 214 215 215 def test_out_of_order_tags1(self): 216 text = '<span><b>Foobar</span></b>'216 text = u'<span><b>Foobar</span></b>' 217 217 events = list(HTMLParser(StringIO(text))) 218 218 self.assertEqual(5, len(events)) 219 219 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) … … 223 223 
self.assertEqual((Stream.END, 'span'), events[4][:2]) 224 224 225 225 def test_out_of_order_tags2(self): 226 text = '<span class="baz"><b><i>Foobar</span></b></i>'227 events = list(HTMLParser( StringIO(text)))226 text = u'<span class="baz"><b><i>Foobar</span></b></i>'.encode('utf-8') 227 events = list(HTMLParser(BytesIO(text), encoding='utf-8')) 228 228 self.assertEqual(7, len(events)) 229 229 self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))), 230 230 events[0][:2]) … … 236 236 self.assertEqual((Stream.END, 'span'), events[6][:2]) 237 237 238 238 def test_out_of_order_tags3(self): 239 text = '<span><b>Foobar</i>'240 events = list(HTMLParser( StringIO(text)))239 text = u'<span><b>Foobar</i>'.encode('utf-8') 240 events = list(HTMLParser(BytesIO(text), encoding='utf-8')) 241 241 self.assertEqual(5, len(events)) 242 242 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) 243 243 self.assertEqual((Stream.START, ('b', ())), events[1][:2]) … … 246 246 self.assertEqual((Stream.END, 'span'), events[4][:2]) 247 247 248 248 def test_hex_charref(self): 249 text = '<span>'</span>'249 text = u'<span>'</span>' 250 250 events = list(HTMLParser(StringIO(text))) 251 251 self.assertEqual(3, len(events)) 252 252 self.assertEqual((Stream.START, ('span', ())), events[0][:2]) -
genshi/tests/output.py
diff -r 4bbd2b021cb5 genshi/tests/output.py
a b 356 356 </div>""", output) 357 357 358 358 def test_html5_doctype(self): 359 stream = HTML( '<html></html>')359 stream = HTML(u'<html></html>') 360 360 output = stream.render(XHTMLSerializer, doctype=DocType.HTML5, 361 361 encoding=None) 362 362 self.assertEqual('<!DOCTYPE html>\n<html></html>', output) … … 427 427 </style>""", output) 428 428 429 429 def test_html5_doctype(self): 430 stream = HTML( '<html></html>')430 stream = HTML(u'<html></html>') 431 431 output = stream.render(HTMLSerializer, doctype=DocType.HTML5, 432 432 encoding=None) 433 433 self.assertEqual('<!DOCTYPE html>\n<html></html>', output) -
genshi/util.py
diff -r 4bbd2b021cb5 genshi/util.py
a b 15 15 16 16 import htmlentitydefs as entities 17 17 import re 18 import sys 19 20 from compat import any, all, stringrepr 18 21 19 22 __docformat__ = 'restructuredtext en' 20 23 … … 246 249 """ 247 250 return _STRIPTAGS_RE.sub('', text) 248 251 249 250 def stringrepr(string):251 ascii = string.encode('ascii', 'backslashreplace')252 quoted = "'" + ascii.replace("'", "\\'") + "'"253 if len(ascii) > len(string):254 return 'u' + quoted255 return quoted256 257 258 # Compatibility fallback implementations for older Python versions259 260 try:261 all = all262 any = any263 except NameError:264 def any(S):265 for x in S:266 if x:267 return True268 return False269 270 def all(S):271 for x in S:272 if not x:273 return False274 return True -
setup.py
diff -r 4bbd2b021cb5 setup.py
a b 41 41 def run(self): 42 42 try: 43 43 build_ext.run(self) 44 except DistutilsPlatformError, e: 44 except DistutilsPlatformError: 45 _etype, e, _tb = sys.exc_info() 45 46 self._unavailable(e) 46 47 47 48 def build_extension(self, ext): … … 49 50 build_ext.build_extension(self, ext) 50 51 global _speedup_available 51 52 _speedup_available = True 52 except CCompilerError, e: 53 except CCompilerError: 54 _etype, e, _tb = sys.exc_info() 53 55 self._unavailable(e) 54 56 55 57 def _unavailable(self, exc): … … 86 88 cmdclass['bdist_egg'] = my_bdist_egg 87 89 88 90 91 # Use 2to3 if we're running under Python 3 (with Distribute) 92 extra = {} 93 if sys.version_info >= (3,): 94 extra['use_2to3'] = True 95 extra['convert_2to3_doctests'] = [] 96 extra['use_2to3_fixers'] = ['fixes'] 97 # include tests for python3 setup.py test 98 packages = [ 99 'genshi', 'genshi.filters', 'genshi.template', 100 'genshi.tests', 'genshi.filters.tests', 101 'genshi.template.tests', 102 'genshi.template.tests.templates', 103 ] 104 # Install genshi template tests 105 extra['include_package_data'] = True 106 else: 107 packages = ['genshi', 'genshi.filters', 'genshi.template'] 108 109 89 110 setup( 90 111 name = 'Genshi', 91 112 version = '0.7', … … 114 135 'Topic :: Text Processing :: Markup :: XML' 115 136 ], 116 137 keywords = ['python.templating.engines'], 117 packages = ['genshi', 'genshi.filters', 'genshi.template'],138 packages = packages, 118 139 test_suite = 'genshi.tests.suite', 119 140 120 141 extras_require = { … … 132 153 """, 133 154 134 155 features = {'speedups': speedups}, 135 cmdclass = cmdclass 156 cmdclass = cmdclass, 157 158 **extra 136 159 )