"""Various functions for dealing with HTML.
$HeadURL: svn+ssh://svn/repos/trunk/quixote/html.py $
$Id: html.py 22539 2003-09-17 13:56:12Z dbinger $
These functions are fairly simple but it is critical that they be
used correctly. Many security problems are caused by quoting errors
(cross site scripting is one example). The HTML and XML standards on
www.w3c.org and www.xml.com should be studied, especially the sections
on character sets, entities, attribute and values.
htmltext and htmlescape
-----------------------
This type and function are meant to be used with [html] PTL template type.
The htmltext type designates data that does not need to be escaped and the
htmlescape() function calls str() on the argument, escapes the resulting
string and returns a htmltext instance. htmlescape() does nothing to
htmltext instances.
html_quote
----------
Use for quoting data that will be used within attribute values or as
element contents (if the [html] template type is not being used).
Examples:
'<title>%s</title>' % html_quote(title)
'<input type="hidden" value="%s" />' % html_quote(data)
'<a href="%s">something</a>' % html_quote(url)
Note that the \" character should be used to surround attribute values.
url_quote
---------
Use for quoting data to be included as part of a URL, for example:
input = "foo bar"
...
'<a href="/search?keyword=%s">' % url_quote(input)
Note that URLs are usually used as attribute values and should be quoted
using html_quote. For example:
url = 'http://example.com/?a=1&copy=0'
...
'<a href="%s">do something</a>' % html_quote(url)
If html_quote is not used, old browsers would treat "&copy" as an entity
reference and replace it with the copyright character. XML processors should
treat it as an invalid entity reference.
"""
__revision__ = "$Id: html.py 22539 2003-09-17 13:56:12Z dbinger $"
import urllib
from types import UnicodeType
try:
# faster C implementation
from quixote._c_htmltext import htmltext, htmlescape, _escape_string, \
TemplateIO
except ImportError:
from quixote._py_htmltext import htmltext, htmlescape, _escape_string, \
TemplateIO
# Sentinel attribute value.  htmltag() and render_tag() compare against
# it by identity ('is') and, when it matches, emit the attribute with its
# own name as the value (attr="attr").  A list is used so that the object
# is unique and cannot be confused with any ordinary attribute value.
ValuelessAttr = ["valueless_attr"] # magic singleton object
def htmlattrs (**attrs):
    """htmlattrs(**attrs) -> dict

    Normalize keyword arguments into an attribute dictionary that can
    be handed to htmltag() as its keyword arguments.  Every name has its
    trailing underscores removed (so that Python keywords such as
    'class' can be spelled 'class_') and is converted to lowercase.
    Values are passed through untouched.

    For example,
      htmlattrs(href="http://google.com",
                title="A nice search engine",
                class_="link",
                onClick='alert("ouch!")')
    returns the dictionary
      { 'href': 'http://google.com',
        'title': 'A nice search engine',
        'class': 'link',
        'onclick': 'alert("ouch!")' }
    """
    normalized = [(name.rstrip('_').lower(), value)
                  for (name, value) in attrs.items()]
    return dict(normalized)
def htmltag (tag, xml_end=0, **attrs):
    """htmltag(tag : string, xml_end : bool = 0, **attrs) -> htmltext

    Build the start tag for 'tag', with one attribute per keyword
    argument.  Nothing is done about the corresponding end tag, unless
    'xml_end' is true, in which case the tag is closed XML-style, as
    in "<tag />".

    An attribute whose value is None is left out, and one whose value
    is ValuelessAttr is written with its own name as the value.  Every
    other value is converted with str() and passed through
    _escape_string().

    For example,
      htmltag("img", src="foo.gif", alt="Picture of a foo")
    returns
      '<img src="foo.gif" alt="Picture of a foo">'

    Attributes are emitted in the iteration order of the keyword
    dictionary, so do not rely on any particular ordering.
    """
    # Backwards compatibility with code that predates htmlattrs():
    # 'css_class' is accepted as a spelling of 'class' unless 'class'
    # was passed explicitly.  (Special treatment of 'css_class' only
    # appeared in Quixote 0.7a1, so it could probably be removed.)
    if 'css_class' in attrs and 'class' not in attrs:
        attrs['class'] = attrs['css_class']
        del attrs['css_class']

    pieces = ["<%s" % tag]
    for name, value in attrs.items():
        if value is None:
            continue
        if value is ValuelessAttr:
            value = name
        pieces.append(' %s="%s"' % (name, _escape_string(str(value))))

    if xml_end:
        closer = " />"
    else:
        closer = ">"
    pieces.append(closer)
    return htmltext("".join(pieces))
def href (url, text, title=None, **attrs):
    """href(url : any, text : any, title : any = None, **attrs)

    Return the markup for a complete anchor element: an "a" start tag
    built by htmltag() with 'url' as its href (plus 'title' and any
    extra keyword attributes), then 'text' passed through htmlescape(),
    then the closing tag.
    """
    return (htmltag("a", href=url, title=title, **attrs) +
            htmlescape(text) +
            htmltext("</a>"))
def nl2br (value):
    """nl2br(value : any) -> htmltext

    Insert <br /> tags before newline characters.  'value' is passed
    through htmlescape() first, and the tags are inserted into the
    escaped text.
    """
    text = htmlescape(value)
    return htmltext(text.s.replace('\n', '<br />\n'))
def url_quote(value, fallback=None):
    """url_quote(value : any [, fallback : string]) -> string

    Quotes 'value' for use in a URL; see urllib.quote().  Unicode
    values are encoded as ISO-8859-1 before quoting; any other value is
    converted with str().

    If value is None, then the behavior depends on the fallback
    argument.  If it is not supplied (or is None) then ValueError is
    raised.  Otherwise, the fallback value is returned unquoted.
    """
    if value is None:
        if fallback is None:
            # Call form of raise: valid in every Python version, unlike
            # the old "raise Class, message" statement form.
            raise ValueError("value is None and no fallback supplied")
        return fallback
    if isinstance(value, UnicodeType):
        value = value.encode('iso-8859-1')
    else:
        value = str(value)
    return urllib.quote(value)
#
# The rest of this module is for Quixote applications that were written
# before 'htmltext'. If you are writing a new application, ignore them.
#
def html_quote(value, fallback=None):
    """html_quote(value : any [, fallback : string]) -> str

    Quotes 'value' for use in an HTML page, either as element content
    or inside a double-quoted attribute value.  The special characters
    &, <, > and the double quote are replaced by the SGML entities
    &amp;, &lt;, &gt; and &quot;.  Unicode values are encoded as
    ISO-8859-1 first; any other value is converted with str().

    If value is None, then the behavior depends on the fallback
    argument.  If it is not supplied (or is None) then ValueError is
    raised.  Otherwise, the fallback value is returned unquoted.
    """
    if value is None:
        if fallback is None:
            # Call form of raise: valid in every Python version, unlike
            # the old "raise Class, message" statement form.
            raise ValueError("value is None and no fallback supplied")
        return fallback
    if isinstance(value, UnicodeType):
        value = value.encode('iso-8859-1')
    else:
        value = str(value)
    # The ampersand must be handled first; otherwise the ampersands
    # introduced by the later replacements would be escaped again.
    value = value.replace("&", "&amp;")
    value = value.replace("<", "&lt;")
    value = value.replace(">", "&gt;")
    value = value.replace('"', "&quot;")
    return value
def value_quote(value):
    """Quote HTML attribute values.  This function is of marginal
    utility since html_quote can be used.

    Returns str(value) wrapped in double quotes, with every ampersand
    replaced by &amp; and every double quote replaced by &quot;.
    'value' must not be None.

    XHTML 1.0 requires that all values be quoted.  weblint claims
    that some clients don't understand single quotes.  For compatibility
    with HTML, XHTML 1.0 requires that ampersands be encoded.
    """
    assert value is not None, "can't pass None to value_quote"
    # Ampersands first, so the ampersand of &quot; is not re-encoded.
    value = str(value).replace('&', '&amp;')
    value = value.replace('"', '&quot;')
    return '"%s"' % value
def link (url, text, title=None, name=None, **kwargs):
    """link(url : any, text : any, title : any = None, name : any = None,
            **kwargs) -> string

    Return a complete anchor element as a plain string: an "a" start
    tag built by render_tag() with 'url' as its href (plus 'title',
    'name' and any extra keyword attributes), then str(text), then the
    closing tag.  Note that 'text' is inserted as-is, without any
    quoting.
    """
    return render_tag("a", href=url, title=title, name=name,
                      **kwargs) + str(text) + "</a>"
def render_tag (tag, xml_end=0, **attrs):
    """render_tag(tag : string, xml_end : bool = 0, **attrs) -> string

    Build the start tag for 'tag' as a plain string, with one attribute
    per keyword argument.  An attribute whose value is None is left
    out; one whose value is ValuelessAttr is written with its own name
    as the value; every other value is formatted by value_quote().
    When 'xml_end' is true the tag is closed with " />", otherwise
    with ">".
    """
    pieces = ["<%s" % tag]
    for name, value in attrs.items():
        if value is None:
            continue
        if value is ValuelessAttr:
            pieces.append(' %s="%s"' % (name, name))
        else:
            # value_quote() supplies the surrounding double quotes.
            pieces.append(" %s=%s" % (name, value_quote(value)))
    if xml_end:
        pieces.append(" />")
    else:
        pieces.append(">")
    return "".join(pieces)