"""Various functions for dealing with HTML. $HeadURL: svn+ssh://svn/repos/trunk/quixote/html.py $ $Id: html.py 22539 2003-09-17 13:56:12Z dbinger $ These functions are fairly simple but it is critical that they be used correctly. Many security problems are caused by quoting errors (cross site scripting is one example). The HTML and XML standards on www.w3c.org and www.xml.com should be studied, especially the sections on character sets, entities, attribute and values. htmltext and htmlescape ----------------------- This type and function are meant to be used with [html] PTL template type. The htmltext type designates data that does not need to be escaped and the htmlescape() function calls str() on the argment, escapes the resulting string and returns a htmltext instance. htmlescape() does nothing to htmltext instances. html_quote ---------- Use for quoting data that will be used within attribute values or as element contents (if the [html] template type is not being used). Examples: '%s' % html_quote(title) '' % html_quote(data) 'something' % html_quote(url) Note that the \" character should be used to surround attribute values. url_quote --------- Use for quoting data to be included as part of a URL, for example: input = "foo bar" ... '' % url_quote(input) Note that URLs are usually used as attribute values and should be quoted using html_quote. For example: url = 'http://example.com/?a=1©=0' ... 'do something' % html_quote(url) If html_quote is not used, old browsers would treat "©" as an entity reference and replace it with the copyright character. XML processors should treat it as an invalid entity reference. """ __revision__ = "$Id: html.py 22539 2003-09-17 13:56:12Z dbinger $" import urllib from types import UnicodeType try: # faster C implementation from quixote._c_htmltext import htmltext, htmlescape, _escape_string, \ TemplateIO except ImportError: from quixote._py_htmltext import htmltext, htmlescape, _escape_string, \ TemplateIO ValuelessAttr = ["valueless_attr"] # magic singleton object def htmlattrs (**attrs): """ Return a dictionary suitable for passing as the keyword argument dictionary to htmltag(). Trailing underscores are stripped to allow use of Python keywords (specifically, 'class') as arguments (hence HTML tags). Also, all attribute names are forced to lowercase. For example, htmlattrs(href="http://google.com", title="A nice search engine", class_="link", onClick='alert("ouch!")') returns the dictionary { 'href': 'http://google.com', 'title': 'A nice search engine', 'class': 'link', 'onclick': 'alert("ouch!")' } """ d = {} for attr in attrs: d[attr.rstrip('_').lower()] = attrs[attr] return d def htmltag (tag, xml_end=0, **attrs): """ Return a string with an HTML tag and various attributes. Does nothing about the corresponding end tag, unless 'xml_end' is true, in which case a valid XML-style "" tag is produced. Keyword arguments to this function become HTML tag attributes, eg. htmltag("img", src="foo.gif", alt="Picture of a foo") returns 'Picture of a foo' Note that keyword args are transformed into tag attributes in hash order (ie. unpredictable and subject to change across Python versions). """ r = ["<%s" % tag] # This is for backwards compatibility with code that predates # the htmlattrs() function above. (However, special treatment # of 'css_class' only appeared in Quixote 0.7a1, so it could # probably just be removed without great harm.) if 'css_class' in attrs and 'class' not in attrs: attrs['class'] = attrs['css_class'] del attrs['css_class'] for (attr, val) in attrs.items(): if val is ValuelessAttr: val = attr if val is not None: r.append(' %s="%s"' % (attr, _escape_string(str(val)))) if xml_end: r.append(" />") else: r.append(">") return htmltext("".join(r)) def href (url, text, title=None, **attrs): return (htmltag("a", href=url, title=title, **attrs) + htmlescape(text) + htmltext("")) def nl2br (value): """nl2br(value : any) -> htmltext Insert
tags before newline characters. """ text = htmlescape(value) return htmltext(text.s.replace('\n', '
\n')) def url_quote(value, fallback=None): """url_quote(value : any [, fallback : string]) -> string Quotes 'value' for use in a URL; see urllib.quote(). If value is None, then the behavior depends on the fallback argument. If it is not supplied then an error is raised. Otherwise, the fallback value is returned unquoted. """ if value is None: if fallback is None: raise ValueError, "value is None and no fallback supplied" else: return fallback if isinstance(value, UnicodeType): value = value.encode('iso-8859-1') else: value = str(value) return urllib.quote(value) # # The rest of this module is for Quixote applications that were written # before 'htmltext'. If you are writing a new application, ignore them. # def html_quote(value, fallback=None): """html_quote(value : any [, fallback : string]) -> str Quotes 'value' for use in an HTML page. The special characters &, <, > are replaced by SGML entities. If value is None, then the behavior depends on the fallback argument. If it is not supplied then an error is raised. Otherwise, the fallback value is returned unquoted. """ if value is None: if fallback is None: raise ValueError, "value is None and no fallback supplied" else: return fallback elif isinstance(value, UnicodeType): value = value.encode('iso-8859-1') else: value = str(value) value = value.replace("&", "&") # must be done first value = value.replace("<", "<") value = value.replace(">", ">") value = value.replace('"', """) return value def value_quote(value): """Quote HTML attribute values. This function is of marginal utility since html_quote can be used. XHTML 1.0 requires that all values be quoted. weblint claims that some clients don't understand single quotes. For compatibility with HTML, XHTML 1.0 requires that ampersands be encoded. """ assert value is not None, "can't pass None to value_quote" value = str(value).replace('&', '&') value = value.replace('"', '"') return '"%s"' % value def link (url, text, title=None, name=None, **kwargs): return render_tag("a", href=url, title=title, name=name, **kwargs) + str(text) + "" def render_tag (tag, xml_end=0, **attrs): r = "<%s" % tag for (attr, val) in attrs.items(): if val is ValuelessAttr: r += ' %s="%s"' % (attr, attr) elif val is not None: r += " %s=%s" % (attr, value_quote(val)) if xml_end: r += " />" else: r += ">" return r