"""Various functions for dealing with HTML.
$HeadURL: svn+ssh://svn/repos/trunk/quixote/html.py $
$Id: html.py 22539 2003-09-17 13:56:12Z dbinger $
These functions are fairly simple but it is critical that they be
used correctly.  Many security problems are caused by quoting errors
(cross site scripting is one example).  The HTML and XML standards on
www.w3c.org and www.xml.com should be studied, especially the sections
on character sets, entities, attributes and values.
htmltext and htmlescape
-----------------------
This type and function are meant to be used with the [html] PTL template
type.  The htmltext type designates data that does not need to be escaped and
the htmlescape() function calls str() on the argument, escapes the resulting
string and returns an htmltext instance.  htmlescape() does nothing to
htmltext instances.
html_quote
----------
Use for quoting data that will be used within attribute values or as
element contents (if the [html] template type is not being used).
Examples:
    '<title>%s</title>' % html_quote(title)
    '<a href="%s">something</a>' % html_quote(url)
Note that the \" character should be used to surround attribute values.
url_quote
---------
Use for quoting data to be included as part of a URL, for example:
    input = "foo bar"
    ...
    '<a href="/search?keyword=%s">' % url_quote(input)
Note that URLs are usually used as attribute values and should be quoted
using html_quote.  For example:
    url = 'http://example.com/?a=1&copy=0'
    ...
    '<a href="%s">do something</a>' % html_quote(url)
If html_quote is not used, old browsers would treat "&copy" as an entity
reference and replace it with the copyright character.  XML processors should
treat it as an invalid entity reference.
"""
# Version-control keyword string identifying the revision of this file.
__revision__ = "$Id: html.py 22539 2003-09-17 13:56:12Z dbinger $"
import urllib
from types import UnicodeType
try:
    # faster C implementation
    from quixote._c_htmltext import htmltext, htmlescape, _escape_string, \
        TemplateIO
except ImportError:
    # The C module could not be imported; take the same four names from
    # quixote._py_htmltext instead.
    from quixote._py_htmltext import htmltext, htmlescape, _escape_string, \
        TemplateIO
# A single module-level list object used as a marker value.
# NOTE(review): presumably callers pass this object as an attribute value
# to request an attribute with no value, and it is recognised by identity
# -- confirm against htmltag().
ValuelessAttr = ["valueless_attr"] # magic singleton object
def htmlattrs (**attrs):
    """
    Build a dictionary suitable for use as the keyword argument
    dictionary of htmltag().

    Each attribute name has all of its trailing underscores removed,
    which lets Python keywords (specifically, 'class') be spelled with
    a trailing underscore, and is then converted to lowercase.  The
    attribute values are passed through unchanged.

    For example,
      htmlattrs(href="http://google.com",
                title="A nice search engine",
                class_="link",
                onClick='alert("ouch!")')
    returns the dictionary
      { 'href': 'http://google.com',
        'title': 'A nice search engine',
        'class': 'link',
        'onclick': 'alert("ouch!")' }
    """
    # Normalize every name first, then build the dictionary in one step;
    # if two names normalize to the same key, the later one wins.
    normalized = [(name.rstrip('_').lower(), value)
                  for name, value in attrs.items()]
    return dict(normalized)
def htmltag (tag, xml_end=0, **attrs):
    """
    Return a string with an HTML tag and various attributes.  Does
    nothing about the corresponding end tag, unless 'xml_end' is true,
    in which case a valid XML-style "