"""Convenience mechanisms for writing HTML Similar in concept to buildtree.py (qv), but very different in the details. """ import sys import string __docformat__ = "reST" EMPTY = ["br","hr"] """Elements that do not (may not) have content. """ INLINE = ["em","strong","samp","code","tt","text"] """Elements that may occur 'inline' - within a paragraph, etc. Note that we include the 'pseudo-element' "text". """ NEWLINE_AFTER = ["html","head","body","table","address"] LISTS = ["ol","ul","dl"] # ---------------------------------------------------------------------- class BuildHTML: def __init__(self,stream=None): """Instantiate a BuildHTML instance. `stream` should be either something that "looks like" a file instance (specifically, it has to have a "write" method), or else `None` if we want to default to sys.stdout """ self.stream = stream or sys.stdout self.stack = [] """A stack of tag names (e.g., ["html","body","h1","p"]) """ self.last = None """The last element we were told to add to our output. """ self.fancy = 0 def write_doctype(self): """Write out the DOCTYPE element at the start of the HTML file. For the moment, we don't provide any flexibility in this... """ self.stream.write('\n') def finish(self): """Call this to indicate we have finished. It will grumble if anything is left unclosed - i.e., if there is still stuff on the internal stack. """ if len(self.stack) > 0: raise ValueError,"Items still outstanding on stack: %s"%\ self._stack_as_string() def _maybe_write_newline(self,tag,before=1): """Decide whether to write a newline before or after an element. """ if before: if tag not in INLINE: self.stream.write("\n") else: if tag in NEWLINE_AFTER: self.stream.write("\n") def add(self,tag,*args,**keywords): """Write an HTML element at the current level. For instance:: build.add("em","Some simple text.") If `tag` is "text" then it will automagically be converted to ordinary inline text (even though there is no such HTML element). Otherwise, this produces (for instance):: Some simple text. See `write` (which this uses) for more details of the arguments. """ self._maybe_write_newline(tag) self.stream.write(self.element(tag,*args,**keywords)) self.last = "/"+tag def start(self,tag,*args,**keywords): """Write out the start of an HTML element, and start a new level. `tag` should be the name of an HTML element (a tag), or "comment". For instance:: build.start("li","some text") might cause::
  • some text to be written out. Note that the element's closing tag is *not* written out - see `end()` for that. If `args` are given, they are assumed to be (things that resolve to) more text elements (i.e., strings). For instance:: build.start("li","some text", build.element("strong","and emphasis"), "and plain text again") See `write` (which this uses) for more details of the use of the `tag` and `keywords` arguments. """ if tag in EMPTY: raise ValueError,"Cannot start an 'empty' element (%s)"%tag elif tag == "text": raise ValueError,"Cannot start 'text'" elif tag == "html" and len(self.stack) > 0: raise ValueError,\ "Cannot insert 'html' except at root of stack" self._maybe_write_newline(tag,before=1) self.stream.write(self.start_tag(tag,**keywords)) content = self._content(tag,args) if content: self.stream.write(content) self._stack_add(tag) self.last = tag def end(self,tag,*args): """Write out the end of an HTML element, and finish the current level. `tag` should be the name of an HTML element (a tag), or "comment". For instance:: build.end("ul") Otherwise, for the moment at least, the `tag` being ended must be the last tag that was begun (in the future, we *might* support automatic "unrolling" of the stack, but not at the moment). NB: if `args` are given, they will also be treated as closing tags, in order - thus, for example:: build.end("td","tr","table") is exactly equivalent to:: build.end("td") build.end("tr") build.end("table") (Hmm - I'm not sure if that last is a good idea. Still, I *do* use it for that specific instance, which is a relatively common thing to want to do, and it does save "wasting" two fairly uninteresting lines of code.) """ if tag in EMPTY: raise ValueError,"Cannot start an 'empty' element (%s)"%tag if tag == "text": raise ValueError,"Cannot end 'text'" self._stack_remove(tag) self.stream.write(self.end_tag(tag)) self._maybe_write_newline(tag,before=0) if args: for item in args: self.end(item) self.last = "/"+args[-1] else: self.last = "/"+tag def start_tag(self,tag,**keywords): """Construct and return a start tag. `tag` should be the name of an HTML element (a tag) `tag` may not be "text". `keywords` should be attributes for the tag. """ if tag == "comment": return "" else: return ""%tag def element(self,tag,*args,**keywords): """Construct and return a complete HTML element. `tag` should be the name of an HTML element (a tag), or "text". If `tag` is "text" then `keywords` is ignored, and the result of concatenating `args` is returned. Otherwise: - an opening tag is composed from `tag` and `keywords` (see `start_tag()`) - the result of concatentating `args` is appended to that - a closing tag (see `end_tag()`) is appended to that and the result is returned. Within `args`, non-strings are coerced to their representations. """ content = self._content(tag,args) if tag == "text": return content else: return self.start_tag(tag,**keywords) + content + \ self.end_tag(tag) def _content(self,tag,args): """Return the *content* of an element. `tag` is not currently used, but *might* be useful later on? """ content = "" if args: for item in args: if type(item) == type(""): content += item else: content += `item` return content def escape(self,text): """Return `text` as valid HTML (that is, with any "special" characters escaped) """ # Hmm - paranoia, just in case if type(text) != type(""): return text text = string.replace(text, "&", "&") text = string.replace(text, "<", "<") text = string.replace(text, '"', """) text = string.replace(text, ">", ">") if self.fancy: text = string.replace(text, " ", "°") text = string.replace(text, "\n", "¶\n") return text def last_tag(self): """Return the last element we were asked to add to our output. Note that if we just closed element "XX" (for instance), then we will return "/XX". """ return self.last # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Stack queries def _in_list(self): """Are we *immediately* within a list (i.e., the first child element of a list) """ return self.stack[-1] in LISTS # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Stack maintenance def _stack_ends(self,name): """Return true if the stack ends with the named entity. """ return self.stack[-1] == name def _stack_add(self,name): """Add a new level to the stack. """ self.stack.append(name) def _stack_remove(self,name): """Remove the last level from the stack (but only if it is of the right sort). """ if len(self.stack) == 0: raise ValueError,"Cannot end %s - nothing outstanding to end"%\ (name) if name != self.stack[-1]: raise ValueError,"Cannot end %s - last thing begun was %s"%\ (name,self.stack[-1]) del self.stack[-1] def _stack_as_string(self): names = [] for name in self.stack: names.append(name) return string.join(names,",") # ---------------------------------------------------------------------- if __name__ == "__main__": build = BuildHTML() print "Building a page" build.start("html") build.start("body") build.add("h1","Fred") build.start("p") build.add("text","This is some text.") build.add("strong","Really.") build.start("p","Another paragraph") build.end("body") build.end("html") build.finish() print print "Building a broken page" try: build.start("html") build.start("body") build.add("h1","Fred") build.start("p") build.finish() except ValueError,detail: print "ValueError:",detail