"""Kant Generator for Python

Generates mock philosophy based on a context-free grammar

Usage: python kgp.py [options] [string...]

Options:
  -g ..., --grammar=...   use specified grammar file or URL
  -s ..., --source=...    parse specified source file or URL instead of string
  -w #, --wrap=#          hard wrap output to # characters per line
  -h, --help              show this help
  -d                      show debugging information while parsing

Examples:
  kgp.py                  generates several paragraphs of Kantian philosophy
  kgp.py -w 72 paragraph  generates a paragraph of Kant, wrapped to 72 characters
  kgp.py -g husserl.xml   generates several paragraphs of Husserl
  kgp.py -s template.xml  reads from template.xml to decide what to generate

This program is part of "Dive Into Python", a free Python book for
experienced programmers.  Visit http://diveintopython.org/ for the
latest version.
"""

# Module metadata: author, version, release date, copyright holder and license.
__author__ = "Mark Pilgrim (f8dy@diveintopython.org)"
__version__ = "1.0"
__date__ = "26 August 2001"
__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
__license__ = "Python"

from xml.dom import minidom
import random
import toolbox
import sys
import getopt

# Module-wide debugging flag.  main() sets it to 1 when the -d option is
# given; while it is true, KantGenerator.randomChildElement prints the
# candidate elements and the one it picked.
_debug = 0

class KantGenerator:
    """generates mock philosophy based on a context-free grammar

    The grammar is an XML document whose <ref id='...'> elements are
    named productions.  The source is a second XML document (for example
    "<xref id='section'/>") that says what to expand.  Expanding the
    source walks the XML tree and appends text fragments to self.pieces;
    output() joins them into the final string.

    Attributes:
    - grammar: root element of the parsed grammar document
    - source: root element of the parsed source document
    - refs: dictionary mapping each <ref> id to its element
    - defaultSource: an '<xref id="..."/>' string naming a randomly
      chosen <ref> that no <xref> in the grammar points to, or None if
      there is no such <ref>
    - pieces: list of output fragments generated so far
    - capitalizeNextWord: true while the next non-empty text node still
      has to be capitalized
    """

    def __init__(self, grammar=None, source=None):
        """load the grammar and the source, then generate output once

        grammar and source are handed to loadGrammar() and loadSource().
        If source is empty or None, the default source computed by
        loadGrammar() is used instead.
        """
        self.refs = {}
        self.defaultSource = None
        self.pieces = []
        self.capitalizeNextWord = 0
        self.loadGrammar(grammar)
        # NOTE(review): defaultSource can still be None here (every <ref>
        # is cross-referenced), in which case loadSource() receives None;
        # confirm how toolbox.openAnything treats that.
        self.loadSource(source or self.defaultSource)
        self.refresh()

    def loadGrammar(self, grammar):
        """load context-free grammar

        grammar can be
        - a URL of a remote XML file ("http://diveintopython.org/kant.xml")
        - a filename of a local XML file ("/a/diveintopython/common/py/kant.xml")
        - the actual grammar, as a string

        Besides storing the parsed grammar, this rebuilds self.refs and
        picks self.defaultSource: a random <ref> that is never the target
        of an <xref>, i.e. a top-level production.
        """
        sock = toolbox.openAnything(grammar)
        try:
            self.grammar = minidom.parse(sock).documentElement
        finally:
            # close the stream even when the XML turns out to be malformed
            sock.close()
        self.refs = {}
        for ref in self.grammar.getElementsByTagName("ref"):
            self.refs[ref.attributes["id"].value] = ref
        # ids that some <xref> points to; a set keeps the test below cheap
        referenced = set([xref.attributes["id"].value
                          for xref in self.grammar.getElementsByTagName("xref")])
        standaloneXrefs = [refId for refId in self.refs
                           if refId not in referenced]
        if standaloneXrefs:
            self.defaultSource = '<xref id="%s"/>' % random.choice(standaloneXrefs)
        else:
            self.defaultSource = None

    def loadSource(self, source):
        """load source

        source can be
        - a URL of a remote XML file ("http://diveintopython.org/section.xml")
        - a filename of a local XML file ("/a/diveintopython/common/py/section.xml")
        - the actual XML to parse, as a string ("<xref id='section'/>")
        """
        sock = toolbox.openAnything(source)
        try:
            self.source = minidom.parse(sock).documentElement
        finally:
            # close the stream even when the XML turns out to be malformed
            sock.close()

    def reset(self):
        """reset parser: empty the output buffer and clear the capitalize flag"""
        self.pieces = []
        self.capitalizeNextWord = 0

    def refresh(self):
        """reset output buffer, re-parse entire source file and return the output

        Since parsing involves a good deal of randomness, this is an
        easy way to get new output without having to reload a grammar file
        each time.
        """
        self.reset()
        self.parse(self.source)
        return self.output()

    def output(self):
        """output generated text (all pieces joined into one string)"""
        return "".join(self.pieces)

    def randomChildElement(self, node):
        """choose a random child element of a node

        This is a utility method used by do_xref and do_choice.  Only
        element children are candidates; text and comment children are
        skipped.  random.choice raises IndexError if node has no element
        children.
        """
        # Build a real list: random.choice and len both need a sequence,
        # which filter() does not return on every Python version.
        choices = [child for child in node.childNodes
                   if isinstance(child, minidom.Element)]
        chosen = random.choice(choices)
        if _debug:
            print("%s available choices: %s"
                  % (len(choices), [e.toxml() for e in choices]))
            print("Chosen: %s" % chosen.toxml())
        return chosen

    def parse(self, node):
        """parse a single XML node

        A parsed XML document (from minidom.parse) is a tree of nodes
        of various types.  Each node is represented by an instance of the
        corresponding Python class (Element for a tag, Text for
        text data, Document for the top-level document).  The following
        statement constructs the name of a class method based on the type
        of node we're parsing ("parse_Element" for an Element node,
        "parse_Text" for a Text node, etc.) and then calls the method.
        A node type without a matching parse_* method raises
        AttributeError.
        """
        parseMethod = getattr(self, "parse_%s" % node.__class__.__name__)
        parseMethod(node)

    def parse_Document(self, node):
        """parse the document node

        The document node by itself isn't interesting (to us), but
        its only child, node.documentElement, is: it's the root node
        of the grammar.
        """
        self.parse(node.documentElement)

    def parse_Text(self, node):
        """parse a text node

        The text of a text node is usually added to the output buffer
        verbatim.  The one exception is that <p class='sentence'> sets
        a flag to capitalize the first letter of the next word.  If
        that flag is set, we capitalize the text and reset the flag.
        An empty text node has nothing to capitalize, so it leaves the
        flag pending for the next text node.
        """
        text = node.data
        if self.capitalizeNextWord and text:
            self.pieces.append(text[0].upper())
            self.pieces.append(text[1:])
            self.capitalizeNextWord = 0
        else:
            self.pieces.append(text)

    def parse_Element(self, node):
        """parse an element

        An XML element corresponds to an actual tag in the source:
        <xref id='...'>, <p chance='...'>, <choice>, etc.
        Each element type is handled in its own method.  Like we did in
        parse(), we construct a method name based on the name of the
        element ("do_xref" for an <xref> tag, etc.) and
        call the method.  A tag without a matching do_* method raises
        AttributeError.
        """
        handlerMethod = getattr(self, "do_%s" % node.tagName)
        handlerMethod(node)

    def parse_Comment(self, node):
        """parse a comment

        The grammar can contain XML comments, but we ignore them
        """
        pass

    def do_xref(self, node):
        """handle <xref id='...'> tag

        An <xref id='...'> tag is a cross-reference to a <ref id='...'>
        tag.  <xref id='sentence'/> evaluates to a randomly chosen child of
        <ref id='sentence'>.  An id with no matching <ref> raises KeyError.
        """
        refId = node.attributes["id"].value
        self.parse(self.randomChildElement(self.refs[refId]))

    def do_p(self, node):
        """handle <p> tag

        The <p> tag is the core of the grammar.  It can contain almost
        anything: freeform text, <choice> tags, <xref> tags, even other
        <p> tags.  If a "class='sentence'" attribute is found, a flag
        is set and the next word will be capitalized.  If a "chance='X'"
        attribute is found, there is an X% chance that the tag will be
        evaluated (and therefore a (100-X)% chance that it will be
        completely ignored)
        """
        # The flag is set before the chance roll, so it stays pending even
        # when this particular <p> ends up being skipped.
        if node.getAttribute("class") == "sentence":
            self.capitalizeNextWord = 1
        if node.hasAttribute("chance"):
            chance = int(node.getAttribute("chance"))
            if chance <= random.randrange(100):
                return
        # explicit loop: map() would not run the calls where it is lazy
        for child in node.childNodes:
            self.parse(child)

    def do_choice(self, node):
        """handle <choice> tag

        A <choice> tag contains one or more <p> tags.  One <p> tag
        is chosen at random and evaluated; the rest are ignored.
        """
        self.parse(self.randomChildElement(node))

def usage():
    """print the module docstring, which doubles as the command-line help"""
    # call form with a single argument prints the same text on Python 2 and 3
    print(__doc__)

def main(argv):
    """parse command-line options, build a KantGenerator and print its output

    argv is the argument list without the program name (sys.argv[1:]).

    Recognized options:
    - -h, --help: print usage and exit
    - -d: turn on the module-level _debug flag
    - -g, --grammar: grammar file or URL (defaults to "kant.xml")
    - -s, --source: source file or URL
    - -w, --wrap: hard wrap the output to this many characters per line

    If no source is given, each remaining positional argument is treated
    as a <ref> id and turned into an <xref> to expand.  An unrecognized
    option prints usage and exits with status 2.
    """
    global _debug
    grammar = None
    source = None
    wrap = None
    try:
        opts, args = getopt.getopt(argv, "hg:s:w:d",
                                   ["help", "grammar=", "source=", "wrap="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt == '-d':
            _debug = 1
        elif opt in ("-g", "--grammar"):
            grammar = arg
        elif opt in ("-s", "--source"):
            source = arg
        elif opt in ("-w", "--wrap"):
            try:
                wrap = int(arg)
            except ValueError:
                # a bad width is not fatal: warn and fall back to no wrapping
                print("Warning: ignoring invalid wrap option: %s" % arg)

    if not grammar:
        grammar = "kant.xml"

    if not source and args:
        source = "".join(["<xref id='%s'/>" % arg for arg in args])
        if len(args) > 1:
            # An XML document may have only one root element, so several
            # sibling <xref> tags must be wrapped in a single plain <p>.
            source = "<p>%s</p>" % source

    k = KantGenerator(grammar, source)
    # a wrap width of 0 is treated the same as no wrapping
    if wrap:
        print(toolbox.hardwrap(k.output(), wrap))
    else:
        print(k.output())

# When run as a script, pass the command-line arguments (without the
# program name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
