#!/usr/bin/env python
"""
A 'grep' for HTML/XML documents that uses xpath syntax.

Usage: xpathgrep.py xpath_expr [filename ...]
"""

import os
import sys

import libxml2

# If zope is not importable, drop any half-imported 'zope' entry from
# sys.modules and make the sibling Zope3 checkout (../Zope3/src relative to
# this script) importable instead.
try:
    import zope.i18nmessageid
except ImportError:
    if 'zope' in sys.modules:
        del sys.modules['zope']
    sys.path.insert(0, os.path.normpath(os.path.join(
        os.path.dirname(__file__), os.pardir, 'Zope3', 'src')))

# Same fallback for schooltool itself, using ../src relative to this script.
# The module name was previously misspelled ('schootool'), so the import
# always failed and the path was always prepended.
try:
    import schooltool
except ImportError:
    if 'schooltool' in sys.modules:
        del sys.modules['schooltool']
    sys.path.insert(0, os.path.normpath(os.path.join(
        os.path.dirname(__file__), os.pardir, 'src')))

from schooltool.xmlparsing import HTMLDocument, XMLParseError, XMLXPathError


def _error(message):
    """Write a single diagnostic line to standard error."""
    sys.stderr.write('%s\n' % message)


def grep(expr, filename):
    """Run the XPath expression `expr` against one document and print matches.

    `filename` is the path of the document to read; a false value (None or
    an empty string) means the document is read from standard input.

    If the document cannot be read or parsed, a message is written to
    standard error and the function returns so that the caller can move on
    to the next file.  If evaluating `expr` fails, a message is written to
    standard error and the process exits with status 1.
    """
    try:
        if not filename:
            filename = ''
            htmldata = sys.stdin.read()
        else:
            # Close the file deterministically instead of relying on
            # garbage collection.
            with open(filename) as source:
                htmldata = source.read()
        doc = HTMLDocument(htmldata)
    except Exception as e:
        # Deliberately broad: any per-file failure is reported and skipped.
        _error('xpathgrep: %s: %s' % (filename, e))
        return

    try:
        results = doc.query(expr)
    except Exception as e:
        _error('xpathgrep: %s' % e)
        # no point in continuing if the expr has a syntax error
        sys.exit(1)

    if results:
        print('%s: %s matches' % (filename, len(results)))
        for node in results:
            # Keep continuation lines of a multi-line node indented.
            print(' * ' + str(node).replace('\n', '\n '))


def main():
    """Command-line entry point: parse sys.argv and grep every named file.

    With no XPath expression the usage text (the module docstring) is
    written to standard error and the process exits with status 1.  With no
    file names, the document is read from standard input.
    """
    if len(sys.argv) < 2:
        _error(__doc__.strip())
        sys.exit(1)

    expr = sys.argv[1]
    # A single None entry makes grep() read from sys.stdin.
    filenames = sys.argv[2:] or [None]

    # Be quiet!  Install a no-op libxml2 error handler.
    libxml2.registerErrorHandler(lambda ctx, error: None, None)

    for filename in filenames:
        grep(expr, filename)


if __name__ == '__main__':
    main()