S2 CSS: cssexplorer.py
From Dreamwidth Notes
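This is a quick little script that dumps out all HTML tags inside <body> that have an id or a class attribute, followed by a list of every id and every class it found. Requires BeautifulSoup (the version 3 `BeautifulSoup` module). Run it as: cssexplorer.py -i INFILE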
Note: the script seems to die on entry pages. If that happens, delete the <script> section on the line reported in the error message and run it again; it should then work.
#!/usr/bin/python
"""Dump every HTML tag inside <body> that carries an id or class attribute.

Usage: cssexplorer.py -i INFILE

Each matching tag is printed on its own line, indented by nesting level,
followed by a list of all ids (in document order) and all distinct class
names (alphabetized).  Parsing is done with BeautifulSoup 3.
"""
import os
import re
import string
import sys
from optparse import OptionParser

# Every id encountered, in document order, each prefixed with '#'.
tag_ids = []
# Every distinct class name encountered (stored without the '.' prefix).
# The builtin set replaces the deprecated sets.Set.
tag_classes = set()


def navigateClassesAndIDs(item, level):
    """Walk item, its descendants and its following siblings in document order.

    printClassesAndIDs() is called for every node visited.

    item  -- node to start from; it must expose ``nextSibling`` and, for
             tags, ``contents`` as instance attributes.
    level -- nesting depth reported for ``item`` and its siblings.
    """
    # Siblings are followed with a loop and only children are recursed into,
    # so recursion depth tracks nesting depth rather than sibling count.
    # A long run of siblings would otherwise hit the recursion limit.
    while item is not None:
        printClassesAndIDs(item, level)
        # Look in the instance __dict__ rather than using hasattr():
        # only nodes that really store a 'contents' list are descended into.
        if 'contents' in item.__dict__ and item.contents:
            navigateClassesAndIDs(item.contents[0], level + 1)
        # Compare against None explicitly so that an empty (falsy) text
        # node does not end the sibling walk early.
        item = item.nextSibling


def printClassesAndIDs(item, level):
    """Print one line for item if it has an id and/or class attribute.

    The id is appended to ``tag_ids`` and each class name is added to
    ``tag_classes``.  Nodes without attributes, or without an id or class
    among them, are skipped silently.

    item  -- node to inspect; ``attrs`` is read as (name, value) pairs.
    level -- nesting depth, used for the line prefix and the indentation.
    """
    if 'attrs' not in item.__dict__ or not item.attrs:
        return

    # Build a name -> value map so that *every* attribute is examined.
    # Looking only at attrs[0] misses an id/class that is not the first
    # attribute and misfires when the first attribute's value is "id".
    attrs = dict(item.attrs)

    item_id = None
    item_classes = None

    id_value = attrs.get('id')
    if id_value is not None:
        item_id = '#' + id_value
        tag_ids.append(item_id)

    class_value = attrs.get('class')
    if class_value is not None:
        class_names = class_value.split()
        item_classes = ' '.join('.' + name for name in class_names)
        tag_classes.update(class_names)

    if item_id is None and item_classes is None:
        return

    tab = " " * level
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("L %2d: %s<%s> %s %s" % (level, tab, item.name,
                                   item_id or "", item_classes or ""))


def main(argv=None):
    """Parse the command line, process the input file and print the report.

    argv -- argument list without the program name; None means sys.argv[1:].

    Returns the process exit status: 0 on success, 1 when no input file
    was given or the document has no <body> element.
    """
    parser = OptionParser()
    parser.add_option("-i", "--infile", dest="infile",
                      help="The input file", metavar="INFILE")
    (options, args) = parser.parse_args(argv)

    if not options.infile:
        sys.stderr.write("Error: no input file to load!\n")
        parser.print_help()
        # A usage error must not report success to the calling shell.
        return 1

    # Imported here rather than at module level so the traversal helpers
    # above can be imported without the parser library being installed.
    from BeautifulSoup import BeautifulSoup

    # The with-block guarantees the file is closed once it has been parsed.
    with open(options.infile, 'r') as page:
        soup = BeautifulSoup(page)

    body = soup.find('body')
    if body is None:
        sys.stderr.write("Error: no <body> element found in %s\n"
                         % options.infile)
        return 1

    navigateClassesAndIDs(body, 0)

    print("ALL IDS, in order: \n\t%s" % "\n\t".join(tag_ids))
    print("ALL CLASSES, alphabetized: \n\t%s" % "\n\t".join(sorted(tag_classes)))
    return 0


if __name__ == '__main__':
    sys.exit(main())