S2 CSS: cssexplorer.py

From Dreamwidth Notes
Jump to: navigation, search

This is a quick little script that dumps out all HTML tags that have an id or a class, followed by a list of every class it found. Requires BeautifulSoup.

Note: the script seems to die on entry pages. If that happens, open the input file, delete the <script> section on the line named in the error message, and run it again; it should then work.

import sys, os, re, string
from optparse import OptionParser
from BeautifulSoup import BeautifulSoup
from sets import Set
# Tag ids for the final "ALL IDS, in order" report.
tag_ids = []
# Distinct class names collected by printClassesAndIDs; printed sorted at the
# end.  The builtin set replaces the deprecated sets.Set (same add() and
# iteration behaviour).
tag_classes = set()
def navigateClassesAndIDs(item, level):
    """Walk a parse tree and report every node's id and classes.

    Starting at *item*, each node is passed to printClassesAndIDs, then its
    children are visited (one level deeper), then its following siblings
    (at the same level).

    item  -- node to start from; None is accepted and visits nothing.
    level -- nesting depth reported for *item* and its siblings; children
             are reported at level + 1.
    """
    # Siblings are walked with a loop instead of recursion so the recursion
    # depth follows the nesting depth of the markup, not the number of nodes
    # on one level (a long run of siblings could otherwise hit the
    # interpreter's recursion limit).
    while item is not None:
        printClassesAndIDs(item, level)
        # Nodes without a 'contents' attribute, or with an empty one, have
        # no children to descend into.
        children = getattr(item, 'contents', None)
        if children:
            navigateClassesAndIDs(children[0], level + 1)
        # Compare against None explicitly: a falsy node (e.g. an empty
        # string) must not end the walk early.
        item = item.nextSibling
def printClassesAndIDs(item, level):
    """Print one line for *item* if it has an ``id`` or ``class`` attribute.

    The line has the form ``L <level>: <indent><name> #id .class1 .class2``,
    indented four spaces per level.  Nodes with no attributes, or with
    neither an ``id`` nor a ``class`` attribute, print nothing.

    Side effects: the id (without ``#``) is appended to the module-level
    ``tag_ids`` list, and each class name (without ``.``) is added to the
    module-level ``tag_classes`` set.

    item  -- parse-tree node to describe.
    level -- nesting depth, used for the level number and the indentation.
    """
    attrs = getattr(item, 'attrs', None)
    if not attrs:
        # No attributes at all: nothing to report.
        return

    # Look at every attribute, not just the first one.  BeautifulSoup 3
    # stores attrs as a list of (name, value) pairs; dict() accepts that as
    # well as a plain mapping.
    attr_map = dict(attrs)
    tag_id = attr_map.get('id')
    class_value = attr_map.get('class')
    if tag_id is None and class_value is None:
        return

    if tag_id is None:
        id_text = ""
    else:
        id_text = '#' + tag_id
        tag_ids.append(tag_id)

    if class_value is None:
        class_text = ""
    else:
        class_names = class_value.split()
        class_text = ' '.join(['.' + name for name in class_names])
        for name in class_names:
            tag_classes.add(name)

    indent = "    " * level
    # Single-argument print(...) behaves the same as the print statement.
    print("L %2d: %s<%s> %s %s" % (level, indent, item.name, id_text, class_text))
if __name__ == '__main__':
    # Usage: cssexplorer.py -i INFILE
    parser = OptionParser()
    parser.add_option("-i", "--infile", dest="infile",
        help="The input file", metavar="INFILE")
    (options, args) = parser.parse_args()

    # Without an input file there is nothing to do; report it and stop
    # instead of carrying on with an undefined file name.
    if not options.infile:
        sys.stderr.write("Error: no input file to load!\n")
        sys.exit(1)

    # The BeautifulSoup constructor parses the document, so the file can be
    # closed as soon as it returns.
    page = open(options.infile, 'r')
    try:
        soup = BeautifulSoup(page)
    finally:
        page.close()

    body = soup.find('body')
    if body is None:
        sys.stderr.write("Error: no <body> element found in %s\n" % options.infile)
        sys.exit(1)

    # Dump the tree first, then the collected summaries.
    navigateClassesAndIDs(body, 0)
    print("ALL IDS, in order: \n\t%s" % "\n\t".join(tag_ids))
    print("ALL CLASSES, alphabetized: \n\t%s" % "\n\t".join(sorted(tag_classes)))