S2 CSS: cssexplorer.py

From Dreamwidth Notes
Jump to: navigation, search

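This is a quick little script that dumps out all HTML tags that have an id or a class, followed by a list of every id and every class found. Requires Python 2 and BeautifulSoup 3 (it imports from the `BeautifulSoup` module).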

Note: on entry pages, the script seems to die. Delete the one <script> section on the line reported in the error message and it should then work.

#!/usr/bin/python
 
import sys, os, re, string
from optparse import OptionParser
from BeautifulSoup import BeautifulSoup
from sets import Set
 
# Every id encountered, stored with a leading '#', in the order the tags
# were visited (duplicates are kept).
tag_ids = list()
# Distinct class names encountered across all tags (stored without a '.').
tag_classes = Set()
 
def navigateClassesAndIDs(item, level):
    """Walk a parsed HTML tree in document order, reporting classes and IDs.

    Starting at ``item``, every node is handed to ``printClassesAndIDs``.
    A node's children are visited one level deeper, after which each
    following sibling of the node is visited at the same level.

    item  -- node to start from; it must expose ``nextSibling`` and, for
             container nodes, a ``contents`` list.  ``None`` is a no-op.
    level -- nesting depth reported for ``item`` and its siblings.
    """

    # Siblings are walked with a loop rather than by recursion, so a long
    # run of sibling nodes cannot exhaust the interpreter's recursion limit;
    # the recursion depth is bounded by how deeply the markup nests.
    while item is not None:
        printClassesAndIDs(item, level)

        # 'contents' is read from the instance dict: nodes without it
        # (text nodes) are leaves and are not descended into.
        children = item.__dict__.get('contents')
        if children:
            navigateClassesAndIDs(children[0], level + 1)

        # Compared against None (in the loop condition) rather than tested
        # for truthiness, so a falsy-but-present sibling does not end the walk.
        item = item.nextSibling
 
def printClassesAndIDs(item, level):
    """Print one line describing ``item`` if it carries an id or a class.

    The id (prefixed with '#') is appended to the module-level ``tag_ids``
    list and each class name is added to the module-level ``tag_classes``
    set.  Nodes with no attributes, or with neither an id nor a class,
    produce no output and record nothing.

    item  -- parsed node; its ``attrs`` is expected to be a sequence of
             (name, value) pairs, as BeautifulSoup 3 tags provide.
    level -- nesting depth, used for the "L nn" label and the indentation.
    """

    # Read from the instance dict so that nodes lacking 'attrs' (text
    # nodes) are simply skipped.
    attrs = item.__dict__.get('attrs')
    if not attrs:
        return

    # Consult every attribute by name.  Testing only attrs[0] missed an
    # id/class that was not the first attribute, and mistook a first
    # attribute whose *value* was 'id' or 'class' for the real thing.
    attr_map = dict(attrs)
    id_value = attr_map.get('id')
    class_value = attr_map.get('class')

    if id_value is None and class_value is None:
        return

    item_id = ""
    if id_value is not None:
        item_id = '#' + id_value
        tag_ids.append(item_id)

    item_classes = ""
    if class_value is not None:
        class_names = class_value.split()
        for class_name in class_names:
            tag_classes.add(class_name)
        item_classes = ' '.join(['.' + class_name for class_name in class_names])

    tab = "    " * level

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("L %2d: %s<%s> %s %s" % (level, tab, item.name, item_id, item_classes))
 
if __name__ == '__main__':
    # Command-line entry point: parse the HTML file named with -i/--infile,
    # dump the id/class outline of its <body>, then print the summary lists.
    parser = OptionParser()
    parser.add_option("-i", "--infile", dest="infile",
        help="The input file", metavar="INFILE")

    (options, args) = parser.parse_args()

    if not options.infile:
        sys.stderr.write("Error: no input file to load!\n")
        parser.print_help()
        # A missing argument is a failure; signal it through the exit status.
        sys.exit(1)

    InFile = options.infile

    # Close the file handle as soon as parsing is finished, even on error.
    page = open(InFile, 'r')
    try:
        soup = BeautifulSoup(page)
    finally:
        page.close()

    body = soup.find('body')
    if body is None:
        # Without a <body> there is nothing to walk; say so instead of
        # failing inside the traversal.
        sys.stderr.write("Error: no <body> element found in %s\n" % InFile)
        sys.exit(1)

    navigateClassesAndIDs(body, 0)

    # Single parenthesised arguments keep the output identical under the
    # Python 2 print statement.
    print("ALL IDS, in order: \n\t%s" % "\n\t".join(tag_ids))
    print("ALL CLASSES, alphabetized: \n\t%s" % "\n\t".join(sorted(tag_classes)))