Wednesday, August 11, 2004

A simple python colorizer

#This blog is all about python and python code.
#
#I realize that I'll be putting up a lot of code, so I've
#written this simple python code to html converter.


#pyhtmlizer.py
#Colorize and convert your py file into html
#Usage: python pyhtmlizer.py <filename>
#Fixes:
#24-Feb-2005: Added elif and class to the keywords list
#24-Feb-2005: Function names in def starting with
#             underscores get coloured correctly
#26-Mar-2005: Fixed the regular expression to handle
#             the '\' in strings properly.
#31-Mar-2005:Added print to the keywords list

import re

# Words that htmlizedTokens renders with the "keyword" markup.
# Every entry must be separated by a comma: adjacent string literals are
# silently concatenated by Python, which would merge the keywords into a
# few long strings that never match a token.
keyWords = ["def", "import", "from", "for", "if", "else",
            "elif", "in", "and", "or", "not", "while", "print",
            "return", "yield", "try", "except", "class"]

# Tokenizer: each alternative matches one kind of token, tried left to right.
# Raw strings are used so that the backslashes reach the regex engine
# unchanged instead of relying on Python leaving unknown string escapes alone.
r = re.compile(
    r"[a-zA-Z_]\w*|"              # identifier or keyword
    r"\t|\n|"                     # tab / newline, each as its own token
    r"\d[.\w]*|"                  # number: a digit followed by dots/word chars
    r'"""(?:\\.|[^"])*"""|'       # triple double-quoted string
    r'"(?:\\.|[^"])*"|'           # double-quoted string, honouring \-escapes
    r"'''(?:\\.|[^'])*'''|"       # triple single-quoted string
    r"'(?:\\.|[^'])*'|"           # single-quoted string, honouring \-escapes
    r"#[^\n]*\n?|"                # comment; the newline is optional so that a
                                  # comment on the last line is one token too
    r".",                         # any other single character
    re.DOTALL)                    # lets "." (and "\\.") match a newline as well

# (text, html) substitutions applied to the source being converted.
# The list is ordered: "&" must stay first so that the "&" characters
# introduced by the later entities are not escaped a second time.
replaceList = [("&", "&amp;"),
               (" ", "&nbsp;"),
               ("\t", "&nbsp;" * 4),   # a tab is rendered as four spaces
               ("<", "&lt;"),
               (">", "&gt;"),
               ("\n", "<br>")]

# Same substitutions keyed by the source text, for single-token lookups.
replaceDict = dict(replaceList)
# HTML template for each token category; "%s" receives the token text.
markupdict = dict([
    ("keyword", '<b><FONT COLOR="navy">%s</FONT></b>'),
    ("def", '<b><FONT COLOR="blue">%s</FONT></b>'),
    ("number", '<FONT COLOR="blue">%s</FONT>'),
    ("string", '<FONT COLOR="olive">%s</FONT>'),
    ("comment", '<FONT COLOR="green">%s</FONT>'),
])

def colorize(code, type):
    """Wrap *code* in the HTML template stored in markupdict under *type*.

    Raises KeyError when *type* is not a key of markupdict.
    """
    template = markupdict[type]
    return template % code
        
def _escape(text):
    """Return *text* with every replaceList substitution applied in order."""
    # Substitutions run in list order, so the "&" entry has to come before
    # any entry whose replacement itself contains "&".
    for old, new in replaceList:
        text = text.replace(old, new)
    return text


def htmlizedTokens(code):
    """Yield the HTML fragment for each token of the Python source *code*.

    *code* is split with the module-level tokenizer regex ``r``; each token
    is classified and yielded as follows:

    - keyword (in keyWords): "keyword" markup
    - token starting with a digit: "number" markup
    - other name: "def" markup when it is the first name after ``def`` or
      ``class``, otherwise unchanged
    - single character listed in replaceDict: its HTML replacement
    - comment (starts with "#"): escaped, then "comment" markup
    - string literal (starts with a quote): escaped, then "string" markup
    - anything else: unchanged
    """
    indef = False  # True between a def/class keyword and the name it declares
    for token in r.findall(code):
        first = token[0]
        if token in keyWords:
            if token in ("class", "def"):
                indef = True
            yield colorize(token, "keyword")
        elif first.isdigit():
            yield colorize(token, "number")
        elif first.isalpha() or first == "_":
            if indef:
                # First name after def/class: the function or class name.
                indef = False
                yield colorize(token, "def")
            else:
                yield token
        elif token in replaceDict:
            yield replaceDict[token]
        elif first == "#":
            yield colorize(_escape(token), "comment")
        elif first in ("'", '"'):
            yield colorize(_escape(token), "string")
        else:
            yield token
            
def htmlize(code):
    """Return the HTML rendering of the Python source text *code*.

    The result is the concatenation of every fragment produced by
    htmlizedTokens, in order.
    """
    fragments = htmlizedTokens(code)
    return "".join(fragments)


# Script entry point: convert the file named on the command line and write
# the result next to it as "<filename>.html".
if __name__ == "__main__":
    import sys
    # sys.argv[0] is always the script name, so a filename was supplied only
    # when there is a second entry.
    if len(sys.argv) < 2:
        sys.exit("Usage: python pyhtmlizer.py <filename>")

    fi = open(sys.argv[1])
    try:
        source = fi.read()
    finally:
        fi.close()

    fo = open(sys.argv[1] + ".html", "w")
    try:
        fo.write(htmlize(source))
    finally:
        # Close the output even if the conversion or the write fails.
        fo.close()

0 Comments:

Post a Comment

<< Home