
import fnmatch
import os
import re
import string
import types

from dirwalker import DirWalker

class CommentWalker(DirWalker):
    """DirWalker subclass that pulls comment text (or word hits) out of files.

    Behaviour is driven by the ``opts`` mapping handed to the constructor.
    Only the *presence* of most keys matters; the keys read by this class are:

    ``filemasks``      list of fnmatch patterns to process (disables the
                       default skip list)
    ``cpp-comments``   ':'-separated filename suffixes that use ``//`` comments
    ``asm-comments``   ':'-separated filename suffixes that use ``;`` comments
    ``good-words``     path of a word-list file appended to the built-in
                       good words
    ``bad-words``      path of a word-list file of bad words
    ``words``          path of a word-list file used by ``wordfind``
    ``wordfind``       scan whole lines (not just comments) for ``words``
    ``words-only``     print only the unknown words of each comment
    ``new-only``       collect unknown words in ``self.newWords``; print nothing
    ``filenames``      prefix output with ``file:line:``
    ``binary``         skip the NUL-byte binary-file check
    ``verbose``        print ``#``-prefixed progress messages

    Word lists hold plain strings (compared with ``==``) and compiled regular
    expressions (applied with ``match``).

    NOTE(review): ``opts`` is tested with ``in`` and indexed with ``[]``, so
    it is assumed to be a plain dict -- confirm against the caller.
    NOTE(review): ``DirWalker.__init__`` is not invoked here, and
    ``self.baseDir`` / ``self.subDir`` are read but never assigned in this
    class; presumably the base class maintains them -- confirm.
    """

    def __init__(self, opts):
        """Store ``opts`` and build the mask, suffix and word-list tables."""
        self.opts = opts

        if 'filemasks' in opts:
            # Explicit masks replace the default skip list entirely.
            self.skipMasks = []
            self.doMasks = opts['filemasks']
        else:
            self.skipMasks = ['*.pyc', '*.o', '*.obj', '*.a', '*.lib', '*.exe']
            self.doMasks = ['*']

        self.cppSuffixes = []
        if 'cpp-comments' in opts:
            self.cppSuffixes = opts['cpp-comments'].split(':')

        self.asmSuffixes = []
        if 'asm-comments' in opts:
            self.asmSuffixes = opts['asm-comments'].split(':')

        # Built-in good words: a few short English words, hex/decimal
        # numbers, and "thing"/"things".
        self.goodWords = ['an', 'the', 'in', 'on', 'of', 'from', 'with',
                          re.compile(r'^(0x)?[0-9a-f]+$', re.I),
                          re.compile(r'^things?$', re.I)]
        if 'good-words' in opts:
            self.goodWords.extend(self._LoadWordList(opts['good-words']))

        self.badWords = []
        if 'bad-words' in opts:
            self.badWords = self._LoadWordList(opts['bad-words'])

        self.words = []
        if 'words' in opts:
            self.words = self._LoadWordList(opts['words'])

        # Filled by DoFile in 'new-only' mode; keys are the words seen.
        self.newWords = {}

    def _LoadWordList(self, path):
        """Read a word-list file and return its entries as a list.

        Each line has its '#' comment removed (see StripComments) and is
        stripped of surrounding whitespace.  Entries of one character or
        less are dropped.  An entry starting with '^' is compiled as a
        case-insensitive regular expression; anything else stays a string.

        Loading is best-effort: an unreadable file yields an empty list and
        a '#'-prefixed warning instead of an exception.  An invalid regular
        expression in the file still raises ``re.error``.
        """
        try:
            inFile = open(path)
            try:
                rawLines = inFile.readlines()
            finally:
                inFile.close()
        except (IOError, OSError, UnicodeError):
            print("# WARNING: could not read word list %s" % path)
            return []

        entries = []
        for raw in rawLines:
            entry = self.StripComments(raw).strip()
            if len(entry) > 1:
                if entry.startswith('^'):
                    entry = re.compile(entry, re.I)
                entries.append(entry)
        return entries

    def StripComments(self, l):
        """Return ``l`` with everything from an unescaped '#' onwards removed.

        A '#' preceded by a backslash does not start a comment.  The
        character in front of the '#' is kept.
        """
        return re.sub(r'(^|[^\\])\#.*', r'\1', l)

    def WordMatch(self, word, *wordLists):
        """Look ``word`` up in one or more word lists.

        String entries must equal ``word`` exactly; any other entry is
        treated as a compiled pattern and tried with ``match``.

        Returns the matching string entry, or the ``pattern`` text of the
        matching regular expression, or None when nothing matches.
        """
        for wordList in wordLists:
            for entry in wordList:
                if isinstance(entry, str):
                    if word == entry:
                        return entry
                elif entry.match(word):
                    return entry.pattern
        return None

    def WordSplit(self, str):
        """Split text on runs of characters other than letters, digits, '$'.

        The result may contain empty strings (e.g. when the text starts or
        ends with a separator).
        """
        return re.split(r'[^a-zA-Z0-9\$]+', str)

    def WordFilterOut(self, srcList, *knownLists):
        """Return the words of ``srcList`` that are neither short nor known.

        Words of one character or less, and words matching any entry of
        ``knownLists`` (see WordMatch), are dropped; order is preserved.
        """
        return [word for word in srcList
                if len(word) > 1 and not self.WordMatch(word, *knownLists)]

    def LineMatch(self, srcList, *knownLists):
        """Report whether ``srcList`` contains any known word.

        Words of one character or less are ignored.  Returns the WordMatch
        result for the first word that matches, or None.
        """
        for word in srcList:
            if len(word) > 1:
                hit = self.WordMatch(word, *knownLists)
                if hit:
                    return hit
        return None

    def _CommentTail(self, f):
        """Return the regex fragment matching the part of a line to extract.

        In 'wordfind' mode the whole line is taken.  Otherwise the fragment
        matches a '#' comment, or a ';' comment for asm suffixes, or a '//'
        comment for cpp suffixes (cpp wins when both suffix lists match).
        """
        if 'wordfind' in self.opts:
            return r'.*'

        tail = r'\#.*'
        for suf in self.asmSuffixes:
            if fnmatch.fnmatch(f, '*' + suf):
                tail = r'\;.*'         # XXXSAB am I remembering right?
                break
        for suf in self.cppSuffixes:
            if fnmatch.fnmatch(f, '*' + suf):
                tail = r'//.*'
                break
        return tail

    def DoFile(self, f):
        """Scan one file and print or collect what the options ask for.

        ``f`` is a file name relative to ``self.baseDir``/``self.subDir``.
        Files that cannot be opened produce a '# ERROR' line; files with a
        NUL in their first 2048 characters are skipped unless the 'binary'
        option is present.

        Per matching line, depending on the options:
        * 'words-only' / 'new-only': the lower-cased words of the comment
          that are not in the good or bad lists;
        * 'wordfind': the stripped line plus the matching entry, if any word
          of the line is in ``self.words``;
        * otherwise: the comment text itself.
        With 'new-only' the words go into ``self.newWords`` and nothing is
        printed; with 'filenames' or 'wordfind' output is prefixed with
        ``path:lineno:``.
        """
        opts = self.opts
        verbose = 'verbose' in opts
        if verbose:
            print("# DoFile(%s)" % f)

        # Compiled once per file instead of once per line.
        lineRe = re.compile(r'^.*?(' + self._CommentTail(f) + ')')

        inFname = os.path.join(self.baseDir, self.subDir, f)
        inSubFname = os.path.join(self.subDir, f)
        try:
            inFile = open(inFname)
        except IOError:
            print("# ERROR: File error on %s" % inFname)
            return

        try:
            if 'binary' not in opts:
                # Crude binary detection: a NUL near the start of the file.
                if '\0' in inFile.read(2048):
                    if verbose:
                        print("# binary file check matched %s" % (f))
                    return
                inFile.seek(0)
            lines = inFile.readlines()
        finally:
            # Close the handle on every path, including read errors.
            inFile.close()

        newOnly = 'new-only' in opts
        wordsMode = newOnly or 'words-only' in opts
        wordFind = 'wordfind' in opts
        withLocation = wordFind or 'filenames' in opts

        for idx, line in enumerate(lines):
            m = lineRe.match(line)
            if not m:
                continue
            comment = m.group(1)

            words = []
            outLine = ''
            if wordsMode:
                words = self.WordFilterOut(
                    self.WordSplit(comment.lower()),
                    self.badWords, self.goodWords)
                outLine = ' '.join(words)
            elif wordFind:
                # We're just looking for lines with 'words'
                hit = self.LineMatch(self.WordSplit(comment), self.words)
                if hit:
                    outLine = line.strip() + ("  ##(%s)" % hit)
            else:
                outLine = comment

            if not outLine:
                continue

            if newOnly:
                # WordFilterOut already removed every good/bad word, so all
                # remaining words are new; no second lookup is needed.
                for w in words:
                    self.newWords[w] = 1
            elif withLocation:
                print("%s:%d:%s" % (inSubFname, idx + 1, outLine))
            else:
                print(outLine)

    def Visit(self, f):
        """Process ``f`` with DoFile if the masks select it.

        A file matching any skip mask is ignored.  Otherwise the first
        matching do-mask decides: the file is handed to DoFile when it is a
        regular file.  Stopping at the first match keeps a file that matches
        several do-masks from being processed more than once.
        """
        verbose = 'verbose' in self.opts
        if verbose:
            print("# Visit(%s)" % f)

        for mask in self.skipMasks:
            if fnmatch.fnmatch(f, mask):
                if verbose:
                    print("# skipMask(%s) matched %s" % (mask, f))
                return

        for mask in self.doMasks:
            if fnmatch.fnmatch(f, mask):
                if verbose:
                    print("# doMask(%s) matched %s" % (mask, f))

                inFname = os.path.join(self.baseDir, self.subDir, f)
                if os.path.isfile(inFname):
                    self.DoFile(f)
                return
    
