From 207d41ed4f92658277860dfcff8aa711d90b0567 Mon Sep 17 00:00:00 2001
From: darizotas
Date: Thu, 27 Sep 2018 22:33:40 +0200
Subject: [PATCH] Options -d and -f separated. Error control added

Options -d and -f have been separated to avoid errors in the processing
loop: either -d or -f can now be used, but not both at once. Error
handling has also been added so that a malformed cache file no longer
breaks the whole run; such files are skipped and reported at the end.
---
 firefox-cache2-file-parser.py | 185 +++++++++++++++++++++++-----------
 1 file changed, 126 insertions(+), 59 deletions(-)

diff --git a/firefox-cache2-file-parser.py b/firefox-cache2-file-parser.py
index b25d182..6585762 100644
--- a/firefox-cache2-file-parser.py
+++ b/firefox-cache2-file-parser.py
@@ -4,81 +4,148 @@ import datetime
 import hashlib
 import csv
 
 argParser = argparse.ArgumentParser(description='Parse Firefox cache2 files in a directory or individually.')
-argParser.add_argument('-f', '--file', help='single cache2 file to parse')
-argParser.add_argument('-d', '--directory', help='directory with cache2 files to parse')
+# -d and -f are mutually exclusive: exactly one of them must be given.
+argGroup = argParser.add_mutually_exclusive_group(required=True)
+argGroup.add_argument('-d', '--directory', help='directory with cache2 files to parse')
+argGroup.add_argument('-f', '--file', help='single cache2 file to parse')
 argParser.add_argument('-o', '--output', help='CSV output file')
 args = argParser.parse_args()
 
 chunkSize = 256 * 1024
-script_dir = os.path.dirname(__file__)
+skippedFiles = []
+
+def UnpackCache2Field(parseFile, fmt, size, field):
+    """Read and unpack one metadata field; return None on malformed input."""
+    try:
+        return struct.unpack(fmt, parseFile.read(size))[0]
+    except struct.error as e:
+        print "Error unpacking cache2 field {0}: {1}".format(field, e)
+        skippedFiles.append(parseFile.name)
+        return None
+
 def ParseCacheFile (parseFile):
     print "parsing file: {0}".format(parseFile.name)
-    fileSize = os.path.getsize(parseFile.name)
-    parseFile.seek(-4, os.SEEK_END)
-    #print parseFile.tell()
-    #print fileSize
-    metaStart = struct.unpack('>I', parseFile.read(4))[0]
-    #print metaStart
-    numHashChunks = metaStart / chunkSize
-    if metaStart % chunkSize :
-        numHashChunks += 1
-    #print 4 + numHashChunks * 2
-    parseFile.seek(metaStart + 4 + numHashChunks * 2, os.SEEK_SET)
-    #print parseFile.tell()
-    version = struct.unpack('>I', parseFile.read(4))[0]
-    #if version > 1 :
-        # TODO quit with error
-    fetchCount = struct.unpack('>I', parseFile.read(4))[0]
-    lastFetchInt = struct.unpack('>I', parseFile.read(4))[0]
-    lastModInt = struct.unpack('>I', parseFile.read(4))[0]
-    frecency = struct.unpack('>I', parseFile.read(4))[0]
-    expireInt = struct.unpack('>I', parseFile.read(4))[0]
-    keySize = struct.unpack('>I', parseFile.read(4))[0]
-    flags = struct.unpack('>I', parseFile.read(4))[0] if version >= 2 else 0
-    key = parseFile.read(keySize)
-    key_hash = hashlib.sha1(key).hexdigest().upper()
+    try:
+        fileSize = os.path.getsize(parseFile.name)
+        # The last 4 bytes of a cache2 file hold the offset where metadata starts.
+        parseFile.seek(-4, os.SEEK_END)
+        #print parseFile.tell()
+        #print fileSize
+        metaStart = UnpackCache2Field(parseFile, '>I', 4, 'metadata-start')
+        if metaStart is None:
+            print "Skipping file..."
+            return
+
+        #print metaStart
+        # Skip the hash section: 4 bytes plus a 2-byte hash per 256 KiB data chunk.
+        numHashChunks = metaStart / chunkSize
+        if metaStart % chunkSize :
+            numHashChunks += 1
+        #print 4 + numHashChunks * 2
+        parseFile.seek(metaStart + 4 + numHashChunks * 2, os.SEEK_SET)
+        #print parseFile.tell()
-    if doCsv :
-        csvWriter.writerow((fetchCount,
-            datetime.datetime.fromtimestamp(lastFetchInt),
-            datetime.datetime.fromtimestamp(lastModInt),
-            hex(frecency),
-            datetime.datetime.fromtimestamp(expireInt),
-            flags,
-            key,
-            key_hash))
+        version = UnpackCache2Field(parseFile, '>I', 4, 'version')
+        if version is None:
+            print "Skipping file..."
+            return
+        #if version > 1 :
+            # TODO quit with error
+        fetchCount = UnpackCache2Field(parseFile, '>I', 4, 'fetchCount')
+        if fetchCount is None:
+            print "Skipping file..."
+            return
+        lastFetchInt = UnpackCache2Field(parseFile, '>I', 4, 'lastFetchInt')
+        if lastFetchInt is None:
+            print "Skipping file..."
+            return
+        lastModInt = UnpackCache2Field(parseFile, '>I', 4, 'lastModInt')
+        if lastModInt is None:
+            print "Skipping file..."
+            return
+        frecency = UnpackCache2Field(parseFile, '>I', 4, 'frecency')
+        if frecency is None:
+            print "Skipping file..."
+            return
+        expireInt = UnpackCache2Field(parseFile, '>I', 4, 'expireInt')
+        if expireInt is None:
+            print "Skipping file..."
+            return
+        keySize = UnpackCache2Field(parseFile, '>I', 4, 'keySize')
+        if keySize is None:
+            print "Skipping file..."
+            return
+        flags = UnpackCache2Field(parseFile, '>I', 4, 'flags') if version >= 2 else 0
+        if flags is None:
+            print "Skipping file..."
+            return
+
+        key = parseFile.read(keySize)
+        key_hash = hashlib.sha1(key).hexdigest().upper()
-    print "version: {0}".format(version)
-    print "fetchCount: {0}".format(fetchCount)
-    print "lastFetch: {0}".format(datetime.datetime.fromtimestamp(lastFetchInt))
-    print "lastMod: {0}".format(datetime.datetime.fromtimestamp(lastModInt))
-    print "frecency: {0}".format(hex(frecency))
-    print "expire: {0}".format(datetime.datetime.fromtimestamp(expireInt))
-    print "keySize: {0}".format(keySize)
-    print "flags: {0}".format(flags)
-    print "key: {0}".format(key)
-    print "key sha1: {0}\n".format(key_hash)
+        if doCsv :
+            csvWriter.writerow((fetchCount,
+                datetime.datetime.fromtimestamp(lastFetchInt),
+                datetime.datetime.fromtimestamp(lastModInt),
+                hex(frecency),
+                datetime.datetime.fromtimestamp(expireInt),
+                flags,
+                key,
+                key_hash))
+        print "version: {0}".format(version)
+        print "fetchCount: {0}".format(fetchCount)
+        print "lastFetch: {0}".format(datetime.datetime.fromtimestamp(lastFetchInt))
+        print "lastMod: {0}".format(datetime.datetime.fromtimestamp(lastModInt))
+        print "frecency: {0}".format(hex(frecency))
+        print "expire: {0}".format(datetime.datetime.fromtimestamp(expireInt))
+        print "keySize: {0}".format(keySize)
+        print "flags: {0}".format(flags)
+        print "key: {0}".format(key)
+        print "key sha1: {0}\n".format(key_hash)
+
+    except Exception as e :
+        # Catch anything else a malformed file may trigger so that the
+        # remaining files are still processed.
+        print "Unexpected error: {0}".format(e)
+        skippedFiles.append(parseFile.name)
+
 #ParseCacheFile(testFile)
 #procPath = script_dir + '/' + testDir
-if args.directory or args.file :
-    if args.output :
-        doCsv = True
-        csvFile = open(args.output, 'w')
-        csvWriter = csv.writer(csvFile, delimiter=',', quoting=csv.QUOTE_NONNUMERIC)
-        csvWriter.writerow(('Fetch Count', 'Last Fetch', 'Last Modified', 'Frecency', 'Expiration', 'Flags', 'URL', 'Key Hash'))
-    procPath = args.directory
-    fileList = os.listdir(procPath)
-    for filePath in fileList :
-        file = open(procPath + '/' + filePath, 'r')
-        ParseCacheFile(file)
-    if doCsv :
-        print 'Data written to CSV file: {0}'.format(csvFile.name)
-        csvFile.close()
-else :
-    argParser.print_help()
+
+# Output to CSV
+doCsv = args.output is not None
+if doCsv :
+    # Open in binary mode to avoid blank lines between rows on Windows, see
+    # https://stackoverflow.com/questions/3348460/csv-file-written-with-python-has-blank-lines-between-each-row
+    csvFile = open(args.output, 'wb')
+    csvWriter = csv.writer(csvFile, delimiter=';', quoting=csv.QUOTE_NONNUMERIC)
+    csvWriter.writerow(('Fetch Count', 'Last Fetch', 'Last Modified', 'Frecency', 'Expiration', 'Flags', 'URL', 'Key Hash'))
+
+# Only one file to process, otherwise walk the given directory.
+if args.file :
+    fileList = [args.file]
+else :
+    fileList = [os.path.join(args.directory, name) for name in os.listdir(args.directory)]
+
+for filePath in fileList :
+    # Skip sub-directories; cache2 entries are plain files.
+    if os.path.isdir(filePath) :
+        continue
+    cacheFile = open(filePath, 'rb')
+    ParseCacheFile(cacheFile)
+    cacheFile.close()
+
+if doCsv :
+    print 'Data written to CSV file: {0}'.format(csvFile.name)
+    csvFile.close()
+
+if skippedFiles:
+    print "Skipped files:"
+    print "\n".join(skippedFiles)
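
For reference, the failure mode that the new UnpackCache2Field() helper guards
against can be reproduced in isolation. The sketch below is not part of the
patch and the file name truncated.bin is hypothetical; it only demonstrates
that struct.unpack raises struct.error when a file yields fewer bytes than the
format string requires, which is exactly the condition the helper turns into a
None return plus an entry in skippedFiles:

    # Minimal sketch (not part of the patch): reproduce the truncated-field
    # error that UnpackCache2Field() handles. 'truncated.bin' stands in for
    # a malformed cache2 file.
    import struct

    with open('truncated.bin', 'wb') as f:
        f.write('\x00\x01')              # only 2 bytes where '>I' needs 4

    with open('truncated.bin', 'rb') as f:
        try:
            value = struct.unpack('>I', f.read(4))[0]
        except struct.error as e:
            # Python 2 reports: "unpack requires a string argument of length 4"
            print "Error unpacking cache2 field {0}: {1}".format('version', e)
            value = None

After this change the script is invoked with exactly one of the two input
options, for example "-d <profile>/cache2/entries -o entries.csv" to parse a
whole directory (cache2 entries typically live under cache2/entries in the
Firefox profile) or "-f <single entry file>" for one file; any malformed
entries are listed under "Skipped files:" when the run finishes.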