# Scripts

#!/usr/bin/python
 
# Original Code
# http://github.com/turian/py80legsformat by Joseph Turian
 
import csv
import hashlib
import io
import os
import struct
import sys
import zipfile
from cStringIO import StringIO
from optparse import OptionParser
 
class EightyLegs:
    """Unpack an 80legs result file (``.80``, or a ``.zip`` of ``.80`` files).

    As read by this class, a ``.80`` stream starts with two 4-byte integers
    (class id 218217067 and version 1) followed by any number of records,
    each laid out as: 4-byte URL length, UTF-8 URL, 4-byte data length, data.
    Integers are unpacked with struct's native "i" format.
    """

    def __init__(self, filename):
        """Remember the input path and derive the output directory name.

        The output directory is the input path with everything from the
        first '.' of its *base name* removed, e.g. ``dir/a_1.zip`` ->
        ``dir/a_1``.
        """
        self.filename = filename
        # Split off the directory part first: cutting the whole path at its
        # first '.' turns './crawl.80' into '' and truncates dotted
        # directory names.
        head, tail = os.path.split(filename)
        self.directory = os.path.join(head, tail.split('.')[0])

    def read(self, file):
        """Yield ``(url, data)`` for every record in a binary ``.80`` stream.

        ``file`` must be a file-like object opened in binary mode. ``url`` is
        the decoded (unicode) URL and ``data`` the raw payload bytes.

        Raises AssertionError when the header is not (218217067, 1) and
        struct.error when a length field is truncated. Because this is a
        generator, nothing is read or checked until iteration starts.
        """
        assert struct.calcsize("i") == 4

        header = file.read(2 * 4)
        (classID, versionID) = struct.unpack("ii", header)
        assert (classID, versionID) == (218217067, 1)

        while True:
            size_field = file.read(4)
            if not size_field:
                # Clean end of stream: no further record header.
                break
            (url_size,) = struct.unpack("i", size_field)
            url = file.read(url_size).decode("utf-8")
            (data_size,) = struct.unpack("i", file.read(4))
            data = file.read(data_size)
            yield (url, data)

    def read_zip(self, file):
        """Yield ``(url, data)`` from every ``.80`` member of a zip archive.

        ``file`` is a binary file-like object (or path) accepted by
        zipfile.ZipFile. Members whose names do not end in ``.80`` are
        skipped.
        """
        archive = zipfile.ZipFile(file, 'r')
        for info in archive.infolist():
            member = info.filename
            if member.endswith('.80'):
                # Each member is loaded fully into memory and parsed from
                # an in-memory binary stream.
                for record in self.read(io.BytesIO(archive.read(member))):
                    yield record

    def parse(self):
        """Extract every record of the input file to disk.

        Creates ``self.directory`` if needed, writes each payload to
        ``<directory>/<md5 of url>.html``, prints each URL, and writes a
        ``<directory>.tsv`` index with one ``md5<TAB>url`` row per record.
        """
        if not os.path.exists(self.directory):
            os.makedirs(self.directory)

        if self.filename.endswith('.zip'):
            reader = self.read_zip
        else:
            reader = self.read

        with open("%s.tsv" % self.directory, 'w') as index:
            tsv = csv.writer(index, delimiter='\t', lineterminator='\n')
            # The format is binary, so the input must not be opened in text
            # mode; the handle stays open while the generator is consumed.
            with open(self.filename, 'rb') as source:
                for url, data in reader(source):
                    print(url)
                    # Hash the UTF-8 bytes: hashing the decoded URL fails
                    # for non-ASCII URLs. ASCII URLs hash as before.
                    digest = hashlib.md5(url.encode("utf-8")).hexdigest()
                    page_path = "%s/%s.html" % (self.directory, digest)
                    # Payload is raw bytes; write it unmodified and close
                    # the file straight away.
                    with open(page_path, 'wb') as page:
                        page.write(data)
                    cell = url
                    if not isinstance(cell, str):
                        # Python 2: csv cannot write non-ASCII unicode, so
                        # hand it UTF-8 encoded bytes instead.
                        cell = cell.encode("utf-8")
                    tsv.writerow([digest, cell])
 
def main():
    """Command-line entry point: unpack the file given with -f/--file.

    When no input file is supplied, the option help is printed and the
    process exits with status 0. Otherwise an EightyLegs object is built
    for the file and its parse() method is run.
    """
    parser = OptionParser(
        usage='Usage: %prog -f 19970_20966_a_1.zip or %prog -f 19970_20966_a_1.80')
    parser.add_option('-f', '--file', dest='filename', help='input file')

    # Positional arguments are accepted by the parser but not used.
    options = parser.parse_args()[0]
    input_path = options.filename

    if input_path is None:
        parser.print_help()
        sys.exit(0)

    EightyLegs(input_path).parse()
 
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()

# Syndicate content
# Drupal theme by Kiwi Themes.