import os
import string
import sys


class Respondent:
    """Plain attribute holder for one record of the respondent file."""
    pass


class Interval:
    """Plain attribute holder for one record of the interval file."""
    pass


# These are the attributes to extract from the respondents file.
# Each entry is [attribute name, first column, last column]; columns are
# 1-based and inclusive (see process_line for how they are sliced).
resattrs = [
    ['id', 1, 8],
    ['wntanotr', 9360, 9360],
    ['spwntkid', 9361, 9361],
    ['intnothr', 9362, 9362],
    ['ager', 10880, 10881],
    ['rcurpreg', 11275, 11275],
    ['pregnum', 11276, 11277],
    ['parity', 11290, 11291],
    ['intent', 12244, 12244],
    ['addexp', 12245, 12247],
]

# These are the attributes to extract from the interval file, in the same
# [attribute name, first column, last column] layout as resattrs.
intattrs = [
    ['id', 1, 8],
    ['pregordr', 9, 10],
    ['nbrnlv', 14, 14],
    ['wks_preg', 22, 23],
    ['kidssex1', 95, 95],
    ['kidssex2', 151, 151],
    ['outcome', 291, 291],
    ['prglngth', 292, 293],
    ['sex1', 325, 325],
    ['sex2', 326, 326],
]


def process_line(line, constructor, attrs):
    """Build an object from one fixed-width line.

    line: the raw text of one record.
    constructor: zero-argument callable that creates the object to fill in.
    attrs: iterable of (name, start, end) triples; start and end are 1-based,
        inclusive column numbers.

    Returns the new object with one string attribute per entry in attrs.
    Values are the raw slices (no stripping or conversion); a field that lies
    beyond the end of the line yields a shorter or empty string.
    """
    obj = constructor()
    for (attr, start, end) in attrs:
        # Convert the 1-based inclusive column range to a Python slice.
        setattr(obj, attr, line[start-1:end])
    return obj


def process_respondent(line):
    """Take a line from the respondent file and build a Respondent object."""
    return process_line(line, Respondent, resattrs)


def process_interval(line):
    """Take a line from the interval file and build an Interval object."""
    return process_line(line, Interval, intattrs)


def main(name):
    """Print the id of every record in the respondent and interval files.

    name: receives sys.argv[0] from the entry point below; it is not used.

    Reads 'respnd95.asc.gz' through an external gunzip process and
    'interv95.asc' directly, both relative to the current directory.
    """
    # Process the respondent file (uncompress with gunzip).  The with-block
    # closes the pipe even if a line fails to process.
    with os.popen('gunzip --to-stdout respnd95.asc.gz', 'r') as fp:
        for line in fp:
            res = process_respondent(line)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(res.id)

    # Process the interval file (uncompressed text).
    with open('interv95.asc', 'r') as fp:
        for line in fp:
            inter = process_interval(line)
            print(inter.id)


if __name__ == '__main__':
    main(*sys.argv)