# creates a file of word pairs from a simple thesaurus
# works on a plain text file with no naycount prefixes
# usage: python ths2prs.py thesaurus-file wordpair-file
#
# this is not an exact inverse to the related program
# prs2ths.py since the input to this program does not
# have to have sorted lines but the output will be sorted
# pairs, however they are inverses in the sense that
# running either on the output of the other will produce
# files that the program can use to reproduce that output
#
# note: written to be read and maintained, not to be
# short and sweet -- as all good programs should be
#
# ths2prs.py is Copyright (C) 2010 Douglas Pardoe Wilson
# This program comes with ABSOLUTELY NO WARRANTY
# This is free software, and you are welcome to redistribute it
# subject to the conditions of the GNU General Public License (GPL)

import sys
import string  # no longer used below; kept so the file's import list is unchanged


def _collect_pairs(lines):
    """Return the sorted, duplicate-free word pairs found in thesaurus lines.

    Each line is split on whitespace.  The first word is the lemma and the
    remaining words are its synonyms.  Every (lemma, synonym) pair is
    recorded in both directions.  Lines with fewer than two words
    (including blank lines) contribute nothing.

    lines -- any iterable of strings, e.g. an open text file
    returns -- a list of 2-tuples of words in ascending order
    """
    wordpairs = set()  # a set drops repeated pairs as they are found
    for thisline in lines:
        # split() with no argument also discards leading and trailing
        # whitespace, so no separate strip is needed
        words = thisline.split()
        if len(words) > 1:
            lemma = words[0]
            for synonym in words[1:]:
                wordpairs.add((lemma, synonym))
                wordpairs.add((synonym, lemma))
    return sorted(wordpairs)


def main(argv):
    """Convert the thesaurus file argv[1] into the word pair file argv[2].

    argv -- the command line as a list: program name, thesaurus input
            path, word pair output path

    Prints the usage message and does nothing else unless exactly two
    file names follow the program name.  Otherwise writes one
    "word word" line per distinct pair, in sorted order.  The output
    file is created even when no pairs were found.
    """
    if len(argv) != 3:
        # a single parenthesised string prints the same text under
        # both Python 2 and Python 3
        print("usage: python ths2prs.py thesaurus-infile wordpair-outfile")
        return

    # the input is read completely before the output is opened, so a
    # failed read cannot leave a truncated output file behind, and the
    # same path may safely be given for both arguments
    with open(argv[1], "r") as infile:
        wordpairs = _collect_pairs(infile)

    with open(argv[2], "w") as outfile:
        for pair in wordpairs:
            outfile.write(pair[0] + " " + pair[1] + "\n")


if __name__ == "__main__":
    main(sys.argv)