#!/usr/bin/python
# -*- coding: iso8859-2 -*-
'''
--------------------------------------------------
Database Dictionary

 ?/ ?/2003: started PHP version
 ?/ ?/2003: converted to python
15/10/2003: added readline capability (requires readline module)
15/10/2003: remembers history across sessions
18/10/2003: added debug mode, delete entry, read() function
09/03/2004: code recovery. working again!
15-05-2004: new readline module adjustments.
15-05-2004: command line support
26-05-2004: fixed bug in polish characters input
02-06-2004: ability to execute arbitrary SQL statement
2005-02-13: merging changes on z general-language version
            changed to one big class
            added ini config
            changed data field names
            added meta-data table
2005-02-14: command line accept dictionary parameter
            switch between naked/normal (preliminary code)
            basic output encoding (utf8)!

bugs:
    new entry: language changes on switch!

todo:
    add german & swedish letters to 'naked' feature
    make german dict
    redo the encoding stuff
    support the 'language-to' parameter
    consider working with cmd/word in "always lowercase" mode
    show status (naked lookup...input encoding)

naming conventions:
    word, entry:    source expression to be translated
    translation:    target translation
    source:         where this match was found
    naked:          without diacritic marks, plain english.
                    eg. cafe, naive, gluck, poczag...
    language field: field name in DB
    language name:  human name (english, spanish..)

table def OLD!!:
CREATE TABLE polish (
    entry_id INTEGER PRIMARY KEY,
    polish varchar(255) NOT NULL default '',
    naked varchar(255) default NULL,
    english varchar(255) default NULL,
    french varchar(255) default NULL,
    hebrew varchar(255) default NULL,
    source varchar(10) NOT NULL default '',
    voice_file varchar(255) default NULL,
    part_of_speech varchar(255) default NULL,
    /* enum('verb','noun','pronoun','adjective','adverb','preposition','abbr','figurative','ordinal','exclamation'), */
    category varchar(255) default NULL,
    gender varchar(10) default NULL, /* enum('male','female','neuter') */
    usa_uk NOT NULL default 'usa' /* enum('usa','uk') */
);

table def NEW!!:
CREATE TABLE dictionary (
    entry_id INTEGER PRIMARY KEY,
    entry varchar(255) NOT NULL default '',
    entry_naked varchar(255) default NULL,
    trans varchar(255) default NULL,
    trans_naked varchar(255) default NULL,
    source varchar(10) NOT NULL default '',
    part_of_speech varchar(255) default NULL,
    /* enum('verb','noun','pronoun','adjective','adverb','preposition','abbr','figurative','ordinal','exclamation'), */
    gender varchar(10) default NULL, /* enum('male','female','neuter') */
    voice_file varchar(255) default NULL
);
CREATE TABLE meta_data (
    meta_id INTEGER PRIMARY KEY,
    meta_name varchar(255) NOT NULL,
    meta_value varchar(255) default NULL
);
INSERT INTO meta_data (meta_name) VALUES ("source language");
INSERT INTO meta_data (meta_name) VALUES ("target language");
INSERT INTO meta_data (meta_name) VALUES ("last update");
INSERT INTO meta_data (meta_name) VALUES ("translation direction");
INSERT INTO meta_data (meta_name) VALUES ("source encoding");
INSERT INTO meta_data (meta_name) VALUES ("target encoding");
INSERT INTO meta_data (meta_name) VALUES ("to encoding");
----------------------------------------------------------'''

# NOTE: this is Python 2 code (raw_input, unicode(), byte-string text
# handling, the pysqlite-1 "sqlite" module).  print is always called with a
# single parenthesised argument so that every statement keeps its Python 2
# meaning.

# ------------ module imports: --------------
import string, os, sys, sqlite
import ini
import re
from time import sleep

try:
    import readline
    import myreadline
    READLINE_SUPPORT = 1
    #print "readline support enabled."
except ImportError:
    READLINE_SUPPORT = 0
    print('no readline support!')

# -- globals ----
SECTION = 'dictionary'
HISTORY_FILE = ''
# database columns that may be chosen as a language field
LANGUAGES = ['entry', 'entry_naked', 'trans', 'trans_naked']


def _read_text(path):
    '''Return the whole contents of the file at path, closing it afterwards.'''
    handle = open(path)
    try:
        return handle.read()
    finally:
        handle.close()


def _write_text(path, text):
    '''Overwrite the file at path with text, closing it afterwards.'''
    handle = open(path, 'w')
    try:
        handle.write(text)
    finally:
        handle.close()


class Dictionary:
    '''Interactive bilingual dictionary stored in an sqlite file.

    The constructor locates (or creates) dictionary.ini, opens the selected
    "<name>.sqlite" database and loads the language settings from its
    meta_data table.  main_loop() then runs the interactive prompt and
    translate() performs a single lookup.
    '''

    def __init__(self, dictfile=''):
        '''Open the dictionary database and initialise all settings.

        dictfile: database name without the ".sqlite" suffix; when empty the
                  "last dictionary file" ini entry is used instead.
        Exits the process with status 2 when the database directory or the
        database file cannot be found.
        '''
        # The original assigned HISTORY_FILE as a local, so the module-level
        # copy announced by its comment was never updated.
        global HISTORY_FILE

        # ------------ constants section: --------------
        ini_paths = [
            os.path.expanduser('~'),
            '/usr/local/lib/dictionary',
            r'D:\all-users\dictionary',
            '/mnt/sd/all-users/dictionary',
            r'c:\users\dictionary',
            '.',
        ]
        ini_file = 'dictionary.ini'
        ini_path = self.find_ini_file(ini_file, ini_paths)
        self.conf = ini.init(ini_path + os.sep + ini_file)

        db_dir = self.read_ini('dictionary database directory', '.')
        self.USER_ENCODING = self.read_ini("user_encoding", 'iso-8859-1')
        self.original_current_dir = os.path.abspath('.')
        # os.chdir(db_dir) # because of bug in sqlite2
        if not os.path.isdir(db_dir):
            print('cant find dictionary files directory')
            sys.exit(2)

        if dictfile:
            db_name = db_dir + os.sep + dictfile + '.sqlite'
        else:
            db_name = db_dir + os.sep + self.read_ini(
                "last dictionary file", 'polish-english.sqlite')
        if os.path.isfile(db_name):
            print('Using dictionary file:  %s' % db_name)
        else:
            print("oy vey: dictionary file noy found")
            sys.exit(2)
        self.conn = sqlite.connect(db_name)

        self.source_language_name = self.read_meta("source language")
        self.target_encoding = self.read_meta("target encoding")
        self.source_encoding = self.read_meta("source encoding")
        self.target_language_name = self.read_meta("target language")
        self.DB_LAST_UPDATE = self.read_meta("last update")
        self.translation_direction = self.read_meta(
            "translation direction", 'source to target')

        if self.read_meta("search naked entries", 'yes') == 'yes':
            self.source_language_field = 'entry_naked'
        else:
            self.source_language_field = 'entry'
        if self.read_ini("print naked results", 'no') == 'no':
            self.target_language_field = 'trans'
        else:
            self.target_language_field = 'trans_naked'

        # The fields must exist before they can be swapped, and the stored
        # direction must not be toggled back, so the swap happens here and
        # does not go through switch_direction().
        if self.translation_direction == 'target to source':
            self._swap_languages()

        self.prompt = 'will be overwritten'
        self.set_prompt()

        # 'iso-8895-1' in the original is not an existing codec name.
        self.SOURCE_ENCODING = self.read_meta("source encoding", 'iso-8859-1')
        self.TARGET_ENCODING = self.read_meta("target encoding", 'iso-8859-1')

        self.HISTORY_FILE = ini_path + os.sep + self.read_ini(
            'dictionary history filename', '.dictionary_history')
        HISTORY_FILE = self.HISTORY_FILE  # global copy for --del--
        #self.LANGUAGES = ['entry','entry_naked','trans','trans_naked',]
        self.HISTORY_SIZE = 50
        if not os.path.isfile(self.HISTORY_FILE):
            _write_text(self.HISTORY_FILE, 'hello darling!')

        self.GENDERS = {'m': 'male', 'f': 'female', 'n': 'neuter'}
        self.DB_TABLE = "dictionary"
        self.SHOW_NAKED = False  # --- show or hide accent-less entry
        self.debug = False
        self.SQL_DANGEROUS_WORDS = ['delete', 'update']
        # db_name already carries the database directory; only its file name
        # belongs under the start-up directory.
        self.DUMP_FILE = (self.original_current_dir + os.sep
                          + os.path.basename(db_name) + '.txt')
        self.RESULTS_LIMIT = 10
        self.SHORT_FACTOR = 0.78
        self.FILE_prompt = ' name of FILE to translate> '
        self.WORD_SPLITTERS = '!"?<>[](){}_,.;:\n\r'
        self.GARBAGE_CHARACTERS = '@#$%^&*+0123456789'
        self.indicator = '...'

        # Letter order in every table below:
        #   a-ogonek, c-acute, e-ogonek, l-stroke, n-acute, o-acute,
        #   s-acute, z-dot, z-acute
        # NOTE(review): the single-byte literals were damaged in transit and
        # are reconstructed from the surviving bytes as cp1250 / cp852 --
        # confirm against a pristine copy of the file.
        self.standard_encoding = '\xb9\xe6\xea\xb3\xf1\xf3\x9c\xbf\x9f'  # cp1250 small
        polish_unicode = (
            u'\u0105', u'\u0107', u'\u0119', u'\u0142', u'\u0144',
            u'\u00f3',  # the original had unichr(160), a no-break space
            u'\u015b', u'\u017c', u'\u017a',
        )
        self.other_encodings = (
            '\xa5\xc6\xca\xa3\xd1\xd3\x8c\xaf\x8f',  #win_set_big (cp1250)
            '\xa5\x86\xa9\x88\xe4\xa2\x98\xbe\xab',  #dos_set_small (cp852)
            '\xa4\x8f\xa8\x9d\xe3\xe0\x97\xbd\x8d',  #dos_set_big (cp852)
            ('a`', 'c`', 'e`', 'l`', 'n`', 'o`', 's`', 'z.', 'z`'),  #user_encoding
            ("a'", "c'", "e'", "l'", "n'", "o'", "s'", "z.", "z'"),  #user_encoding2
            polish_unicode,  #unicode
            tuple([ch.encode('utf-8') for ch in polish_unicode]),  #utf8
        )
        self.phase_description = {
            1: '(exact)',
            2: '(begin)',
            3: '(mid)',
            4: '(trim)',
            5: '(trim sub)',
        }
        # UTF-8 spellings of accented letters.  Not consulted by make_naked()
        # yet (see the todo list in the module docstring); kept as an
        # attribute so the table is not lost.
        self.utf8_to_naked = {
            # german range:
            'ä': 'a', 'ü': 'u', 'ö': 'o', '´': "'", 'ß': 'ss',
            'Ä': 'a', 'Ü': 'u', 'Ö': 'o', 'é': 'e', 'ú': 'u',
            # polish range:
            'ą': 'a', 'Ą': 'a', 'ć': 'c', 'Ć': 'c', 'ę': 'e', 'Ę': 'e',
            'ń': 'n', 'Ń': 'n', 'ś': 's', 'Ś': 's', 'ó': 'o', 'Ó': 'o',
            'ż': 'z', 'Ż': 'z', 'ł': 'l', 'Ł': 'l', 'ź': 'z', 'Ź': 'z',
            #french range
            #swedish range
        }
        # single-byte (cp1250, see NOTE above) letter -> plain ascii letter
        self.conv = {
            '\xb9': 'a', '\xa5': 'a',
            '\xe6': 'c', '\xc6': 'c',
            '\xea': 'e', '\xca': 'e',
            '\xf1': 'n', '\xd1': 'n',
            '\x9c': 's', '\x8c': 's',
            '\xf3': 'o', '\xd3': 'o',
            '\xbf': 'z', '\xaf': 'z',
            '\xb3': 'l', '\xa3': 'l',
            '\x9f': 'z', '\x8f': 'z',
        }
        self.characters_require_conversion = ''.join(self.conv.keys())
        # ------------ end of constants -------------

        # ------------ doc strings section -------------
        # NOTE(review): the argument placeholders of the command-line
        # synopsis were lost; they are rebuilt from the way the script's
        # entry point reads sys.argv.
        self.usage = '''Usage:
    with NO arguments: run the interactive mode
    C:\\>trans <dictionary> [<word> [<language-to>]]
    C:\\>trans u|r|fix|dump
    C:\\>trans -h | --help
Where:
    fix  - fix something (misc functions)
    dump - write everything to a plain-text file
    n    - insert new word to the dictionary
    u    - update naked words in the dictionary
    r    - fix polish entries
    word - word to be translated
languages:
    default language: from ----- to ---
'''
        self.MENU_USAGE = '''
    PLEH!
    S      - Switch translation direction
    K      - look in naked words (on/off)
    KK     - show also naked translation (on/off)
    F      - traslate words from a file
    N      - add New dictionary entry
    DU     - dump dictionary to text file
    DL     - delete an entry from the dictionary
    DM     - debug mode
    H      - Help screen
    Q      - Quit, exit the dictionary
    CLS    - clear the screen
    sqlsql - query the database directly (sql)'''
        if READLINE_SUPPORT:
            # NOTE(review): the key names in front of the first two entries
            # were lost; restore them from a pristine copy.
            self.MENU_USAGE = self.MENU_USAGE + '''
           - remember last word(s)
           - erase line
    HH     - show word history.
'''
        # ------------ end of doc strings -------------
        self.clear_screen()
    # ------------ end of --init--() -------------

    # ------------ functions section: --------------
    def _execute(self, sql):
        '''Run sql on the open database and return the result handle.'''
        return self.conn.db.execute(sql)

    #___________________________________________________
    def separator(self):
        '''Write the horizontal rule with the help reminder to stderr.'''
        sys.stderr.write("_" * 34 + "h - help \n")

    #___________________________________________________
    def clear_screen(self):
        '''Scroll the previous output away by printing blank lines.'''
        print('\n\n\n\n\n')

    #___________________________________________________
    def read_ini(self, key, default):
        '''Return the ini value stored under key in SECTION.

        When the lookup fails, default is stored under key and returned.
        '''
        key = key.strip()
        try:
            return self.conf[SECTION][key]
        except Exception:
            # The ini module is project-local; which exception it raises for
            # a missing key cannot be told from here.
            self.conf.set(SECTION, key, default)
            return default

    #___________________________________________________
    def _swap_languages(self):
        '''Exchange the source and target language fields and names.'''
        self.source_language_field, self.target_language_field = (
            self.target_language_field, self.source_language_field)
        self.source_language_name, self.target_language_name = (
            self.target_language_name, self.source_language_name)

    #___________________________________________________
    def switch_direction(self):
        '''Reverse the translation direction and refresh the prompt.'''
        self._swap_languages()
        if self.translation_direction == 'source to target':
            self.translation_direction = 'target to source'
        else:
            self.translation_direction = 'source to target'
        self.set_prompt()

    #___________________________________________________
    def set_prompt(self):
        '''Rebuild self.prompt from the current languages and naked mode.'''
        if 'naked' in self.source_language_field:
            self.naked_input = '(naked)'
        else:
            self.naked_input = ''
        self.prompt = '%s %s --> %s: ' % (
            self.naked_input,
            self.source_language_name,
            self.target_language_name)

    #___________________________________________________
    def switch_naked(self):
        '''Toggle searching in the accent-less ("_naked") source column.'''
        if '_naked' in self.source_language_field:
            self.source_language_field = self.source_language_field.replace(
                '_naked', '')
        else:
            self.source_language_field = self.source_language_field + '_naked'
        self.set_prompt()

    #___________________________________________________
    def switch_show_naked(self):
        '''Toggle printing of the accent-less forms next to each result.'''
        self.SHOW_NAKED = not self.SHOW_NAKED

    #___________________________________________________
    def read_meta(self, key, default=''):
        '''Return the meta_data value named key, or default.

        default is returned when no row exists or when the stored value is
        NULL (the schema in the module docstring creates its rows without a
        value).
        '''
        sql = ('select meta_value from meta_data where meta_name="%s" limit 1'
               % self.quote(key))
        dbh = self._execute(sql)
        if dbh.row_list:
            value = dbh.row_list[0][0]
            if value is None:
                return default
            return value
        print('nooooootffoundd  %s' % sql)
        # The original built an INSERT here but never executed it.  The
        # default is deliberately not persisted: __init__ reads the same key
        # with different defaults.
        return default

    #___________________________________________________
    def progress_indicator(self, new=False):
        '''Print the search progress line; new=True starts a fresh one.'''
        if new:
            self.indicator = '\r searching ....'
        else:
            self.indicator = self.indicator + '......'
        sys.stdout.write(self.indicator)

    #___________________________________________________
    def debug2(self, s, x=''):
        '''Print s and x prefixed with "debug:" when debug mode is on.'''
        if self.debug:
            print('debug:  %s %s \r\n' % (s, x))

    #___________________________________________________
    def is_query_dangerous(self, sql):
        '''Return True when sql contains a word from SQL_DANGEROUS_WORDS.'''
        for word in sql.split():
            if word.lower() in self.SQL_DANGEROUS_WORDS:
                return True
        return False

    #___________________________________________________
    def shorten(self, word):
        '''Return the leading SHORT_FACTOR share of word (rounded down).'''
        max_letters = int(self.SHORT_FACTOR * len(word))
        return word[:max_letters]

    #___________________________________________________
    def find_ini_file(self, ini_file, INI_PATHs):
        '''Return the directory that holds ini_file.

        The first directory of INI_PATHs that contains the file wins.  When
        none does, a default ini file is created in the first directory that
        accepts it.  Exits the process with status 2 when that fails too.
        '''
        default_contents = '[%s]\nhello =world\n' % SECTION
        for ini_dir in INI_PATHs:
            if os.path.isfile(ini_dir + os.sep + ini_file):
                return ini_dir

        # then create it
        print('oy vey: INI file not found !!!')
        for ini_dir in INI_PATHs:
            try:
                _write_text(ini_dir + os.sep + ini_file, default_contents)
            except (IOError, OSError):
                continue
            return ini_dir

        print('big trouble: no ini found')
        sys.exit(2)

    #___________________________________________________
    def remove_from_history(self, cmd):
        '''Drop cmd from the readline history (no-op without readline).'''
        if READLINE_SUPPORT:
            myreadline.delete_item_from_history(cmd)

    #___________________________________________________
    def organize_history_file(self):
        '''Rewrite the history file without duplicates and short lines.

        Lines shorter than three characters are dropped.  The cleaned file
        is loaded into readline when it is available.  Returns the list of
        lines that were kept.
        '''
        kept = []
        seen = {}
        for hist in _read_text(self.HISTORY_FILE).split('\n'):
            if len(hist) < 3 or hist in seen:
                continue
            seen[hist] = 1
            kept.append(hist)
        _write_text(self.HISTORY_FILE, '\n'.join(kept))
        if READLINE_SUPPORT:
            readline.read_history_file(self.HISTORY_FILE)
        return kept

    #___________________________________________________
    def read(self, prompt, log_in_history=0):
        '''Prompt the user and return the stripped reply.

        log_in_history is accepted for compatibility and is not used.
        '''
        if READLINE_SUPPORT:
            #print prompt # due to bug in readlines, doesn't end with <,>
            reply = myreadline.readline(self.HISTORY_FILE, prompt=prompt)
        else:
            reply = raw_input(prompt)
        return reply.strip()

    #___________________________________________________
    def split_words(self, txt):
        '''Return the accent-less, lower-case words found in txt.'''
        for ch in self.WORD_SPLITTERS:
            txt = txt.replace(ch, ' ')
        return self.make_naked(txt).split()

    #___________________________________________________
    def word_is_garbage(self, word):
        '''Return True for an empty word or one with a GARBAGE_CHARACTERS
        character.

        NOTE(review): main_loop always called this method but the file never
        defined it; the rule is inferred from the constant's name.
        '''
        if not word:
            return True
        for ch in self.GARBAGE_CHARACTERS:
            if ch in word:
                return True
        return False

    #___________________________________________________
    #sqlite quote
    def quote(self, word):
        '''Double every double quote so word fits inside a "..." literal.'''
        return word.replace('"', '""')

    #___________________________________________________
    def get_dict_from_db_results(self, dbh):
        '''Map each column name of the result handle to its row index.'''
        field = {}
        for i, defi in enumerate(dbh.col_defs):
            field[defi[0]] = i
        return field

    #___________________________________________________
    def iconv(self, s, encfrom, encto='utf-8'):
        '''Re-encode s from encfrom to encto; return s as is on failure.'''
        try:
            return unicode(s, encfrom).encode(encto)
        except (UnicodeError, LookupError, TypeError):
            return s

    #___________________________________________________
    def main_loop(self):
        '''Run the interactive prompt until the user quits.

        Input that is not a menu command is looked up as a word.
        '''
        commands = {
            'hh': self._cmd_history,
            's': self.switch_direction,
            'k': self.switch_naked,
            'kk': self.switch_show_naked,
            'f': self._cmd_translate_file,
            'h': self._cmd_help,
            'help': self._cmd_help,
            '?': self._cmd_help,
            'cls': self.clear_screen,
            'dm': self._cmd_toggle_debug,
            'dl': self._cmd_delete_entry,
            'du': self._cmd_dump,
            'dd': self._cmd_delete_duplicates,
            'n': self._cmd_new_entry,
            'sqlsql': self._cmd_sql,
        }
        # --- default language: "entry_naked" ---
        while 1:
            # ----- read user input -------------
            self.separator()
            try:
                word = self.read(self.prompt, 1)
            except EOFError:
                word = 'q'  # end of input behaves like the quit command
            cmd = word.lower()

            # ---------- quit ------
            if cmd in ['q', 'exit']:
                if READLINE_SUPPORT:
                    readline.write_history_file(self.HISTORY_FILE)
                break

            # ---------- nothing... ------
            if cmd == '':
                continue

            handler = commands.get(cmd)
            if handler is not None:
                # menu commands are not worth remembering
                self.remove_from_history(cmd)
                handler()
                continue

            # -- if here, it's not a command.
            # user input is a word, translate it. ---
            self.clear_screen()
            #separator()
            self.translate(word)

    #___________________________________________________
    def _cmd_history(self):
        '''HH: tidy the history file and list its entries.'''
        newlist = self.organize_history_file()
        if READLINE_SUPPORT:
            for i, hist in enumerate(newlist):
                print(' %d> %s' % (i, hist))
        else:
            print(" >>> please install READLINE.py support. <<< ")

    #___________________________________________________
    def _cmd_help(self):
        '''H: print the menu.'''
        print(self.MENU_USAGE)

    #___________________________________________________
    def _cmd_toggle_debug(self):
        '''DM: switch debug mode on or off.'''
        self.debug = not self.debug
        print("debug mode is now: %s" % self.debug)

    #___________________________________________________
    def _cmd_translate_file(self):
        '''F: translate every word of a file into "<file>.trans".'''
        fname = self.read(self.FILE_prompt)
        outfile = fname + '.trans'
        if not os.path.isfile(fname):
            print("This file isn't good enough (or doesn't exist)")
            return
        lines = []
        for word in self.split_words(_read_text(fname)):
            if self.word_is_garbage(word):
                trans = ''
            else:
                # a NULL translation comes back as None
                trans = self.translate(word) or ''
            lines.append(word + ' = ' + trans + '\n')
        _write_text(outfile, ''.join(lines))
        print("translation written to: " + outfile)

    #___________________________________________________
    def _cmd_delete_entry(self):
        '''DL: show one entry by id and delete it after confirmation.'''
        entry_id = self.read("Delete entry # ", 1)
        try:
            entry_id = int(entry_id)
        except ValueError:
            return
        sql = 'SELECT * FROM %s WHERE entry_id="%d"' % (self.DB_TABLE, entry_id)
        print(self._execute(sql).row_list)
        ok = self.read("Delete [y/n]")
        if ok in ['y', 'yes', 'ok', 'o']:
            sql = 'DELETE FROM %s WHERE entry_id="%d"' % (self.DB_TABLE, entry_id)
            self._execute(sql)
            print("deleted.")

    #___________________________________________________
    def _cmd_dump(self):
        '''DU: dump the dictionary to a text file chosen by the user.'''
        target = self.read('dump to filename [%s]: ' % self.DUMP_FILE)
        if target == '':
            target = self.DUMP_FILE
        if os.path.isfile(target):
            print('%s already exists.' % target)
        if os.path.isdir(target):
            print('%s is a directory.' % target)
            return
        self.DUMP_FILE = target
        ok = self.read('continue dumping [y/n]?')
        if ok in ['y', 'yes', 'ok']:
            self.do_something_to_all_records('dump')
            print('text file written to %s' % self.DUMP_FILE)

    #___________________________________________________
    def _cmd_delete_duplicates(self):
        '''DD: write DELETE statements for duplicate entries to a file.'''
        # The original prompt promised a deletion, yet the DELETE was
        # commented out and only a file was written.
        ok = self.read('Find duplicate entries (same entry and translation) '
                       'and write DELETE statements to entry-dups.sql. '
                       'Continue [y/n]?')
        if ok in ['y', 'yes', 'ok']:
            self.do_something_to_all_records('delete_duplicate_entries')

    #___________________________________________________
    def _cmd_new_entry(self):
        '''N: ask for a new entry until it is stored or cancelled.'''
        hint = '''hint:
    "a," for polish "Aom",
    "a." for swedish "angstram" symbol,
    "e," is polish "Em"
    "Z." means z letter with dot on top
    "Z`" means z with apostroph on top
    "u:" means umlaut'''
        while 1:
            print(hint)
            entry = self.read('---------- %s word : ' % self.source_language_name)
            translation = self.read('----- %s translation : ' % self.target_language_name)
            part_of_speech = self.read('-- part of speech [none]: ', 0).lower()
            gender = self.read('--- gender (m/f/n)[none]: ', 0)
            source = self.read('------------ source [me]: ', 0)
            if source == '':
                source = "me"
            gender = self.GENDERS.get(gender, '')

            # The typed digraphs become real letters in "entry"; the naked
            # column gets the accent-less form.  (The original stored the
            # raw input as entry and the converted text as entry_naked.)
            entry = self.special_input_conversion(entry, self.source_language_name)
            entry_naked = self.make_naked(entry)

            values = [self.quote(value) for value in (
                entry, entry_naked, translation, source, part_of_speech, gender)]
            sql = '''
            INSERT INTO %s (entry, entry_naked, trans, source, part_of_speech, gender)
            VALUES("%s", "%s", "%s", "%s", "%s", "%s")
            ''' % tuple([self.DB_TABLE] + values)
            print(sql)

            # NOTE(review): the key letters of this prompt were lost; they
            # are rebuilt from the answers accepted below.
            ok = self.read("(O)k, (T)ry again, (C)ancel: ")
            if ok in ['y', 'yes', 'Y', 'Yes', 'ok', 'O', 'o', 'Ok']:
                self._execute(sql)
                print("id: %d inserted." % self.conn.db.sqlite_last_insert_rowid())
                break
            elif ok in ['c', 'C', 'cancel', 'Cancel', 'q', 'Q', 'exit', 'Quit']:
                break

    #___________________________________________________
    def _cmd_sql(self):
        '''sqlsql: run SQL statements typed by the user and print the rows.'''
        while 1:
            sql = self.read('SQL statemnent ("q" to quit): ')
            if self.is_query_dangerous(sql):
                print('Query is not safe. aborting.')
                break
            if sql.lower() in ['q', 'quit']:
                break
            ok = self.read("Query Database? [Yes/No/Quit]: ")
            self.remove_from_history(ok)
            if ok.lower() in ['q', 'quit']:
                break
            if ok.lower() in ['y', 'yes']:
                try:
                    for row in self._execute(sql).row_list:
                        print(row)
                except Exception:
                    # which exceptions the sqlite module raises cannot be
                    # told from here; report and keep the prompt alive
                    print("Error in sql or something")

    #___________________________________________________
    def translate(self, word):
        '''Look word up, print every match and return the last translation.

        The search widens phase by phase (exact, begins-with, contains,
        trimmed prefix, doubly-trimmed substring) and stops at the first
        phase that yields rows.  Returns '' when nothing is found.
        '''
        self.progress_indicator(True)
        # ---------- translation --------
        column = self.source_language_field
        term = self.quote(word)
        trimmed = self.quote(self.shorten(word))
        trimmed_twice = self.quote(self.shorten(self.shorten(word)))

        # Explicit 1..5 order; the original relied on dict iteration order.
        phases = [
            (1, '%s = "%s"' % (column, term)),
            (2, '%s LIKE "%s%%"' % (column, term)),
            (3, '%s LIKE "%%%s%%"' % (column, term)),
        ]
        # A word trimmed down to nothing would match every row.
        if trimmed:
            phases.append((4, '%s LIKE "%s%%"' % (column, trimmed)))
        if trimmed_twice:
            phases.append((5, '%s LIKE "%%%s%%"' % (column, trimmed_twice)))

        utf8_terminal = 'UTF' in os.environ.get('LC_ALL', '')
        translation = ''
        queries = 0
        for phase, condition in phases:
            self.progress_indicator()
            sql = '''SELECT entry_id, entry, entry_naked, trans, trans_naked,
                source, part_of_speech, gender
                FROM %s WHERE %s limit %d''' % (
                self.DB_TABLE, condition, self.RESULTS_LIMIT)
            dbh = self._execute(sql)
            field = self.get_dict_from_db_results(dbh)
            for row in dbh.row_list:
                entry = row[field['entry']]
                entry_naked = row[field['entry_naked']]
                trans = row[field['trans']]
                trans_naked = row[field['trans_naked']]
                entry_id = row[field['entry_id']]
                source = row[field['source']]
                part = row[field['part_of_speech']]
                gender = row[field['gender']]

                if not part or part in ('None', '(?)', '(?2)', '(?3)'):
                    part = ''
                else:
                    part = "[%s]" % part
                if gender in self.GENDERS.values():
                    gender = '[%s] ' % gender
                else:
                    gender = ''
                if self.SHOW_NAKED and entry_naked and entry_naked != entry:
                    entry_naked = " {" + entry_naked + "}"
                else:
                    entry_naked = ''
                if self.SHOW_NAKED and trans_naked and trans_naked != trans:
                    trans_naked = " {" + trans_naked + "}"
                else:
                    trans_naked = ''

                translation = trans
                queries += 1
                if self.debug:
                    debug_info = "%d:" % entry_id
                else:
                    debug_info = ''

                sys.stdout.write("\r ")
                s = "\r%s%s> %s %s%s\n %s %s %s" % (
                    debug_info, source, entry, gender, entry_naked,
                    translation, trans_naked, part)
                if utf8_terminal:
                    s = self.iconv(s, self.source_encoding, 'utf-8')
                print(s)
            if queries:
                break

        if not queries:
            translation = ''
            print(" --- not found ---- ")
        return translation

    #___________________________________________________
    # 'make_all_naked'
    # 'fix'
    # 'dump'
    # 'delete_duplicate_entries'
    #
    def do_something_to_all_records(self, something):
        '''Run one maintenance action over the dictionary table.

        something: 'dump' writes all entries to self.DUMP_FILE,
                   'make_all_naked' refreshes the entry_naked column,
                   'fix' prints (without running) the UPDATE statements that
                   would move "(<...>)" remarks out of entries,
                   'delete_duplicate_entries' writes DELETE statements for
                   duplicate rows to entry-dups.sql.
        Any other value prints an error and does nothing.
        '''
        self.debug2(something, 'something')
        if something == 'delete_duplicate_entries':
            self._write_duplicate_deletes()
            return

        # --- permanent vars ----
        if something == 'fix':
            where = "WHERE entry like '%(<%'"
        elif something == 'dump':
            where = "ORDER BY entry"
        elif something == 'make_all_naked':
            where = ''
        else:
            print("switch ERRRO!!!\n\r\n")
            return
        if where:
            print("working quietly.\nWHERE = %s...\r\n" % where)

        sql = "SELECT * FROM " + self.DB_TABLE + ' ' + where
        print("SQL select: %s" % sql)
        dbh = self._execute(sql)
        field = self.get_dict_from_db_results(dbh)
        rows = dbh.row_list
        num_rows = len(rows)
        print("rows in query:  %d" % num_rows)

        dumped = []
        for done, row in enumerate(rows):
            if something == 'make_all_naked':
                self._update_naked(row, field)
            elif something == 'dump':
                text = self._format_dump_record(row, field)
                if text:
                    dumped.append(text)
                    sys.stdout.write("%% %d \r" % (100 * done // num_rows))
            else:
                self._print_fix_sql(row, field)

        if dumped:
            print("writing output to: " + self.DUMP_FILE)
            _write_text(self.DUMP_FILE, ''.join(dumped))

    #___________________________________________________
    def _update_naked(self, row, field):
        '''Store the accent-less form of one entry when it is out of date.'''
        naked = self.make_naked(row[field['entry']])
        if naked == row[field['entry_naked']]:
            return
        sql = 'UPDATE %s SET entry_naked="%s" WHERE entry_id=%d' % (
            self.DB_TABLE, self.quote(naked), row[field['entry_id']])
        self._execute(sql)

    #___________________________________________________
    def _format_dump_record(self, row, field):
        '''Return the dump text of one row, or '' when it has no translation.'''
        trans = row[field['trans']]
        if not trans:
            return ''
        entry = row[field['entry']]
        naked = row[field['entry_naked']]
        part = row[field['part_of_speech']]
        if not part or part in ('(?)', '(?2)', '(?3)', '?'):
            part = ''
        else:
            part = "[%s]" % part
        if naked and naked != entry:
            naked = naked + "\n"
        else:
            naked = ''
        return "%s> %s%s\n%s %s\n\n" % (
            row[field['source']], naked, entry, trans, part)

    #___________________________________________________
    def _print_fix_sql(self, row, field):
        '''Print the UPDATE that moves a "(<...>)" remark into the translation.

        NOTE(review): ported from PHP code that was never converted to
        python (ereg, array indexes).  As in the original the statement is
        only printed, never executed.
        '''
        entry = row[field['entry']]
        match = re.search(r'\(<([^)>]+)>\)', entry)
        if match is None or len(match.group(0)) < 3:
            return
        combined = (row[field['trans']] or '').strip() + ";" + match.group(1).strip()
        combined = combined.replace(',', ';')
        words = [part.strip() for part in combined.split(';') if part.strip()]
        new_trans = ' / '.join(words)
        new_entry = entry.replace(match.group(0), '').strip()
        new_naked = self.make_naked(new_entry)
        # the original's "where entry_id=entry_id" matched every row
        sql = '''update %s set entry = "%s", trans = "%s", entry_naked = "%s"
            where entry_id=%d''' % (
            self.DB_TABLE, self.quote(new_entry), self.quote(new_trans),
            self.quote(new_naked), row[field['entry_id']])
        print(sql)

    #___________________________________________________
    def _write_duplicate_deletes(self):
        '''Write DELETE statements for duplicate rows to entry-dups.sql.

        Rows count as duplicates when entry and trans are both equal; the
        row with the lowest entry_id is kept.  Nothing is deleted here.
        '''
        sql = 'SELECT entry_id, entry, trans FROM %s ORDER BY entry_id' % self.DB_TABLE
        rows = self._execute(sql).row_list
        print("rows in query:  %d" % len(rows))

        seen = {}
        ids_to_del = []
        for row in rows:
            key = (row[1], row[2])
            if key in seen:
                ids_to_del.append(str(row[0]))
            else:
                seen[key] = row[0]
        print('ids DEL: %s' % ','.join(ids_to_del))

        sql_del = ''
        if ids_to_del:
            sql_del = 'DELETE FROM %s WHERE entry_id IN (%s);\n' % (
                self.DB_TABLE, ','.join(ids_to_del))
        _write_text('entry-dups.sql', sql_del)
        print('written to entry-dups.sql')

    #___________________________________________________
    def make_naked(self, word):
        '''Return word stripped, without diacritics and in lower case.'''
        word = word.strip()
        for c in self.characters_require_conversion:
            word = word.replace(c, self.conv[c])
        return word.lower()

    #___________________________________________________
    # fix
    def fix(self, word):
        '''Return word stripped, with runs of spaces collapsed to one.

        NOTE(review): the search string of the original replace() was
        reduced to a single space (a no-op); a double space is assumed.
        '''
        word = word.strip()
        while '  ' in word:
            word = word.replace('  ', ' ')
        return word

    #___________________________________________________
    def input_to_polish(self, s):
        '''Convert the alternative spellings of polish letters in s.

        Every spelling listed in self.other_encodings is replaced by the
        matching self.standard_encoding byte.  A warning is printed when a
        back-quote or a dot is left over afterwards.
        '''
        s = s.lower()
        for enc in self.other_encodings:
            for from_char, to_char in zip(enc, self.standard_encoding):
                # str.replace() with mixed unicode and non-ascii byte
                # strings raises UnicodeDecodeError, so a spelling is only
                # applied when all three strings are of one type.
                if not (type(from_char) is type(s) is type(to_char)):
                    continue
                s = s.replace(from_char, to_char)
        if '`' in s or '.' in s:
            print("POSSIBLE ERROR in input !!!!")
        return s

    #___________________________________________________
    def special_input_conversion(self, entry, language):
        '''Apply the input conversion of language to entry.

        Only input_to_polish exists so far; for german, spanish and swedish
        (and any other language) entry is returned unchanged.
        '''
        language = (language or '').lower()
        if language in ('polish', 'german', 'spanish', 'swedish'):
            converter = getattr(self, 'input_to_' + language, None)
            if converter is not None:
                return converter(entry)
        return entry

    #___________________________________________________
    def normalize_polish(self):
        '''Apply fix() to every entry and store the entries that changed.'''
        sql = "SELECT entry_id, entry FROM " + self.DB_TABLE
        dbh = self._execute(sql)
        field = self.get_dict_from_db_results(dbh)
        for row in dbh.row_list:
            entry_id = row[field['entry_id']]
            entry = row[field['entry']]
            fixed_entry = self.fix(entry)
            if fixed_entry != entry:
                sql = 'update %s SET entry="%s" WHERE entry_id=%d' % (
                    self.DB_TABLE, self.quote(fixed_entry), entry_id)
                print(sql)
                print(fixed_entry)
                self._execute(sql)

    #def __del__(self):
    #   global HISTORY_FILE
    #   readline.write_history_file(HISTORY_FILE)

# ------------ end function declarations -------------


# ---------------------- MAIN ----------------
def main(args=None):
    '''Command-line entry point.

    args: argument list, sys.argv when omitted.  args[1] names the
    dictionary file, args[2] is a word to translate and args[3] a target
    language field (one of LANGUAGES).  Without a word the interactive
    prompt is started.
    '''
    if args is None:
        args = sys.argv
    dictfile = ''
    if len(args) > 1:
        dictfile = args[1]
    d = Dictionary(dictfile)

    if len(args) > 2:
        # NOTE(review): translate() does not read target_language_field yet
        # (see "support the 'language-to' parameter" in the todo list).
        if len(args) > 3 and args[3] in LANGUAGES:
            d.target_language_field = args[3]
        # NOTE(review): a word on the command line is assumed to mean
        # "translate once and exit"; the original layout was lost.
        d.translate(args[2])
        return

    if READLINE_SUPPORT:
        d.organize_history_file()
        readline.set_history_length(d.HISTORY_SIZE)
        d.debug2('reading history from %s\n' % d.HISTORY_FILE)
        try:
            if not os.path.isfile(d.HISTORY_FILE):
                _write_text(d.HISTORY_FILE, '')
            readline.read_history_file(d.HISTORY_FILE)
        except (IOError, OSError):
            d.debug2("history file require solutions: %s\n" % d.HISTORY_FILE)
    d.main_loop()


if __name__ == '__main__':
    main()
    sys.exit()
# ------------ end of file -----------------#