# -*- coding: iso8859-2 ''' --------------------------------------------------- Database Dictionary ?/ ?/2003: started PHP version ?/ ?/2003: converted to python 15/10/2003: added readline capability (requires readline module) 15/10/2003: remembers history accross sessions 18/10/2003: added debug mode, delete entry, read() function 09/03/2004: code recovery. working again! 15-05-2004: new readline module adjastments. 15-05-2004: command line support 26-05-2004: fixed bug in polish characters input 02-06-2004: ability to execute arbitrary SQL statement todo: support the 'language-to' parameter consider working with cmd/word in "always lowercase" mode table def: CREATE TABLE polish ( word_id INTEGER PRIMARY KEY, polish varchar(255) NOT NULL default '', naked varchar(255) default NULL, english varchar(255) default NULL, french varchar(255) default NULL, hebrew varchar(255) default NULL, source varchar(10) NOT NULL default '', voice_file varchar(255) default NULL, part_of_speech varchar(255) default NULL, /* enum('verb','noun','pronoun','adjective','adverb','preposition','abbr','figurative','ordinal','exclamation'), */ category varchar(255) default NULL, gender varchar(10) default NULL, /* enum('male','female','neuter') */ usa_uk NOT NULL default 'usa' /* enum('usa','uk') */ ); -----------------------------------------------------------''' # ------------ moule importsection: -------------- import string, os, sys, sqlite from time import sleep try: import readline #import readline2 READLINE_SUPPORT = 1 _pyhistfile = #print "with readline" except: READLINE_SUPPORT = 0 #print "no readline" # ------------ constants section: -------------- DEFAULT_LANG_FROM = 'naked' DEFAULT_LANG_TO = 'english' LANGUAGES = ['polish', 'french', 'naked', 'english', 'hebrew'] DEFAULT_LANG_FROM = 'naked' GENDERS = {'m':'male', 'f':'female', 'n':'neuter'} POLISH_DIRs = [ r'.', r'D:\all-users\languages\polish', r'c:\users\language\polish', ] POLISH_DIR = '' for POLISH_DIR in POLISH_DIRs: try: os.chdir(POLISH_DIR) # because of bug in sqlite except: pass if not POLISH_DIR: print 'cant find dictionary directory' sys.exit(2) HISTORY_FILE = 'c:\dic-hist' if not os.path.isfile(HISTORY_FILE): open(HISTORY_FILE,'w').write('') DB_NAME = 'polish.sqlite' DB_TABLE = "polish" SHOW_NAKED = 0 # --- show or hide accent-less entry DEBUG = False SQL_DANGEROUS_WORDS = ['delete', 'update'] POLISH_TEXT_DUMP = POLISH_DIR + os.sep + 'polish.out.txt' RESULTS_LIMIT = 10 SHORT_FACTOR = 0.78 POLISH_PROMPT = 'word in POLISH> h - help ' ENGLISH_PROMPT = 'word in ENGLISH> h - help ' FILE_PROMPT = ' name of FILE to translate> ' prompt = POLISH_PROMPT WORD_SPLITTERS = '!"?<>[](){}_,.;:\n\r' GARBAGE_CHARACTERS = '@#$%^&*+0123456789' standard_encoding = "󜿟" other_encodings = ( "ʣӌ", #win_set_big "䢘", #dos_set_small "", #dos_set_big ('a`','c`','e`','l`','n`','o`','s`','z.','z`'), #user_encoding ("a'","c'","e'","l'","n'","o'","s'","z.","z'") #user_encoding2 ) indicator = '...' phase_description = { 1:'(exact)', 2:'(begin)', 3:'(mid)', 4:'(trim)', 5:'(trim sub)' } conv = { '': 'a', '': 'a', '': 'c', '': 'c', '': 'e', '': 'e', '': 'n', '': 'n', '': 's', '': 's', '': 'o', '': 'o', '': 'z', '': 'z', '': 'l', '': 'l', '': 'z', '': 'z', } characters_require_conversion = string.join(conv.keys(), '') # ------------ end of constants ------------- # ------------ doc strings section ------------- usage = '''Usage: with NO arguments: run the interactive mode C:\\>trans [ []] C:\\>trans u|r|fix|dump C:\\>trans -h | --help Where: fix - fix something (misc functions) dump - write everything to a plain-text file n - insert new word to the dictionary u - update naked words in the dictionary r - fix polish entries word - word to be translated languages: %s default language: from %s to %s ''' % (string.join(LANGUAGES, ', '), DEFAULT_LANG_FROM, DEFAULT_LANG_TO) ''' -------------------------------------- 'pronoun','adjective','adverb','preposition','abbr.','job' noun rzecz rzeczownik pronoun zaim zaimek adject przym przymiotnik prepos przyim przyimek adverb przysl przyslowek abbrv skr skrot verb czas czasownik -------------------------------------- ''' MENU_USAGE = ''' PLEH! E - English to Polish translation P - Polish to English translation F - traslate words from a file N - add New dictionary entry DU - dump dictionary to text file DL - delete an entry from the dictionary DM - debug mode H - Help screen Q - Quit, exit the dictionary CLS - clear the screen sqlsql - query the database directly (sql)''' if READLINE_SUPPORT: MENU_USAGE = MENU_USAGE + ''' - remember last word(s) - erase line HH - show word history. ''' # ------------ end of doc strings ------------- # ------------ functions section: -------------- def separator(): sys.stderr.write("___________________________________________\n") #___________________________________________________ def progress_indicator(new=False): global indicator if new: indicator = '\r searching ....' else: indicator = indicator + '......' print indicator, #___________________________________________________ def debug2(s, x=''): global DEBUG if DEBUG: print "debug: ", print s, print x, print "\r\n" #___________________________________________________ def is_query_dangerous(sql): words = sql.split() for word in words: if word.lower() in SQL_DANGEROUS_WORDS: return True return False #___________________________________________________ def shorten(word): l = len(word) max_letters = int(SHORT_FACTOR * l) word = word[ : max_letters] return word #___________________________________________________ def remove_from_history(cmd): readline.delete_item_from_history(cmd) #pass #___________________________________________________ def organize_history_file(): tmplist = [] hists = open(HISTORY_FILE).read().split('\n') for hist in hists: if len(hist) < 3: continue if hist not in tmplist: tmplist.append(hist) open(HISTORY_FILE, 'w').write(string.join(tmplist, '\n')) readline.read_history_file(HISTORY_FILE) return tmplist #___________________________________________________ def read(prompt, log_in_history=0): if READLINE_SUPPORT: print prompt # due to bug in readlines, doesn't end with <,> reply = readline.readline(histfile=HISTORY_FILE) else: reply = raw_input(prompt) reply = reply.strip() return reply #___________________________________________________ def split_words(txt): for ch in WORD_SPLITTERS: txt = txt.replace(ch, ' ') txt = make_naked(txt) words = txt.split() return words #___________________________________________________ #sqlite quote def quote(word): newword = word.replace('"','""') return newword #___________________________________________________ def get_dict_from_db_results(dbh): field = {} for i, defi in enumerate(dbh.col_defs): field[ defi[0] ] = i return field #___________________________________________________ def clear_screen(): print '\n\n\n\n\n' #___________________________________________________ def main_loop(lang, lang_to): global prompt, DEBUG, POLISH_TEXT_DUMP # --- default language: polish "naked" --- while 1: # ----- read user input ------------- separator() word = read(prompt, 1) cmd = word.lower() # ---------- quit ------ if cmd in ['q', 'exit']: if READLINE_SUPPORT: #rdln.history.remove(cmd) #hist = string.join(rdln.history, '\n') #open(HISTORY_FILE,'w').write(hist) pass #debug! break # ---------- eng to pol ------ if cmd == 'e': remove_from_history(cmd) lang = 'english' prompt = ENGLISH_PROMPT continue # ---------- search history (todo: save to file!) ------ if cmd == 'hh': remove_from_history(cmd) newlist = organize_history_file() if READLINE_SUPPORT: for i,hist in enumerate(newlist): print ' %d> %s' %(i, hist) else: print " >>> please install READLINE.py support. <<< " continue # ---------- pol to eng ------ if cmd == 'p': remove_from_history(cmd) lang = 'naked' prompt = POLISH_PROMPT continue # ---------- file of words ------ if cmd == 'f': remove_from_history(cmd) fname = read(FILE_PROMPT) outfile = fname + '.trans' if not os.path.isfile(fname): print "This file isn't good enough (or doesn't exist)" continue txt = open(fname).read() out = '' words = split_words(txt) for word in words: if word_is_garbage(word): trans = '' else: trans = translate(word, lang, lang_to) out = out + word + ' = ' + trans + '\n' open(outfile,'w').write(out) print "translation written to: " + outfile continue # ---------- help ------ if cmd in ['help', 'h', '?']: remove_from_history(cmd) print MENU_USAGE continue # ---------- nothing... ------ if cmd == '': continue #------------- clear screen ------------------ if cmd == 'cls': clear_screen() remove_from_history(cmd) continue #------------- debug mode ------------------ if cmd == 'dm': remove_from_history(cmd) DEBUG = not DEBUG print "debug mode is now:", DEBUG continue #------------- delete entry ------------------ if cmd == 'dl': remove_from_history(cmd) word_id = read("Delete entry # ", 1) try: word_id = int(word_id) except: continue sql = 'SELECT * FROM polish WHERE word_id="%d"' % word_id dbh = conn.db.execute(sql) rows = dbh.row_list print rows ok = read("Delete [y/n]") if ok in ['y','yes','ok','o']: sql = 'DELETE FROM polish WHERE word_id="%d"' % word_id dbh = conn.db.execute(sql) print "deleted." continue #------------- dump to dic text file ------------------ if cmd == 'du': remove_from_history(cmd) cmd = read('dump to filename [%s]: ' % POLISH_TEXT_DUMP) if cmd == '': cmd = POLISH_TEXT_DUMP if os.path.isfile(cmd): print cmd, 'already exists.' if os.path.isdir(cmd): print POLISH_TEXT_DUMP, 'is a directory.' continue POLISH_TEXT_DUMP = cmd ok= read('continue dumping [y/n]?') if ok in ['y','yes','ok']: do_something_to_all_records('dump') print 'text file written to', POLISH_TEXT_DUMP continue #------------- delete duplicate entries ------------------ if cmd == 'dd': remove_from_history(cmd) ok = read('Delete all duplicate entries, on (polish, english, hebrew). Continue [y/n]?') if ok in ['y','yes','ok']: do_something_to_all_records('delete_duplicate_entries') continue #------------- insert New entry ------------------ if cmd == 'n': remove_from_history(cmd) ok='no' while 1: word = read('--------- new polish word: ') english = read('----- english translation: ') hebrew = read('----------- hebrew [none]: ', 0) part_of_speech = read('----part of speech [none]: ', 0) gender = read('---- gender (m/f/n)[none]: ', 0) source = read('------------- source [me]: ', 0) part_of_speech = part_of_speech.lower() if source == '': source="me" if gender in GENDERS: gender = GENDERS[gender] else: gender = '' polish = input_to_polish(word) naked = make_naked(polish) sql = ''' INSERT INTO polish (polish, naked, english, hebrew, source, part_of_speech, gender) VALUES("%s", "%s", "%s", "%s", "%s", "%s", "%s") ''' %( polish,naked,english,hebrew,source, part_of_speech, gender) print sql ok = read("k, ry again, ancel: ") if ok in ['y','yes','Y','Yes','ok','O','o','Ok']: dbh = conn.db.execute(sql) print "id: %d inserted." % conn.db.sqlite_last_insert_rowid() break elif ok in ['c','C','cancel','Cancel', 'q','Q','exit','Quit']: break continue #------------- arbitrary SQL query ---------------- if cmd == 'sqlsql': remove_from_history(cmd) while 1: sql = read('SQL statemnent ("q" to quit): ') if is_query_dangerous(sql): print 'Query is not safe. aborting.' break if sql.lower() in ['q', 'quit']: break #print sql ok = read("Query Database? [Yes/No/Quit]: ") remove_from_history(ok) if ok.lower() in ['q','quit']: break if ok.lower() in ['y','yes']: try: dbh = conn.db.execute(sql) for row in dbh.row_list: print row #print "id: %d inserted." % conn.db.sqlite_last_insert_rowid() except: print "Error in sql or something" continue # -- if here, it's not a command. # user input is a word, translate it. --- clear_screen() #separator() translate(word, lang, lang_to) #___________________________________________________ def translate(word, lang, lang_to): global prompt, DEBUG progress_indicator(True) # ---------- translation -------- queries=0 where = { 1: '%s = "%s"' %(lang, word), 2: '%s LIKE "%s%%"' %(lang, word), 4: '%s LIKE "%s%%"' %(lang, shorten(word)), 3: '%s LIKE "%%%s%%"' %(lang, word), 5: '%s LIKE "%%%s%%"' %(lang, shorten(shorten(word))), } for phase in where: progress_indicator() sql = '''SELECT word_id, polish, naked, english, french, source, part_of_speech, gender FROM polish WHERE %s limit %d''' %(where[phase], RESULTS_LIMIT) try: dbh = conn.db.execute(sql) except: print "fatal database error: \n" print sql print os.path.abspath(os.curdir) print DB_NAME sys.exit() field = get_dict_from_db_results(dbh) rows = dbh.row_list for row in rows: #progress_indicator() polish = row[field['polish']] naked = row[field['naked']] english = row[field['english']] french = row[field['french']] word_id = row[field['word_id']] source = row[field['source']] part = row[field['part_of_speech']] gender = row[field['gender']] #swedish = row[field['swedish']] #hebrew = row[field['hebrew']] if part in ('None', '(?)', '(?2)', '(?3)', ''): part = '' else: part = "[%s]" % part if gender in GENDERS.values(): gender = '[%s] ' % gender else: gender = '' if SHOW_NAKED and naked != polish: naked = ": " + naked else: naked = '' if english: translation = english elif french: translation = french else: continue #elif hebrew: translation = hebrew #elif swedish: translation = swedish queries += 1 desc = phase_description[phase] if DEBUG: debug_info = "%d:" % word_id else: debug_info = '' #if READLINE_SUPPORT: print print "\r ", print "\r%s%s> %s %s%s\n %s %s" %(debug_info,source,polish,gender,naked,translation,part) #separator() if queries: break #print "phase ",phase if not queries: translation = '' print " --- not found ---- " return translation #___________________________________________________ # 'make_all_naked' # 'normalize' # 'fix' # 'dump' # def do_something_to_all_records(something) : global POLISH_TEXT_DUMP debug2(something, 'something') # --- permanent vars ---- if something == 'fix': where = "WHERE polish like '%(<%'" print "working quietly.\nWHERE = %s...\r\n" %where elif something == 'dump': where = "ORDER BY polish" fname = POLISH_TEXT_DUMP print "working quietly.\nWHERE = %s...\r\n" %where elif something == 'make_all_naked': where = '' elif something == 'delete_duplicate_entries': sql1 = 'SELECT polish,english,hebrew FROM polish' dbh = conn.db.execute(sql1) rows = dbh.row_list num_rows = dbh.rowcount print "rows in query: ", dbh.rowcount progress = 0.0 all_output = '' i=0 for row in rows: sql2 = ''' SELECT word_id FROM polish WHERE polish="%s" AND english="%s" AND hebrew="%s" ''' % (row[0], row[1], row[2]) dbh2 = conn.db.execute(sql2) rows2 = dbh2.row_list ids = map(lambda lst:str(lst[0]), rows2) print 'ids :', ids ids_to_del = ids[1:] ids_to_del = ','.join( ids_to_del ) print 'ids DEL:', ids_to_del sql_del = 'DELETE FROM polish WHERE word_id IN (%s)' % ids_to_del #dbh = conn.db.execute(sql_del) sleep(1) #dbh = conn.db.execute(sql) print sql open('polish-dups.sql','w').write(sql) print 'written to polish-dups.sql' return else: where = '' sql = "SELECT * FROM polish " + where print "SQL select:",sql dbh = conn.db.execute(sql) field = get_dict_from_db_results(dbh) rows = dbh.row_list num_rows = dbh.rowcount print "rows in query: ", dbh.rowcount progress = 0.0 all_output = '' i=0 for row in rows: i += 1 word_id = row[0] english = row[field['english']] french = row[field['french']] hebrew = row[field['hebrew']] naked = row[field['naked']] part = row[field['part_of_speech']] polish = row[field['polish']] source = row[field['source']] gender = row[field['gender']] prog = 100 * progress // num_rows progress += 1 if something == 'make_all_naked': naked1 = make_naked(polish) if naked == naked1: continue naked = quote(naked1) sql = 'UPDATE polish SET naked="%s" WHERE word_id=%d' %(naked, word_id) #print sql dbh = conn.db.execute(sql) #print "%d [%s -> %s]" %(prog, polish, naked) elif something == 'dump': if english != '': trans = english elif french != '': trans = french else: continue if part in ('(?)', '(?2)', '(?3)', '?'): part = '' else: part = "[%s]" % part if naked != polish: naked = naked + "\n" else: naked = '' ret = "%s> %s%s\n%s %s\n\n" %(source, naked, polish, trans, part) all_output = all_output + ret print "%", prog, "\r", elif something == 'fix': fixed_polish = fix(polish) reg = '\(<([^)>]+)>\)' ret[0] = '' ret[1] = '' ret[2] = '' pos = ereg( reg, polish, ret ) the_whole_thing = ret[0] the_thing = ret[1] len_of_the_whole_thing = len( the_whole_thing ) if len_of_the_whole_thing < 3: continue the_thing = english.strip + ";" + the_thing.strip the_thing = the_thing.replace(',', ';') english_words = the_thing.split(';') #english_words = array_unique(english_words) english_joined = string.join(english_words, ' / ') newenglish = english_joined #newpolish = string.strip(replace(the_whole_thing, '', polish)) newnaked = make_naked(newpolish) #newnaked = addslashes(newnaked) #newpolish = addslashes(newpolish) #newenglish = addslashes(newenglish) sql = '''update polish set polish = "%s", english = "%s", naked = "%s" where word_id=word_id''' %(newpolish,newenglish,newnaked) print sql fixed_polish = fix(polish) reg = '\(<([^)>]+)>\)' ret[0] = '' ret[1] = '' ret[2] = '' pos = ereg( reg, polish, ret ) the_whole_thing = ret[0] the_thing = ret[1] len_of_the_whole_thing = len( the_whole_thing ) if len_of_the_whole_thing < 3: continue the_thing = english.strip + ";" + the_thing.strip the_thing = the_thing.replace(',', ';') english_words = the_thing.split(';') #english_words = array_unique(english_words) english_joined = string.join(english_words, ' / ') newenglish = english_joined #newpolish = string.strip(replace(the_whole_thing, '', polish)) newnaked = make_naked(newpolish) #newnaked = addslashes(newnaked) #newpolish = addslashes(newpolish) #newenglish = addslashes(newenglish) sql = '''update polish set polish = "%s", english = "%s", naked = "%s" where word_id=word_id''' %(newpolish,newenglish,newnaked) elif something == 'delete_duplicate_entries': pass else: print "switch ERRRO!!!\n\r\n" # while if all_output: print "writing output to: " + fname open(fname,'w').write(all_output) #if ids_to_delete: #___________________________________________________ def make_naked(word) : global conv, characters_require_conversion word = word.strip() for c in characters_require_conversion: word = word.replace(c, conv[c]) word = word.lower() return word #___________________________________________________ # fix def fix(word) : word = string.strip(word) word = word.replace(' ', ' ') return word #___________________________________________________ def input_to_polish(s): s = s.lower() for enc in other_encodings: for i, to_char in enumerate( standard_encoding ): from_char = enc[i] #if s.find(from_char)>=0: print from_char, to_char s = s.replace(from_char, to_char) if '`' in s or '.' in s: print "POSSIBLE ERROR in input !!!!" return s #___________________________________________________ def normalize_polish() : sql = "SELECT word_id, polish FROM polish" # ".where dbh = conn.db.execute(sql) field = get_dict_from_db_results(dbh) rows = dbh.row_list i=0 for row in rows: word_id = row[field['word_id']] polish = row[field['polish']] fixed_polish = fix(polish) if (fixed_polish != polish) : fixed_polish = quote(fixed_polish) sql = '''update polish SET polish="%s" WHERE word_id=%d ''' %(fixed_polish, word_id) print sql print fixed_polish #conn.db.execute(sql) # ------------ end function declarations ------------- # ---------------------- MAIN ---------------- if __name__ == '__main__': if READLINE_SUPPORT: organize_history_file() debug2('reading history from %s\n' % HISTORY_FILE) try: if not os.path.isfile(HISTORY_FILE): open(HISTORY_FILE,'w').write('') readline.read_history_file(HISTORY_FILE) #HISTORY_FILE = [] # bug in readline except: debug2("history file require solutions: %s\n" %HISTORY_FILE) conn = sqlite.connect(DB_NAME) where = [0,0,0,0,0] #separator() lang_from = DEFAULT_LANG_FROM lang_to = DEFAULT_LANG_TO if len(sys.argv)>1: args = sys.argv word = args[1] if len(args)>2: lang_from = args[2] if lang_from not in LANGUAGES: lang_from = DEFAULT_LANG_FROM if len(args)>3: lang_to = args[3] if lang_to not in LANGUAGES: lang_to = DEFAULT_LANG_TO print translate(word, lang_from, lang_to) else: main_loop(lang_from, lang_to) sys.exit() # ------------ end of file -----------------#