#!/usr/bin/python
'''-------------------------------------------------
replacer.py
replaces strings in text files.
a recursive grep, too

usage:
    replacer.py PATH [--old='text to find'] [--new='replacement']
                     [--case] [--verbose] [--recursive] [--regex]
                     [--hex] [--ascii] [--include=] [--exclude=]
                     [--file=] [--nobackup] [--search='text to find']
  or
    replacer.py ?     go into interactive mode

description:
    PATH might be:
        file name to search,
        directory name (all files in it are searched)
        - (minus sign) for a list of files from STDIN
          eg: ls /path/*.txt | replacer -
        ? (question mark) for interactive session
    --recursive (-r) all dir and sub-dirs are looked, too
    --case      (-c) case SENSITIVE. default is iNSenSiTiVE.
                     (--hex and --ascii searches are always exact)
    --regex     (-R) use regular expressions in search.
                     Backslash are processed, eg. "\\n" is newline
                     Backreferences ("\\6"), are replaced with substring
                     match by (sixth) group found in OLD.
                     Grouping is done with (), eg: "b+(fu).b(ar)"
    --search=   (-s) Only search and display matching snippet.
                     Do not write anything to disk or files.
    --nobackup  (-b) DO NOT save backup of changed file
    --include=  (-i) include only these files from comma-separated list,
                     e.g.: --include="*.bak,george.doc,*.zip"
    --exclude=  (-e) comma-separated files to exclude from list
                     default is "*.bak"
    --old=      (-o) the old string to match
    --old=@filename  read old string from filename, if exists
    --new=      (-n) the new replacing string.
    --new=@filename  read new string from filename, if exists
    --hex       (-x) hexadecimal (comma delimited pairs).
                     e.g.: replacer -x --new=FF,A2,7D,09
    --ascii     (-a) ascii-code (comma delimited).
                     e.g.: replacer -a --new=25,12,129,6
    --verbose   (-v) show progress
    --margin=n  (-m) show n letters before/after found text
    --interactive    interactive mode
    --file=     (-f) an additional file to be processed
    --debug     (-d) dont mess here
'''
# --confirm  show "r u sure?" before each replacement.

change_log = '''
Mon Oct 25 11:23:14 2004
2004-10-30: command line changes
Nov 1 2004: regex flags bug fix
2005-02-03: old and new string can be read from files
'''

import sys
import os
import time
import string
import re
import getopt
import fnmatch
#try: import readline

files_changed = 0
DEBUG = False
BACKUP_EXT = '.bak'

# The script predates Python 3; pick whichever line-reading builtin exists so
# the interactive mode works on both major versions.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


# ------------- functions -------------------------------------------

def debug(s):
    '''Print s prefixed with "!" when the module-level DEBUG flag is set.'''
    if DEBUG:
        print('! %s' % (s,))


def usage(error_msg='', exit_code=0):
    '''Print the module help text followed by error_msg, then exit.

    Never returns: always ends in sys.exit(exit_code).
    '''
    print(__doc__)
    print(error_msg)
    sys.exit(exit_code)


def get_snippet(s, start, stop, snippet_margin):
    '''Return s[start:stop] widened by snippet_margin characters per side.

    Both edges are clamped to the bounds of s, so a match close to the
    beginning or end of the text still shows as much context as exists.
    todo: move this func to external library
    '''
    margin = max(0, snippet_margin)
    first = max(0, start - margin)
    last = min(len(s), stop + margin)
    return s[first:last]


def safe_split(s, sep=None, trim=False):
    '''Split s on sep and drop empty items.

    Returns [] when s is not a string. With trim=True every item is stripped
    of surrounding whitespace before the emptiness test.
    '''
    if not isinstance(s, str):
        return []
    parts = []
    for part in s.split(sep):
        if trim:
            part = part.strip()
        if part:
            parts.append(part)
    return parts


def hex2dec(hx):
    '''Convert a hexadecimal string to an int (ValueError if malformed).'''
    return int(hx, 16)


def _codes_to_text(codes, base, needs_two_digits, error_msg):
    '''Turn a comma-delimited list of character codes into a string.

    Any token that is malformed or outside 0-255 ends the program through
    usage(error_msg, 3).
    '''
    chars = []
    for token in safe_split(codes, ',', True):
        try:
            code = int(token, base)
        except ValueError:
            code = -1
        if (needs_two_digits and len(token) != 2) or not 0 <= code <= 255:
            usage(error_msg, 3)
        chars.append(chr(code))
    return ''.join(chars)


def hex2chr(hx):
    '''Decode comma-delimited hex pairs ("FF,A2,09") into a string.'''
    if not hx:
        return ''
    return _codes_to_text(hx, 16, True, 'hex not in range(00-FF)')


def chr2str(ch):
    '''Decode comma-delimited decimal character codes ("72,105").'''
    if not ch:
        return ''
    return _codes_to_text(ch, 10, False, 'char not in range(0-255)')


def _open_text(path, mode):
    '''Open path as text without newline translation where that is possible.

    On Python 3 the default text mode rewrites line endings, which would
    silently convert CRLF files when they are saved back.
    '''
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode)


def _read_stdin_list():
    '''Return the non-empty lines of standard input (one file name each).'''
    return [line for line in sys.stdin.read().splitlines() if line]


class Replacer:
    '''Search (and optionally replace) a text in a set of files.

    Typical life cycle: get_options() -> [fill_options_interactively()] ->
    process_options() -> recurse() and/or process_file_list().
    '''

    SHORT_OPTS = "abcde:f:hi:Im:n:o:rRs:vx"
    LONG_OPTS = [
        'ascii', 'case', 'confirm', 'debug', 'exclude=', 'file=', 'help',
        'hex', 'include=', 'interactive', 'margin=', 'new=', 'nobackup',
        'old=', 'recursive', 'regex', 'search=', 'stdin', 'verbose',
    ]

    def __init__(self):
        self.file_count = 0
        self.files_changed = 0
        self._set_defaults()

    def _set_defaults(self):
        '''Reset every option to its documented default.'''
        self.path = '.'
        self.interactive = False
        self.list = []
        self.debug = 0
        self.backup = True
        self.confirm = False
        self.new = ''
        self.old = ''
        self.hex = False
        self.include = []
        self.exclude = ['*.bak']
        self.ascii = False
        self.regex = False
        self.flags = re.M | re.S    # global module regex flags.
        self.ignore = True          # True means case-INsensitive search
        self.case = False
        self.verbose = 0
        self.recursive = False
        self.search_only = False
        self.snippet_margin = 12

    def get_options(self):
        '''Read the options from sys.argv.

        sys.argv[1] is the path (or "?" for interactive mode); everything
        after it is parsed with getopt. Invalid input ends the program
        through usage().
        '''
        global DEBUG
        self._set_defaults()
        if len(sys.argv) < 2:
            usage('what, no arguments?!', 4)
        try:
            opts, _extra = getopt.getopt(sys.argv[2:], self.SHORT_OPTS,
                                         self.LONG_OPTS)
        except getopt.GetoptError as err:
            usage('\n\n unknown or invalid command-line argument: %s\n'
                  % err)

        self.path = sys.argv[1]
        if self.path in ('-h', '--help', '/?'):
            usage(exit_code=1)
        self.interactive = self.path in ('?', '--interactive', '-I')

        for arg_name, arg_value in opts:
            if arg_name in ('-h', '--help'):
                usage(exit_code=1)
            elif arg_name in ('--debug', '-d'):
                self.debug = True
                DEBUG = True        # debug() reads the module-level flag
            elif arg_name in ('--nobackup', '-b'):
                self.backup = False
            elif arg_name in ('--new', '-n'):
                self.new = self.filecontents_or_string(arg_value)
            elif arg_name in ('--old', '-o'):
                self.old = self.filecontents_or_string(arg_value)
            elif arg_name in ('--hex', '-x'):
                self.hex = True
            elif arg_name in ('--ascii', '-a'):
                self.ascii = True
            elif arg_name in ('--regex', '-R'):
                self.regex = True
            elif arg_name in ('--recursive', '-r'):
                self.recursive = True
            elif arg_name in ('--case', '-c'):
                self.ignore = False
            elif arg_name in ('--margin', '-m'):
                try:
                    self.snippet_margin = int(arg_value)
                except ValueError:
                    usage('--margin needs a number', 3)
            elif arg_name in ('--include', '-i'):
                self.include = safe_split(arg_value, ',', True)
            elif arg_name in ('--exclude', '-e'):
                self.exclude = safe_split(arg_value, ',', True)
            elif arg_name in ('--verbose', '-v'):
                self.verbose = True
            elif arg_name in ('--file', '-f'):
                self.list.append(arg_value)
            elif arg_name in ('--interactive', '-I'):
                self.interactive = True
            elif arg_name in ('--confirm',):
                self.confirm = True
            elif arg_name in ('--search', '-s'):
                self.search_only = True
                self.old = arg_value
            elif arg_name in ('--stdin',):
                self.list.extend(_read_stdin_list())

    def _ignore_case(self):
        '''True when matching should be case-insensitive.

        Byte-oriented input (--hex / --ascii) is always matched exactly.
        '''
        return bool(self.ignore) and not (self.hex or self.ascii)

    def process_options(self):
        '''Validate the options and prepare the file list and the pattern.'''
        # ---- path or file ---------
        # Files given with --file / --stdin are kept; the path adds to them.
        if self.path in ('stdin', 'STDIN', '-'):
            self.list.extend(_read_stdin_list())
        elif os.path.isfile(self.path):
            self.list.insert(0, self.path)
        elif os.path.isdir(self.path):
            if not self.recursive:      # deal with recursion later
                # listdir returns bare names; join them so they can be
                # opened from any working directory.
                for name in os.listdir(self.path):
                    self.list.append(os.path.join(self.path, name))

        # ------- search and replace ------------
        if not self.old:
            usage("can't search for empty text!", 3)
        # NOTE: an empty NEW text is accepted and means "delete the match".

        # -------- input coding -----------
        # Decode before the pattern is built so a regex sees the real text.
        if self.ascii:
            self.old = chr2str(self.old)
            self.new = chr2str(self.new)
        elif self.hex:
            self.old = hex2chr(self.old)
            self.new = hex2chr(self.new)

        # ----- regular expressions ----------
        if self.regex:
            self.flags = re.S | re.M
            if self._ignore_case():
                self.flags |= re.I
            try:
                # from here on self.regex holds the compiled pattern
                self.regex = re.compile(self.old, self.flags)
            except re.error as err:
                usage('invalid regular expression: %s' % err, 3)

    def fill_options_interactively(self):
        '''Ask the user for every option on the terminal.

        Returns True when the user confirms the summary; any other answer
        exits the program with status 0.
        '''
        print('----------------------------------------')
        print(' replacer.py ')
        print(' replaces strings in text files. ')
        print(' for more help: replacer.py --help ')
        print(' interactive mode activated. ')
        print('----------------------------------------')

        q = '''\n--where to search? write either:
  - a FILE NAME to search,
  - a directory (all files in it are searched)
  - or STDIN for a list of files via standard-input
    (e.g.: find /path/*.txt | replacer)
  [default: current directory]\n>> '''
        ans = _read_line(q).strip()
        if ans in ('-', 'stdin', 'STDIN'):
            # The list is read by process_options(), after all questions
            # have been answered; reading it here would swallow the answers.
            self.path = '-'
        elif ans == '':
            self.path = '.'
        else:
            self.path = ans

        q = "\n--do all dir and sub-dirs are looked, too? (y/n) [n]"
        ans = _read_line(q).strip().lower()
        self.recursive = ans in ('y', 'yes')

        q = ("\n--is search case SENSITIVE? do CaPitaL letters make "
             "difference in search? (y/n) [n]")
        ans = _read_line(q).strip().lower()
        self.case = ans in ('y', 'yes')
        self.ignore = not self.case     # the search code reads self.ignore

        q = '''\n--use regular expressions in search?
  - Backslash are processed, eg. "\\n" is newline.
  - Grouping is done with (), eg: "b+(fu).b(ar)".
  - Backreferences (eg. "\\6"), are replaced with substring
    match by N-th group found in OLD.
  (y/n) [n] '''
        ans = _read_line(q).strip().lower()
        self.regex = ans in ('y', 'yes')

        q = "\n--save backup of changed file? (y/n) [y]"
        ans = _read_line(q).strip().lower()
        self.backup = ans not in ('n', 'no')    # default answer keeps backups

        q = '''\n--include only these files (comma-separated list)
  e.g.: *.bak,john.doc,*.zip
  Leave empty to use above file (or all files if above is directory)
>> '''
        ans = _read_line(q).strip()
        if ans:
            self.include = safe_split(ans, ',', True)

        q = '''\n--exclude these files (comma-separated list)
  e.g.: *.bak,john.doc,*.zip \n>> '''
        ans = _read_line(q).strip()
        if ans:
            self.exclude = safe_split(ans, ',', True)

        q = "\n--the old string to match >> "
        ans = _read_line(q)
        if not ans:
            sys.exit(3)
        self.old = ans

        q = "\n--the new, replacing string >> "
        ans = _read_line(q)
        if not ans:
            print("empty string r u sure? sys.exit(3)")
        else:
            self.new = ans

        q = '''\n--is this what you want?
  filename or directory: %s
  look for text:         %s
  replace with:          %s
  regular expressions:   %s
  only include files:    %s
  exclude files:         %s
  create backups:        %s
  CaSe SensiTive:        %s
  look in sub-dirs:      %s
(yes, no, quit) [y] ''' % (
            self.path, self.old, self.new, self.regex, self.include,
            self.exclude, self.backup, self.case, self.recursive)
        ans = _read_line(q).strip().lower()
        if ans in ('y', 'yes', ''):
            return True
        sys.exit(0)

    # ------------------------------------------
    def filecontents_or_string(self, s):
        '''Return the contents of FILE when s is "@FILE" and FILE exists.

        Any other value is returned unchanged.
        '''
        if len(s) > 1 and s[:1] == '@':
            name = s[1:]
            if os.path.isfile(name):
                with open(name) as source:
                    return source.read()
        return s

    def check_inclusion(self, file):
        '''is file included/excluded? (this might seem illogical at first).

        if user chose to include something, it means exclusive inclusion
        of that thing, thus the rest is not included.
        Patterns are tried against the path as given and against its base
        name, so "george.doc" also matches "some/dir/george.doc".
        '''
        candidates = (file, os.path.basename(file))

        def matches(patterns):
            for pattern in patterns:
                for candidate in candidates:
                    if fnmatch.fnmatch(candidate, pattern):
                        return pattern
            return None

        if self.include:
            debug("have inc")
            if matches(self.include) is None:
                return False
        excluded_by = matches(self.exclude)
        if excluded_by is not None:
            debug(excluded_by + "have exc")
            return False
        return True

    def process_file_list(self):
        '''Process every entry of self.list.'''
        for name in self.list:
            self.process_file(name)

    def recurse(self):
        '''Process every file below self.path, sub-directories included.'''
        for dirname, _subdirs, files in os.walk(self.path):
            self.caller(None, dirname, files)

    def caller(self, x, dirname, files):
        '''Process the entries of one directory (x is unused).'''
        for name in files:
            self.process_file(os.path.join(dirname, name))

    def process_file(self, file):
        '''Search one file and, unless in search-only mode, rewrite it.'''
        if not os.path.isfile(file):
            return
        if not self.check_inclusion(file):
            return
        self.file = file
        self.file_count += 1
        if self.load_file() is False:
            return
        found = self.replace()
        if found:
            saved = self.save_file()
            if saved:
                self.files_changed += 1
                print("--- above found at: '%s'" % file)
        elif self.verbose:
            print(file)

    def load_file(self):
        '''Read self.file into self.oldtext.

        Returns True on success; an unreadable file is reported and False is
        returned so one bad file does not abort the whole run.
        '''
        try:
            with _open_text(self.file, 'r') as source:
                self.oldtext = source.read()
        except (IOError, OSError, UnicodeError):
            print("--- error in %s" % self.file)
            return False
        return True

    def save_file(self):
        '''Write self.newtxt to self.file, after an optional backup.

        Returns True when the file was written (or nothing had to be written
        because of search-only mode), False otherwise.
        '''
        if self.search_only:
            return True
        path = self.file
        backup_path = None
        if self.backup:
            backup_path = path + BACKUP_EXT
            try:
                os.rename(path, backup_path)
            except OSError:
                print("backup can't be created. ignoring this file!")
                return False
        try:
            with _open_text(path, 'w') as target:
                target.write(self.newtxt)
        except (IOError, OSError, UnicodeError):
            print("------ error writing file: " + path)
            if backup_path is not None:
                # The original was moved away above; put it back so a
                # failed write does not lose the file.
                try:
                    os.rename(backup_path, path)
                except OSError:
                    print("------ original kept as: " + backup_path)
            return False
        return True

    def replace(self):
        '''Search self.oldtext; return True when self.old was found.

        In search-only mode a snippet around the first match is printed.
        Otherwise the text with every match replaced is stored in
        self.newtxt.
        todo - it would be nice to know how many replacments occured
        '''
        txt = self.oldtext
        if self.regex or self._ignore_case():
            if self.regex:
                pattern = self.regex
                replacement = self.new
            else:
                # plain text, case-insensitive: match it literally ...
                pattern = re.compile(re.escape(self.old), re.I)
                literal = self.new
                # ... and insert NEW verbatim (no backslash processing)
                replacement = lambda _match: literal
            hit = pattern.search(txt)
            if hit is None:
                return False
            start, stop = hit.span()
            if not self.search_only:
                self.newtxt = pattern.sub(replacement, txt)
        else:
            start = txt.find(self.old)
            if start < 0:
                return False
            stop = start + len(self.old)
            if not self.search_only:
                self.newtxt = txt.replace(self.old, self.new)

        if self.search_only:
            print('... %s ...'
                  % get_snippet(txt, start, stop, self.snippet_margin))
        return True


# ---------------------- MAIN ----------------
def main():
    '''Command-line entry point.'''
    if len(sys.argv) < 2:
        usage('what, no arguments?!', 4)

    # --- read command line
    replacer = Replacer()
    replacer.get_options()
    if replacer.interactive:
        replacer.fill_options_interactively()
    replacer.process_options()

    # --- a directory tree, explicitly listed files, or both
    if replacer.recursive and os.path.isdir(replacer.path):
        replacer.recurse()
    if replacer.list:
        replacer.process_file_list()

    print("files watched: " + str(replacer.file_count))
    if replacer.search_only:
        print("no changes done.")
    else:
        print("files changed: " + str(replacer.files_changed))


if __name__ == '__main__':
    main()