##********************** Code Begins ***********************************
#!/usr/bin/env python
import sys ,re, os
# Pattern for one web-server access-log line. Fields, in order: client address,
# an "unknown" field enclosed by two literal dashes, bracketed timestamp, the
# quoted request (method, page, optional ?query, protocol), status code,
# byte count, quoted referer and quoted user agent.
# The query-string group is optional so that requests without "?..." still match.
LOG_PAT = r'''(?P<ip_address>.*?) -(?P<unknown>.*?)- \[(?P<date>.*?)\] "(?P<method>.*?) (?P<page>.*?)(?P<querystr>\?.*?)? (?P<protocol>.*?)" (?P<code>\d*) (?P<bytes>.*?) "(?P<referer>.*?)" "(?P<useragent>.*?)"'''
log_reg = re.compile(LOG_PAT)

# (output key, regex group name) pairs, in the order the keys are inserted.
_FIELD_GROUPS = (
    ('ip_address', 'ip_address'),
    ('unknown', 'unknown'),
    ('date_time', 'date'),
    ('method', 'method'),
    ('request', 'page'),
    ('query', 'querystr'),
    ('protocol', 'protocol'),
    ('return_code', 'code'),
    ('return_byte', 'bytes'),
    ('refering_url', 'referer'),
    ('user_agent', 'useragent'),
)

### Function ###
def parse(input):
    """
    Parse one access-log line into a dict of its fields using LOG_PAT.

    The first match found anywhere in `input` is used (re.search), so
    leading/trailing text such as newlines is tolerated.

    Returns a dict with the keys ip_address, unknown, date_time, method,
    request, query, protocol, return_code, return_byte, refering_url and
    user_agent; every value is a string. `query` includes the leading '?'
    and is '' when the request has no query string. Returns an empty dict
    when the line does not match the pattern.

    NOTE (original author): needs further customization to work with
    clickTAG after the query string.
    """
    output = {}
    m = log_reg.search(input)
    if m:
        for key, group in _FIELD_GROUPS:
            # Only the query-string group can be unmatched (None); normalise
            # that to '' so every value in the result is a string.
            output[key] = m.group(group) or ''
    return output
### Main Routine ###
if __name__ == "__main__":
    # Demo: parse one sample access-log line and print each extracted field.
    input_text = """
24.102.200.210 - - [10/Oct/2003:16:35:30 -0700] "HEAD /something/468x60maximize.gif?name=temp HTTP/1.1" 304 0 "http://www.fakedomain.com/fakepage.html" "Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; Rogers Hi-Speed Internet; Junky "
"""
    log_hash = parse(input_text)
    for x in log_hash:
        # A parenthesised single-argument print behaves the same as the
        # Python 2 print statement and is also valid on Python 3.
        print("%s : %s" % (x, log_hash[x]))
### Output ###
# refering_url : http://www.fakedomain.com/fakepage.html
# date_time : 10/Oct/2003:16:35:30 -0700
# protocol : HTTP/1.1
# unknown :
# request : /something/468x60maximize.gif
# return_code : 304
# user_agent : Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; Rogers Hi-Speed Internet; Junky
# return_byte : 0
# query : ?name=temp
# ip_address : 24.102.200.210
# method : HEAD
##********************** Code Ends ***********************************
|