joshp |
10-15-2012 01:23 PM |
Python pull logs based on time.
Hello all,
I was asked to modify a script so that we can pull logs based on time. I am having a hard time working out the best way of doing so. The script that I have is as follows.
Code:
#!/usr/bin/python
import argparse
import re
import string
from datetime import datetime
from datetime import time
import time
# Command-line interface for the log parser.
#
# The positional argument is the log file itself; argparse opens it for
# reading, so ``args.file`` is an open file object that can be iterated line
# by line.  ``argparse.FileType`` is used instead of the Python-2-only ``file``
# builtin so that a missing or unreadable file is reported as a normal usage
# error instead of a traceback.
parser = argparse.ArgumentParser(description='Process The PreDealRisk Daemon Log File.')
parser.add_argument('file', metavar='file', type=argparse.FileType('r'),
                    help='The log file to parse')
parser.add_argument('-c', '--control', action='store_true',
                    help='Show the control messages in the logs')
parser.add_argument('-e', '--error', action='store_true',
                    help='Filter for error messages in the logs')
parser.add_argument('-i', '--info', action='store_true',
                    help='Filter for info messages in the logs')
parser.add_argument('-s', '--stats', action='store_true',
                    help='Show the performance statistics')
parser.add_argument('-r', '--response', metavar='Time', action='store', type=float, default=0,
                    help='Show OrderIDs with response times greater than this number of ms')
parser.add_argument('-o', '--order', metavar='ID', action='store',
                    help='Show the logs for the specific Order ID')
parser.add_argument('-w', '--warn', action='store_true',
                    help='Filter for warning messages in the logs')
# NOTE(review): both time options are plain on/off flags, so neither can carry
# a timestamp or a window size on the command line, and neither is read
# anywhere in the visible script.  To make them usable they would need to
# accept a value (e.g. ``type=int, default=5`` for the window) -- confirm the
# intended command-line contract before changing them.
parser.add_argument('-t', '--time', action='store_true',
                    help='Filter based on time window')
parser.add_argument('-tw', '--time-window', action='store_true',
                    help='Time window size default is 5min')
args = parser.parse_args()
# Regular expressions used to pick the log apart.
#
# Each pattern is written as adjacent raw-string pieces (joined by the
# compiler into one pattern) so that every bracketed log field sits on its own
# line next to the capture group number(s) it produces.

# A full log line:
#   [timestamp][level][...path/file.ext:line][thread] message
predealrisk_regex = re.compile(
    r'\[(\d+-\w+-\d+\s\d+:\d+:\d+.\d+)\]'      # group 1: timestamp text
    r'\[(\w+)\]'                               # group 2: level code
    r'\[(...([\w\/]+.\w+):(\d+))\]'            # group 3: location, 4: file, 5: line number
    r'\[(\w+)\]'                               # group 6: thread
    r'\s([\d\w\s\]\[!"#$%&\'()*+,./:;<=>?@\^_`{|}~-]+)'   # group 7: message text
)

# A message that mentions an order: anything, then "[ id:<ID> ".
orderid_regex = re.compile(
    r'[\w\d\s\]\[!"#$%&\'()*+,./:;<=>?@\^_`{|}~-]+'
    r'\[ id:([\w\d-]+) '                       # group 1: the order ID
)

# Message prefix that marks the response for the current order.
response_regex = re.compile(
    r'Responding with final verdict of'
)

# Message prefix that marks the last log line belonging to an order.
finish_regex = re.compile(
    r'[\d]+ orders on this account'
)
# Parser state carried from one log line to the next.
window = 5                 # NOTE(review): not read anywhere in the visible script
order_id = ""              # ID of the order currently being timed ("" = none)
start_time = None          # timestamp of the first line that mentioned order_id
elapsed_time_in_ms = 0     # response time of the current order; 0 until it responds
elapsed_times = []         # every response time measured so far, in milliseconds
parse_error = False        # becomes True when any line fails predealrisk_regex

# Format of the timestamp captured by group 1 of predealrisk_regex.
TIMESTAMP_FORMAT = "%d-%b-%Y %H:%M:%S.%f"
# Thread names that count as control messages for --control.
CONTROL_THREADS = ("A", "B", "D")

# With no filter option at all, no log lines are echoed.  This depends only on
# the command line, so it is computed once rather than per line.
is_no_detail = (not args.control and not args.error and not args.info
                and not args.warn and args.response == 0 and args.order is None)

# Parse each line of the log file
for line in args.file:
    match = predealrisk_regex.match(line)
    if not match:
        parse_error = True
        continue

    date_string = match.group(1)
    error_code = match.group(2)
    thread = match.group(6)
    message = match.group(7)

    # Look for a line where we are checking for an order.  Very rarely we get
    # a duplicate request for the same ID; that must not restart the timing,
    # hence the comparison with the current order_id.
    order_match = orderid_regex.match(message)
    if order_match and order_match.group(1) != order_id:
        order_id = order_match.group(1)
        start_time = datetime.strptime(date_string, TIMESTAMP_FORMAT)

    # Look for the line where the logs indicate we are responding.  A response
    # seen before any order line has no start time to measure from, so it is
    # skipped instead of subtracting None from a datetime.
    if response_regex.match(message) and start_time is not None:
        td = datetime.strptime(date_string, TIMESTAMP_FORMAT) - start_time
        total_microseconds = (td.days * 24 * 3600 + td.seconds) * 1000000 + td.microseconds
        # Microseconds -> milliseconds, the unit used by --response and by
        # every label printed for this value.
        elapsed_time_in_ms = total_microseconds / 1000.0
        elapsed_times.append(elapsed_time_in_ms)

    # Set conditions: each filter passes when its option is off, or when the
    # option is on and the line qualifies.
    is_control = not args.control or thread in CONTROL_THREADS
    is_error = not args.error or error_code == "ERR"
    is_info = not args.info or error_code == "INF"
    is_warn = not args.warn or error_code == "WRN"
    is_orderid = args.order is None or args.order == order_id
    is_slow_response = args.response > 0 and elapsed_time_in_ms >= args.response

    # Print the appropriate data depending on the conditions.
    if (is_error and is_info and is_control and is_warn and is_orderid
            and args.response == 0 and not is_no_detail):
        print(line.strip('\n'))
    elif is_slow_response:
        print("OrderId: " + order_id + ", Elapsed Time for Order Checking(ms): " + str(elapsed_time_in_ms))

    # This needs to be done after we print to get all the log lines before we
    # print the timing.
    # NOTE(review): the two resets are placed under the finish-line check;
    # confirm this nesting against the original script.
    if finish_regex.match(message):
        if args.order == order_id:
            print("\nElapsed Time for Order Checking(ms): " + str(elapsed_time_in_ms))
        order_id = ""
        # Reset elapsed_time
        elapsed_time_in_ms = 0
# Summary of all measured response times, printed only when --stats is given.
if args.stats:
    print("\nOrder Stats")
    print("-----------------------")
    if elapsed_times:
        # Percentiles are read straight out of the sorted list by index;
        # int(count * p) is always < count for p < 1, so the lookups are safe
        # whenever at least one time was recorded.
        elapsed_times.sort()
        count = len(elapsed_times)
        print(" Average Time (ms): " + str(sum(elapsed_times) / count))
        print(" 10th Percentile (ms): " + str(elapsed_times[int(count * 0.10)]))
        print(" 50th Percentile (ms): " + str(elapsed_times[int(count * 0.50)]))
        print(" 90th Percentile (ms): " + str(elapsed_times[int(count * 0.90)]))
        print(" Max Time (ms): " + str(elapsed_times[-1]))
    else:
        # Nothing was timed: averaging would divide by zero and the
        # percentile lookups would index an empty list.
        print(" No response times were recorded.")
# Warn when at least one log line did not match the expected line format.
if parse_error:
    print("\n\n PARSE ERROR: Something is wrong with the regex, please check this script.")
I was thinking of doing something with the datetime module, but I am new enough to Python that I am lost. Any ideas?
Josh
|