您的輸入數據太短,無法對其進行徹底測試。這裏有一個 awk
腳本,它大致可以處理您想要的內容。腳本帶有完整的註釋,您可以在此基礎上修改以滿足自己的需求:
`script.awk` 的內容:
BEGIN {
    ## Upper bounds of the duration slices, kept both as the raw string and
    ## as an array (slices_a[1..4]). They correspond to the boundaries named
    ## in the header columns below.
    slices = "1 10 100 500"
    split(slices, slices_a)

    ## Column header line printed above every report block.
    header = sprintf("\t%-10s\t%10s\t%10s\t%10s\t%10s\t%10s", "AVG", "<1ms", "1-10ms", "10-100ms", "100-500ms", "500+ms")

    ## Hardcoded start and end of the observation window. Nothing else in
    ## the script shown here reads these two values.
    start_time = mktime("2010 02 10 10 00 00")
    end_time = mktime("2010 02 10 11 00 00")
}
{
    ## Per-record rule. Expected record layout (whitespace separated):
    ##   $1 = date, $2 = time as HH:MM:SS.mmm, $3 = duration, $NF = name
    ##
    ## Split the time field into hour, minute, second and milliseconds.
    fields = split($2, time, /[:.]/)
    if (fields != 4) {
        print "WARNING: Skipped line " FNR " because had bad formatted time."
        ## Really skip the record, as the warning announces; otherwise a
        ## garbage timestamp would be fed to mktime() and the duration
        ## would still be accumulated.
        next
    }

    ## Timestamp of this record. The date part is hardcoded; the report
    ## function only prints hour and minute from it.
    curr_time = mktime("2010 02 10 " time[1] " " time[2] " " time[3])

    ## Key identifying the clock minute this record belongs to. Comparing
    ## keys (instead of "more than 59 seconds since the first record of the
    ## window") flushes exactly at minute boundaries, even when the first
    ## record of a minute did not arrive at second 00.
    curr_minute = $1 " " time[1] ":" time[2]

    if (! have_minute) {
        ## First valid record: nothing to compare against yet. A flag is
        ## used rather than FNR == 1 so that a malformed first line or a
        ## second input file does not break the initialisation.
        have_minute = 1
        prev_time = curr_time
        prev_minute = curr_minute
    }
    else if (curr_minute != prev_minute) {
        ## A new minute has started: report the finished one and reset.
        print_minute_info(duration, prev_time, header, slices_a)
        prev_time = curr_time
        prev_minute = curr_minute
        delete duration
    }

    ## For each name (last field) concatenate durations, pipe separated.
    duration[ $NF ] = duration[ $NF ] "|" $3
}
END {
    ## End of input: no later record will trigger the report for the
    ## data still accumulated, so print it now.
    print_minute_info(duration, prev_time, header, slices_a)
}
## Print one report block for every name collected in the current minute.
##
## Parameters:
##   duration   array indexed by name; each value is a pipe-prefixed list
##              of durations, for example:
##                  duration[ bad_gateway ] = "|34.567|234.918|56.213"
##   prev_time  epoch timestamp of the minute being reported (printed as HH:MM)
##   header     preformatted column header line
##   slices_a   array of ascending slice upper bounds (slices_a[1..n])
##
## Everything after slices_a in the parameter list is a local variable.
##
## For each name the durations are split on the pipe, averaged, and the
## number of samples is printed in the column whose slice contains the
## average (the last column when the average is above every bound).
function print_minute_info(duration,prev_time,header,slices_a,    name,sum,times,times_a,num_times,i,times_avg,printed,num_slices) {
    ## The number of slices does not change while reporting; compute it once.
    num_slices = length(slices_a)

    for (name in duration) {
        ## Drop the leading pipe, then split; split() returns the count.
        times = substr(duration[name], 2)
        num_times = split(times, times_a, /\|/)

        ## Nothing recorded for this name: avoid a division by zero.
        if (num_times == 0) {
            continue
        }

        sum = 0
        for (i = 1; i <= num_times; i++) {
            sum = sum + times_a[i]
        }
        times_avg = sum / num_times

        printf "%s\n", name
        printf "%s\n", strftime("%H:%M", prev_time)
        printf "%s\n", header
        printf "\t%-10s", times_avg

        ## Reset for every name; otherwise a match for an earlier name
        ## would suppress the count of a later name falling in the last
        ## column.
        printed = 0

        ## Emit empty cells until the slice containing the average is
        ## reached. Every cell, including the one with the count, uses the
        ## same tab + 10 character layout as the header so the count lines
        ## up under its column.
        for (i = 1; i <= num_slices; i++) {
            if (times_avg < slices_a[i]) {
                printf "\t%10d\n", num_times
                printed = 1
                break
            }
            printf "\t%10s", ""
        }

        ## Average is not below any bound: it belongs to the last column.
        if (! printed) {
            printf "\t%10d\n", num_times
        }
        printf "\n"
    }
}
並假設有以下 infile:
2010-02-10 10:00:00.000 171.606 bad_gateway
2010-02-10 10:00:00.234 400.680 bad_gateway
2010-02-10 10:00:00.410 212.308 login_from
2010-02-10 10:00:00.601 223.251 bad_gateway
2010-02-10 10:01:00.401 224.251 bad_gateway
2010-02-10 10:01:00.701 225.251 bad_gateway
2010-02-10 10:01:04.401 226.251 login_to
2010-02-10 10:02:04.401 1.251 login_to
像這樣運行它:
awk -f script.awk infile
得出:
login_from
10:00
AVG <1ms 1-10ms 10-100ms 100-500ms 500+ms
212.308 1
bad_gateway
10:00
AVG <1ms 1-10ms 10-100ms 100-500ms 500+ms
265.179 3
bad_gateway
10:01
AVG <1ms 1-10ms 10-100ms 100-500ms 500+ms
224.751 2
login_to
10:01
AVG <1ms 1-10ms 10-100ms 100-500ms 500+ms
226.251 1
login_to
10:02
AVG <1ms 1-10ms 10-100ms 100-500ms 500+ms
1.251 1
顯示示例代碼 – bksi
'我有一個大的逗號分隔的日誌文件...'逗號在哪裏?或者我忘了逗號是怎麼樣的。 – Kent
我的錯,我從csv做了一個乾淨的文件。 – Telcom