
k2011
Novice
Jun 30, 2011, 10:32 AM
Post #7 of 9
(2282 views)
|
|
Re: [FishMonger] need regular expression
[In reply to]
|
Can't Post
|
|
Basically, searchlog is a C++ executable that mines the datalogs. When invoked as `/searchlog -o datalogs`, it produces logs like this:
# Sample of the '~'-delimited records that searchlog emits, one per line:
# //line 1
# 1297286297~1297286297~1297286297~Smpp~25061~http~25061r~AlphaPg~260500004002~UNDEF~0~false~false~false~0:0:0~0~~0::0::0~0~Default~0~unknown_method~Delivered~~1:0:0~0:0:0~~~1005~~9015~~~~121~id:500004002 sub:001 dlvrd:000 subm it date:1102091518 done date:1102091518 stat:DELIVRD err:000 Text:hello world
# //line 2
# //line 3 etc etc

use strict;
use warnings;
use Sys::Hostname qw(hostname);

my $infile    = '/data/datalogs';
my $search    = '/source/searchlog';    # C++ executable
my $svcname   = 'ver10';
my $out_dir   = '/homes/outputfiles';
my $records   = 0;                      # count of total records
my $found     = 0;                      # count of codes found
my $file_base = 'EmailStats';
my $out_file;

# NOTE(review): $host was interpolated into the control file but never
# defined in the posted code; assuming the local host name is what was
# intended -- confirm.
my $host = hostname();

# mine_a_log($date, $in, $out)
#
# Runs "$search -o $infile", tallies the codes found in its output and
# writes a SQL*Loader-style control file (header plus CSV data) to $out.
#
#   $date - date string embedded in the createTime to_date() expression
#   $in   - accepted but not used; the file-level $infile is what is mined
#           (NOTE(review): probably meant to be used instead -- confirm
#           with callers before changing)
#   $out  - path of the control file to create (truncated if it exists)
#
# Returns ($records, $found): the number of lines read from searchlog and
# the number of those lines that yielded a code.  Returns an empty list
# (after a warning) if $out cannot be opened.  Dies if searchlog cannot
# be started.
sub mine_a_log {
    my ($date, $in, $out) = @_;
    my $records = 0;    # local scope
    my $found   = 0;    # local scope

    my $outfh;
    unless (open $outfh, '>', $out) {
        warn "Couldn't open $out for writing: $!\n";
        return;
    }

    print {$outfh} <<"ESQL";
options(silent=(header,feedback))
load data
infile * append into table Email_Stats
fields terminated by ','
(
hostnameName constant "$host",
svcName constant "$svcname",
createTime EXPRESSION "to_date('$date','YYYY-Mon-DD')",
MCode,
 ToEmail,
 FromEmail
)
begindata
ESQL

    # A code is a run of 3-6, 8-9 or exactly 12 digits.  The posted
    # "\d{3,4,5,6,8,9,12}" is not a valid quantifier (Perl treats the braces
    # as literal text), so the allowed lengths are spelled out, longest first.
    my $code = qr/(?:\d{12}|\d{8,9}|\d{3,6})/;

    # Field numbers are 0-based.  The posted pattern had "(?!8080)~", which
    # can only match an empty field 7 and therefore rejects the sample
    # record above.
    # NOTE(review): assuming the intent is "field 7 must not start with
    # 8080" -- confirm.
    my $new_split = qr{
        \A
        (?:[^~]*~){5}                  # fields 0-4: anything
        http~                          # field 5: must be exactly "http"
        [^~]*~                         # field 6: anything
        (?!8080)[^~]*~                 # field 7: must not start with 8080
        (?:[^~]*~){22}                 # fields 8-29: anything
        (?:
            ($code)~                   # field 30 is a code        -> source
          |
            [^~]+~[^~]*~               # else skip fields 30 and 31
            ($code)(?:~|\z)            # and take a code in field 32 -> dest
        )
    }x;

    my $cmd_string = "$search -o $infile";
    print "-- executing $cmd_string\n";    #if $verbose;

    # List-form pipe open: no shell is involved, so the paths are passed to
    # the executable verbatim.
    open my $log_fh, '-|', $search, '-o', $infile
        or die "Problem: can't fork command ($cmd_string) $! $?\n";

    my (%seen, %sent, %rcvd);
    while (my $line = <$log_fh>) {
        $records++;
        chomp $line;
        next unless $line =~ $new_split;

        # Exactly one of the two alternatives matched, so exactly one of
        # the captures is defined: if the source is a code, dest can't be.
        my ($source, $dest) = ($1, $2);
        $found++;
        if (defined $source) {
            $sent{$source}++;
        }
        else {
            $rcvd{$dest}++;
        }
    }

    # Merge into code => [sent count, received count], defaulting the
    # missing side to 0 so every output row has three populated columns.
    $seen{$_} = [ $sent{$_}, 0 ] for keys %sent;
    for my $rc (keys %rcvd) {
        $seen{$rc} ||= [ 0, 0 ];
        $seen{$rc}[1] = $rcvd{$rc};
    }

    # print all found codes, numerically sorted for human readability
    for my $sc (sort { $a <=> $b } keys %seen) {
        print {$outfh} join(',', $sc, @{ $seen{$sc} }), "\n";    # code,sent,rcvd
    }

    close $log_fh
        or warn "Problem while closing datalog piped read: $! $?\n";
    close $outfh
        or warn "Problem while closing $out: $!\n";

    return ($records, $found);
}
|