#!/usr/bin/perl
use List::MoreUtils qw(uniq);
# Print the usage summary and exit when the first argument is exactly -h or
# --help (case-insensitive).
# The match is anchored on both ends: an unanchored /-h|--help/ would also fire
# for any search term that merely contains "-h" (for example "foo-html"),
# making such terms impossible to search for. The defined() guard covers the
# no-argument invocation.
if (defined $ARGV[0] && $ARGV[0] =~ /\A(?:-h|--help)\z/i) {
	print "$0 , show default";
	print "\n$0 searchterm , show count of searchterm";
	print "\n$0 searchterm - , show count searchterm and show bot hits";
	print "\n$0 0 - , show bot hits, no searchterm";
	print "\n$0 -y , add switch anywhere to show yesterday's logs\n";
	exit;
}

# ---------------------------------------------------------------------------
# Configuration, counters and command-line switches
# ---------------------------------------------------------------------------

# Suffix appended to the access log path. The empty string selects the current
# log; ".1" (set by -y) selects the rotated log.
my $yesterday = '';

# The usage text says -y may appear anywhere on the command line, so it is
# removed from @ARGV once seen. Left in place it would shift the positional
# arguments the rest of the script reads: $ARGV[0] (search term) and
# $ARGV[1] ("-" to show bot hits).
if (grep { /^-y$/ } @ARGV) {
	$yesterday = ".1";
	@ARGV = grep { !/^-y$/ } @ARGV;
}

# Optional search term; it is matched case-insensitively as a regex against
# the lines counted as real hits.
my $stringtocheck = $ARGV[0];
my $stringtocheckcount = 0;

# Count of de-duplicated lines that are neither bot hits nor our own hits.
my $real = 0;

# Per-bot hit counters. Every key is used as a regex against the log line by
# botcheck(), which is why the "!" in the Yahoo entry carries a backslash.
my %whichbots = map { ( $_ => 0 ) } (
	'bingbot', 'Googlebot', 'Baiduspider', 'YandexBot', 'Sosospider',
	'DoCoMo', 'Yahoo\! Slurp', 'MJ12bot', 'discobot', 'ScoutJet',
	'SurveyBot', 'DotBot', 'Exabot', 'Ezooms', 'Search17Bot',
	'crawler4j', 'ips-agent', 'YandexImages', 'NerdByNature.Bot', 'msnbot',
	'Sogou', 'MLBot', 'SISTRIX', 'Bender', 'SBIder',
	'Nutch', 'Covario-IDS', 'SeznamBot', 'Aboundex', 'AhrefsBot',
	'AcoonBot', 'JikeSpider', 'EasouSpider', 'ProCogBot', 'OpenindexSpider',
);

# Total number of de-duplicated lines attributed to any bot.
my $bots = 0;

# NOTE(review): %whichIP is not referenced anywhere else in the visible
# script; kept in case something outside this view relies on it.
my %whichIP = ('10.13.37','0','127.0.0.1','0');

# Addresses (or address fragments) that identify our own requests.
# Dots are escaped and the alternatives are wrapped in word boundaries so that
# unrelated addresses such as 210.13.37.9 or 192.168.1.10 are not counted as
# "me", which the previous unescaped, unbounded pattern did.
my $myips = qr/\b(?:192\.168\.1\.1|10\.13\.37|127\.0\.0\.1)\b/;

# Count of raw log lines (before de-duplication) that match $myips.
my $me=0;

# Log lines mentioning any of these assets are skipped entirely. Dots are
# escaped so that only a literal "." matches.
my $filter = qr/meh\.cs|tech_avatar\.jpeg|xraykuh\.jpg|bitcoin\.png|favicon\.ico|judge\.php/;

# ---------------------------------------------------------------------------
# Read the access log, normalise each line and de-duplicate
# ---------------------------------------------------------------------------

# Normalised log lines in file order, and the same list with duplicates removed.
my @logentries;
my @unique;

# Three-argument open on a lexical handle, with the failure reported: an
# unreadable log previously produced a silent all-zero report.
my $logfile = "/var/log/nginx/access.log$yesterday";
open(my $log_fh, '<', $logfile) or die "Cannot open $logfile: $!\n";

while (my $line = <$log_fh>) {

	# Drop requests for assets listed in $filter.
	next if $line =~ /$filter/;

	# Own hits are counted on the raw lines, i.e. before de-duplication.
	$me++ if $line =~ /$myips/;

	# Remove the [timestamp]. The character class stops at the first "]";
	# a greedy .* would run to the LAST "]" on the line and swallow the
	# request and user agent whenever those contain a bracket.
	$line =~ s/\[[^\]]*\]//;

	# Remove the protocol, status code and byte count so that repeated
	# (e.g. ranged) requests for the same resource collapse into one entry.
	# Any HTTP/<d> or HTTP/<d>.<d> version is accepted, not only 1.0/1.1.
	$line =~ s/HTTP\/\d(?:\.\d)?\"[\d\s]+\"//;

	# Remove the "- -" placeholder fields left in front of the request.
	$line =~ s/- -  //;

	push(@logentries, $line);
}
close($log_fh);

# uniq() keeps the first occurrence of each line and preserves order.
@unique = uniq(@logentries);

# ---------------------------------------------------------------------------
# Classify the de-duplicated lines and print the report
# ---------------------------------------------------------------------------

# Compile the search term once. When no term was given the match is skipped
# altogether: an empty pattern in Perl does not mean "match everything", it
# re-uses the last successfully matched regex.
my $search_re;
if (defined $stringtocheck && length $stringtocheck) {
	$search_re = eval { qr/$stringtocheck/i }
		or die "Invalid search pattern '$stringtocheck': $@";
}

foreach my $line (@unique) {
	# botcheck() updates the bot counters (and may echo the line) itself.
	next if botcheck($line);

	# Our own requests are not listed; they were counted while reading the log.
	next if $line =~ /$myips/;

	$real++;
	$stringtocheckcount++ if $search_re && $line =~ $search_re;
	print $line;
}

print "\n";
# A first argument of "0" is false here, which is how "0 -" requests bot
# output without printing a search-term count.
if ($ARGV[0]) {
	print "$stringtocheck => $stringtocheckcount" unless $stringtocheck =~ /^-y$/;
}
print "\nreal => $real\nme => $me\nbots total => $bots\n\n";

# List the bots that were seen, busiest first and then by name. Sorting keeps
# the output stable; plain hash order differs from one run to the next.
for my $key ( sort { $whichbots{$b} <=> $whichbots{$a} || $a cmp $b } keys %whichbots ) {
	my $value = $whichbots{$key};
	print "$key => $value, " if ($value);
}
print "\n";




# botcheck($line)
#
# Decide whether a log line was produced by one of the bots in %whichbots.
#
# Parameters:
#   $line - one log line.
#
# Returns 1 when a bot pattern matches, 0 otherwise.
#
# Side effects on a match: increments that bot's counter in %whichbots and the
# $bots total, and prints the line when the second command-line argument is "-".
sub botcheck {
	my $line = shift;

	# The defined() guard covers invocations with fewer than two arguments.
	my $show_bot_hits = defined $ARGV[1] && $ARGV[1] eq '-';

	# Keys are tried in sorted order. Hash order is randomised per process, so
	# without the sort a line matching more than one pattern would be credited
	# to a different bot from run to run.
	for my $key (sort keys %whichbots) {
		# Each key is applied as a regex, not as a literal string.
		if ($line =~ /$key/) {
			$whichbots{$key}++;
			$bots++;
			print $line if $show_bot_hits;
			return 1;
		}
	}
	return 0;
}

__DATA__

173.218.146.124 - -  "GET /library/Electronics/High%20Voltage%20Engineering%20Fundamentals_%202nd%20Ed_%20EKuffel%20WSZaengl%20JKuffel.pdf HTTP/1.1" 206 69537 "http://www.superkuh.com/library/Electronics/High%20Voltage%20Engineering%20Fundamentals_%202nd%20Ed_%20EKuffel%20WSZaengl%20JKuffel.pdf" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30"

173.218.146.124 - -  "GET /library/Electronics/High%20Voltage%20Engineering%20Fundamentals_%202nd%20Ed_%20EKuffel%20WSZaengl%20JKuffel.pdf HTTP/1.1" 206 81998 "http://www.superkuh.com/library/Electronics/High%20Voltage%20Engineering%20Fundamentals_%202nd%20Ed_%20EKuffel%20WSZaengl%20JKuffel.pdf" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30"

93.75.59.29 - - [10/Jul/2011:16:00:55 -0500] "GET /library/Electronics/Advances%20in%20High%20Voltage%20Engineering_%20Haddad%20Warne.pdf HTTP/1.1" 206 15687 "-" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3)"

93.75.59.29 - - [10/Jul/2011:16:00:56 -0500] "GET /library/Electronics/Advances%20in%20High%20Voltage%20Engineering_%20Haddad%20Warne.pdf HTTP/1.1" 206 13609 "-" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3)"

93.75.59.29 - - [10/Jul/2011:16:01:18 -0500] "GET /library/Electronics/Advances%20in%20High%20Voltage%20Engineering_%20Haddad%20Warne.pdf HTTP/1.1" 206 2448800 "-" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3)"
