#!/usr/bin/perl
############################################
## ##
## CounterLog ##
## by Darryl Burgdorf ##
## (e-mail burgdorf@awsd.com) ##
## ##
## version: 1.22 ##
## last modified: 07/05/01 ##
## license modified: 4/13/06 ##
## copyright (c) 2001 ##
## ##
## latest version is available from ##
## http://awsd.com/scripts/ ##
## ##
############################################
# COPYRIGHT NOTICE:
#
# Copyright 2001 Darryl C. Burgdorf.
#
# This program is free software. You can redistribute it and/or
# modify it under the terms of either:
#
# a) the GNU General Public License as published by the Free Software
# Foundation, either version 1 or (at your option) any later version,
#
# or
#
# b) the "Artistic License" which comes with this program.
#
# You should have received a copy of the Artistic License with this
# module, in the file artistic.txt. If you did not, I'll be glad to
# provide one.
#
# You should have received a copy of the GNU General Public License
# along with this program. If you did not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307.
#
# Selling the code for this program without prior written consent is
# expressly forbidden. Obtain permission before redistributing this
# program over the Internet or in any other medium. In all cases
# copyright and header must remain intact.
# DESCRIPTION:
#
# CounterLog monitors accesses to any pages which contain an SSI code
# referencing it. It can create NCSA-format agent and referer logs,
# which can be handy if your server doesn't happen to already provide
# you with that information. It can also be used to put text-based
# access counters on some or all of your pages. (Requires SSI.)
# VERSION HISTORY:
#
# 1.22 07/05/01 Updated spider/robot filter lists
# 1.21 01/09/01 SCRIPT NOW REQUIRES PERL 5
# Added $HourOffset variable
# Added optional "idiot filter" file
# 1.20 12/25/00 Added spider/robot filter
# Allowed script to be called via JavaScript tag
# 1.13 07/04/98 Rewrote file locking routines
# 1.12 05/02/98 Trapped "run on" URLs
# Modified lock routine cycle time
# 1.11 03/09/98 Corrected new glitch in URL processing
# Made agent & referer logs optional
# 1.10 01/23/98 Added "today's" hits to counter display
# Yet a few more small tweaks
# 1.02 08/06/97 A few small tweaks
# 1.01 05/05/97 Fixed minor bug in file locking
# Minor format changes
# 1.00 02/25/97 Initial "public" release
#########
# SETUP #
#########
# Any page you want monitored should contain somewhere the following
# SSI code:
#
#   <!--#exec cgi="/cgi-bin/counterlog.pl"-->
#
# (If you've put the CounterLog script somewhere other than in your
# cgi-bin directory, the location should be revised accordingly.)
#
# If you're putting a counter on the page, of course, the SSI code
# should be placed on the page where you want the counter to appear.
# Otherwise, it makes no difference at all where you put it.
# Alternately, you can call CounterLog via a JavaScript tag:
#
#   <script language="JavaScript" src="/cgi-bin/counterlog.pl?jscript"></script>
#
# If you call the script in this manner, you won't need to worry
# about making sure your pages are set to be parsed by the server
# for SSI content. Of course, on the down side, you won't be able
# to count hits from anyone who visits your page with a browser that
# doesn't support JavaScript, or who visits with JavaScript turned
# off.
# --- Log files ---------------------------------------------------------
# Full paths of the files CounterLog writes to.  Every file named here
# must already exist and be world-writable.  $AccessFile is mandatory.
# $AgentFile and $RefererFile are optional: define them only if you
# want the agent and/or referer logs to be kept.
$AccessFile  = '/home/content/N/i/n/NinaSutton/html/count_access.txt';
$AgentFile   = '/home/content/N/i/n/NinaSutton/html/count_agent.txt';
$RefererFile = '/home/content/N/i/n/NinaSutton/html/count_referer.txt';

# --- Reload ("idiot") filter ---------------------------------------------
# Optional.  When this variable names a file, CounterLog remembers
# which IP address visited which page and counts at most one visit per
# address and page in any half-hour period, so that reloading a page
# over and over cannot inflate its count.  Uncomment to enable.
#$IdiotFilterFile = "/usr/www/users/foo/idiotfilter.txt";

# --- Domain handling -----------------------------------------------------
# 1: prefix page names in the access and referer logs with the domain
#    they were served from.  Useful when several virtual domains share
#    this script and have identically named pages (e.g. "index.html").
# 0: record the page name without the domain.
$IncludeDomain = 1;

# 1: leave referrals that come from a page on the same domain out of
#    the referer log, keeping that log small.  This applies whatever
#    $IncludeDomain is set to.
# 0: log those internal referrals too, e.g. to see how often visitors
#    reach a page from each of several other pages on the same site.
$IgnoreIntRefs = 1;

# --- Visitors to ignore --------------------------------------------------
# Domain names and/or IP numbers whose accesses should not be counted.
@IgnoreIP = ();

# --- Visible counters ----------------------------------------------------
# CounterLog prints nothing on a page unless that page is listed in one
# of the two arrays below.  Entries must be written exactly as the page
# appears in the access list.
#
# Pages that show a plain number:
@PlainCounter = ('dealsfordivas.com/index.shtml');

# Pages that show an ordinal instead (1st, 2nd, 3rd, ...):
@OrdCounter = (
    'foo.com/file3.html',
    'foo.com/file4.html',
);

# --- File locking --------------------------------------------------------
# 1 if the system supports the flock() command, 0 if it does not.
$UseLocking = 1;

# --- Time zone -----------------------------------------------------------
# Number of hours to add to the server's clock so that the script works
# in *your* local time.  A server in the Eastern time zone with an
# owner in the Pacific time zone would use -3.  The only thing this
# really affects is the moment at which a new daily count begins.
$HourOffset = 0;
###############
# ACCESS FILE #
###############
# Request setup: establish the (offset-adjusted) current time, send the
# HTTP header, and work out which page is being counted ($doc_uri).
#
# $time is the server clock shifted by $HourOffset hours.
$time = time + ($HourOffset * 3600);

# The script was requested through a JavaScript tag when the query
# string contains "jscript"; otherwise it is treated as an SSI call.
my $called_via_js =
    (defined $ENV{'QUERY_STRING'} && $ENV{'QUERY_STRING'} =~ /jscript/) ? 1 : 0;

if ($called_via_js) {
    print "Cache-Control: no-cache\n";
    print "Pragma: no-cache\n";
    print "Content-type: application/x-javascript\n\n";

    # The counted page is taken from the Referer header of this request.
    # Everything up to and including "//" and a leading "www." are dropped.
    $doc_uri = defined $ENV{'HTTP_REFERER'} ? $ENV{'HTTP_REFERER'} : "";
    $doc_uri =~ s/.*\/\///g;
    $doc_uri =~ s/^www\.//i;
}
else {
    print "Content-type: text/html\n\n";

    $doc_uri = "";
    if (defined $ENV{'DOCUMENT_URI'}) {
        $doc_uri = $ENV{'DOCUMENT_URI'};
        if (defined $ENV{'SERVER_NAME'}) {
            $server = $ENV{'SERVER_NAME'};
            # Only a *leading* "www." is removed, matching what the
            # JavaScript branch does; an unanchored pattern would also
            # mangle host names that merely contain "www." somewhere.
            $server =~ s/^www\.//i;
            if ($IncludeDomain) {
                $doc_uri = $server . $doc_uri;
            }
        }
    }
}

# Normalise the page name:
#   - collapse "//" into "/" unless it directly follows a colon,
#   - cut off anything that follows an .htm/.html/.shtml/.phtml style
#     extension and a slash ("run on" URLs),
#   - drop a trailing slash.
$doc_uri =~ s#([^:])//#$1/#g;
$doc_uri =~ s#\.((s|p)*html*)/.*$#\.$1#;
$doc_uri =~ s#/$##;
# User-agent fragments (regex alternations, matched case-insensitively)
# that identify automated clients whose visits should not be counted.
$harvester_list = 'bullseye|cherrypicker|crescent|emailcollector|emailsiphon|emailwolf|extractor|microsoft url|mozilla/3.mozilla/2.01|newt|nicerspro|webbandit|brutus';
$download_list = 'da \d|dnload|download|fetch|flashget|ftp|getright|gozilla|jetcar|leach|leech';
$linkchecker_list = 'analyze|check|link|netmechanic|netmind|powermarks|redalert|tooter|validat|verif|walk';
$offline_list = 'avantgo|batch|copier|httrack|msiecrawler|msproxy|netattache|netscape-proxy|offline|spacebison|teleport|webcapture|webzip';
$spider_list = 'aport|archive|ask jeeves|behold|borg|bot|catch|crawl|digger|elitesys|enfish|esense|euroseek|ferret|grab|griffon|gulliver|harvest|htdig|hubat|hunt|infoseek|java|leia|lwp-|lwp:|mantraagent|mapper|mata hari|mercator|netants|perl|quest|reader|reaper|roamer|rover|scooter|search|slurp|snatch|spider|spinne|spyder|sweep|t-h-u-n-d-e-r-s-t-o-n-e|ultraseek|url|utopy|webcollage|webster pro|webwhacker|wfarc|wget|whatuseek';

{
    my $agent = $ENV{'HTTP_USER_AGENT'};

    # Does the agent string contain a fragment from any of the lists?
    my $looks_automated = 0;
    foreach my $fragments ($harvester_list, $download_list,
                           $linkchecker_list, $offline_list, $spider_list) {
        if ($agent =~ m#$fragments#i) {
            $looks_automated = 1;
            last;
        }
    }

    # Agents containing "robotics" or "hotjava" are exempt: they would
    # otherwise be caught by the "bot" and "java" fragments.
    if ($looks_automated
        && $agent !~ m#robotics#i
        && $agent !~ m#hotjava#i) {
        $NoCountHit = 1;
    }
}
# Skip counting when the visitor's address matches an @IgnoreIP entry.
# Each entry is used as a case-insensitive regular expression and is
# tested against REMOTE_ADDR only.
#
# The array is tested for emptiness with plain "@IgnoreIP":
# "defined @array" is rejected at compile time by Perl 5.22 and later.
if ((defined $ENV{'REMOTE_ADDR'}) && @IgnoreIP && !($NoCountHit)) {
    foreach my $ignored_entry (@IgnoreIP) {
        # An empty pattern would not mean "match nothing": Perl would
        # reuse the last successfully matched regex instead.
        next unless defined $ignored_entry && length $ignored_entry;
        if ($ENV{'REMOTE_ADDR'} =~ /$ignored_entry/i) {
            $NoCountHit = 1;
            last;    # one match is enough
        }
    }
}
# Reload ("idiot") filter.  The filter file holds one line per recent
# visit, "<time> <page> <address>".  Entries older than 1800 seconds
# are dropped; if a remaining entry names this page and this address,
# the hit is not counted.  The file is then rewritten with the entries
# that were kept, plus a new entry for this visit when it is counted.
if ($IdiotFilterFile && !($NoCountHit)) {
    &LockOpen (*IDIOTFILTER,"$IdiotFilterFile");

    my $visitor_addr = defined $ENV{'REMOTE_ADDR'} ? $ENV{'REMOTE_ADDR'} : "";
    my @recent_visits = ();

    while (defined(my $visit = <IDIOTFILTER>)) {
        chomp ($visit);
        # int() picks up the leading timestamp of the line.
        my $visit_time = int($visit);
        next if (($time - $visit_time) > 1800);
        push (@recent_visits, $visit);
        # \Q..\E keeps "." and other metacharacters in the page name and
        # address literal, and \z stops 1.2.3.4 from matching 1.2.3.40.
        if ($visit =~ /^\d+ \Q$doc_uri\E \Q$visitor_addr\E\z/) {
            $NoCountHit = 1;
        }
    }

    # Rewrite the file from the start and cut off whatever is left of
    # the old, longer content.
    seek(IDIOTFILTER, 0, 0);
    foreach my $visit (@recent_visits) {
        print IDIOTFILTER "$visit\n";
    }
    unless ($NoCountHit) {
        print IDIOTFILTER "$time $doc_uri $visitor_addr\n";
    }
    truncate (IDIOTFILTER, tell(IDIOTFILTER));
    &LockClose (*IDIOTFILTER,"$IdiotFilterFile");
}
# Find this page's record in the access file, update its counters,
# print the counter if the page asks for one, and write the record back.
#
# A record is one line of the form
#   <10-digit total> <YYYYMMDD> <10-digit daily total> '<page>'
# The numeric fields are zero-padded to a fixed width, so an updated
# record is exactly as long as the one it replaces and can be written
# over it at the same byte offset.
&LockOpen (*COUNT,"$AccessFile");

# $location ends up as the offset of the matching record or, when the
# page has no record yet, as the end of the file.
$location = tell COUNT;
$acc = 0;
$day = "";
$dayacc = 0;
while (defined($line = <COUNT>)) {
    if (($line =~ /^(\d+) (\d+) (\d+) '(\S+)'$/) && ($4 eq $doc_uri)) {
        ($acc, $day, $dayacc) = ($1, $2, $3);
        last;
    }
    $location = tell COUNT;
}

$acc = int($acc);
unless ($NoCountHit) { $acc += 1; }

# Today's date as YYYYMMDD, based on the offset-adjusted clock.
($mday,$mon,$year) = (localtime($time))[3,4,5];
$year += 1900;
if ($mday < 10) { $mday = "0".$mday; }
$mon += 1;
if ($mon < 10) { $mon = "0".$mon; }
$today = $year.$mon.$mday;

# The daily count starts over when the stored date is not today's.
$dayacc = int($dayacc);
unless ($day eq $today) { $dayacc = 0; }
unless ($NoCountHit) { $dayacc += 1; }

# Decide what, if anything, to display.  A page listed in both arrays
# gets the ordinal form, because that check runs last.
if (grep { $doc_uri eq $_ } @PlainCounter) {
    $printacc = &commas($acc);
    $printdayacc = &commas($dayacc);
}
if (grep { $doc_uri eq $_ } @OrdCounter) {
    $printacc = &commas($acc) . &ordinalize($acc);
    $printdayacc = &commas($dayacc) . &ordinalize($dayacc);
}
if ($printacc) {
    $printline = "$printacc";
    if ($ENV{'QUERY_STRING'} =~ /jscript/) {
        $printline = &JSoutput($printline);
    }
    print "$printline";
}

# Write the record back (nothing is stored for an empty page name).
if ($doc_uri) {
    seek(COUNT, $location, 0);
    $longacc = sprintf("%010.10d", $acc);
    $longdayacc = sprintf("%010.10d", $dayacc);
    print COUNT "$longacc $today $longdayacc '$doc_uri'\n";
}
&LockClose (*COUNT,"$AccessFile");
##############
# AGENT FILE #
##############
# Append the visitor's user-agent string to the agent log.  Nothing is
# written when the browser sent no (or an empty) user agent.
if ($AgentFile && !($NoCountHit)) {
    open (COUNT,">>$AgentFile");
    $doc_agent = defined $ENV{'HTTP_USER_AGENT'} ? $ENV{'HTTP_USER_AGENT'} : "";
    print COUNT "$doc_agent\n" if $doc_agent;
    close (COUNT);
}
################
# REFERER FILE #
################
# Append a "referer -> page" line to the referer log.  Nothing is
# written when either the referer or the page name is empty, or when
# $IgnoreIntRefs is set and the referer contains this server's name.
if ($RefererFile && !($NoCountHit)) {
    open (COUNT,">>$RefererFile");
    $doc_referer = defined $ENV{'HTTP_REFERER'} ? $ENV{'HTTP_REFERER'} : "";
    if ($doc_referer && $doc_uri) {
        # $server is only set when SERVER_NAME was available.  Without
        # the guard an undefined $server yields an empty pattern, which
        # Perl replaces with the last successfully matched regex.
        # \Q..\E makes the dots in the host name match literally.
        my $is_internal = 0;
        if (defined $server && length $server
            && $doc_referer =~ m#\Q$server\E#i) {
            $is_internal = 1;
        }
        unless ($IgnoreIntRefs && $is_internal) {
            print COUNT "$doc_referer -> $doc_uri\n";
        }
    }
    close (COUNT);
}
###############
# SUBROUTINES #
###############
# LockOpen(*HANDLE, $filename)
#
# Opens $filename for reading and writing on the caller's filehandle and
# takes an exclusive lock on it.  The file is created, holding a single
# newline, if it does not exist yet.
#
#   $UseLocking true  - the lock is an flock() on the file itself.
#   $UseLocking false - the lock is a marker file "$filename.lok".  Up
#                       to 100 attempts, 0.01 seconds apart, are made to
#                       create it; a marker older than 30 seconds is
#                       treated as abandoned and removed first.
#
# Calls Error(), which does not return, when the file cannot be opened
# or the lock cannot be obtained.  Pair every call with LockClose().
sub LockOpen {
    use Fcntl qw(:flock O_WRONLY O_CREAT O_EXCL);

    # Alias FILE to the caller's filehandle for the duration of the call.
    local *FILE = $_[0];
    my $lockfilename = $_[1];
    my $lockmarker   = "$lockfilename.lok";

    # Create the file only if it is missing.  O_EXCL makes test and
    # creation one atomic step, so a concurrent request can never
    # truncate a file that another request has just created and filled.
    if (sysopen(FILE, $lockfilename, O_WRONLY | O_CREAT | O_EXCL)) {
        print FILE "\n";
        close(FILE);
    }

    if ($UseLocking) {
        open(FILE, '+<', $lockfilename) || &Error();
        flock(FILE, LOCK_EX) || &Error();
        return;
    }

    # Marker-file locking.  The age of an existing marker is measured
    # against the real clock: the global $time is shifted by
    # $HourOffset and would make every marker look abandoned (positive
    # offset) or none for hours (negative offset).
    my @marker_stat = stat($lockmarker);
    if (@marker_stat && ($marker_stat[9] + 30 < time())) {
        unlink($lockmarker);
    }

    my $have_lock   = 0;
    my $give_up     = 0;
    my $tries_left  = 100;
    while ($tries_left-- > 0) {
        # Atomic create: succeeds for exactly one of several competitors.
        if (sysopen(LOCKFILE, $lockmarker, O_WRONLY | O_CREAT | O_EXCL)) {
            print LOCKFILE "\n";
            close(LOCKFILE);
            $have_lock = 1;
            last;
        }
        # A failure other than "marker already exists" (for example a
        # directory that is not writable) will not go away by waiting;
        # carry on without a marker, as a failed create always did.
        unless ($!{EEXIST}) {
            $give_up = 1;
            last;
        }
        select(undef, undef, undef, 0.01);
    }
    &Error() unless ($have_lock || $give_up);

    unless (open(FILE, '+<', $lockfilename)) {
        # Do not leave our own marker behind when bailing out.
        unlink($lockmarker) if $have_lock;
        &Error();
    }
    return;
}
# LockClose(*HANDLE, $filename)
#
# Counterpart of LockOpen: closes the caller's filehandle and then
# removes the marker file "$filename.lok".  The handle is closed first
# so that everything has been written before the marker disappears.
sub LockClose {
    my $lock_marker = $_[1] . ".lok";
    # Alias FILE to the caller's filehandle for the duration of the call.
    local *FILE = $_[0];
    close (FILE);
    unlink ($lock_marker);
}
# Error()
#
# Reports a locking failure in place of the counter and ends the
# script.  The message is wrapped as JavaScript when the script was
# requested with "jscript" in the query string.  Does not return.
sub Error {
    my $wants_javascript = ($ENV{'QUERY_STRING'} =~ /jscript/);
    $printline = "[File Lock Error]";
    $printline = &JSoutput($printline) if $wants_javascript;
    print "$printline";
    exit(0);
}
# ordinalize($count)
#
# Returns the English ordinal suffix for $count: "st", "nd", "rd" or
# "th".  Only the suffix is returned, not the number itself.
sub ordinalize {
    my ($count) = @_;

    # 10 through 13 in the last two digits always take "th"
    # (11th, 12th, 13th, 111th, ...).
    my $last_two = $count % 100;
    return "th" if ($last_two >= 10 && $last_two <= 13);

    # Otherwise the final digit decides.
    my %suffix_for = (1 => "st", 2 => "nd", 3 => "rd");
    my $final_digit = $count % 10;
    return exists $suffix_for{$final_digit} ? $suffix_for{$final_digit} : "th";
}
# commas($number)
#
# Returns $number with a comma inserted between each group of three
# digits, e.g. 1234567 becomes "1,234,567".
sub commas {
    my ($text) = @_;
    # Each pass splits off the right-most run of three digits that still
    # has a digit directly in front of it; repeat until none is left.
    while ($text =~ s/(.*\d)(\d\d\d)/$1,$2/) {
        # nothing to do: the substitution is the work
    }
    return $text;
}
# JSoutput($text)
#
# Returns JavaScript source that writes $text into the page with
# document.write() and then calls document.close().  Backslashes and
# double quotes are escaped, carriage returns are removed, and each
# newline starts a new document.write() call.
#
# The argument is copied before it is edited.  Working on $_[0]
# directly would change the caller's variable and would die with
# "Modification of a read-only value" when a literal is passed.
sub JSoutput {
    my ($text) = @_;
    $text = "" unless defined $text;

    # Backslashes first, so that the ones added for the quotes below
    # are not doubled again.
    $text =~ s/\\/\\\\/g;
    $text =~ s/\"/\\"/g;
    $text =~ s/\r//g;
    $text =~ s/\n/\")\;\ndocument.write(\" /g;

    return "document.write(\" $text \");document.close();";
}
|
|
 |
|