#/usr/local/bin/perl
# parbitron -- a perl version of the program produces rating sweeps for USENET.

# To participate in the international monthly ratings sweeps, 
# run "arbitron" every month. Brian Reid runs the statistics program on the
# first day of each month; it will include any report that has reached it by
# that time. To make sure your site's data is included, run the survey
# program no later than the 20th day of each month.

# This version of arbitron was written by Spike (Joe Ilacqua),
# spike@world.std.com.  It seemed like the right thing to do at the time.

# Arbitron was originally written by Brian Reid, DEC Western Research Lab,
# reid@decwrl.dec.com]

# Notes: The Perl version of arbitron intentionally does not support:
#   NN's "~/.nn/rc" file, the current version of NN uses the ".newsrc".
#   Old B News' 2 field active files.
#  You should upgrade your software, or run the shell version of arbitron.
#
#  As with the shell arbitron, the results of this program are dependent
#  on the rate at which you expire news.  If you are a small site that
#  expires news rapidly, the results may indicate fewer active readers
#  than you actually have.

# Who to send the report to:
# uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey
$summarypath = 'admins@feenix.metronet.com';

# Range of /etc/passwd UID's that represent actual people (rather than
# maintenance accounts or daemons or whatever)
$lowUID = 200;
$highUID = 9999;

# If you need to get the active file from another host define activehost.
# The user running parbitron must be able to rsh(1) to the remote host.
#$activehost = 'foo';

$active = '/usr/lib/news/active';
$active = "rsh $activehost cat $active|" if ($activehost);

$users = 0;			# Users who could read news.
$newsreaders = 0;		# Users who do read news.

chop($date = `date`);
($wday,$mon,$day,$hour,$tz,$year) = split(' ',$date);
$dat="$mon$year";

# One of these should return the hostname.
chop($hostname = `hostname || uname -n || uuname -l`);

open(ACTIVE,$active) || die "Can't open active file: $!\n";

while(<ACTIVE>)
{
    next unless /^[a-z][-0-9_a-z]*\./; # from shell arbitron
    ($group,$maximum,$minimum) = split;
    $groupcount{$group} = 0;
    $groupmax{$group} = $maximum;
    $groupmin{$group} = $minimum;
}
close(ACTIVE);

while (($user,$pass,$uid,$gid,$quota,$com,$gcos,$dir) = getpwent) {
    next if ($uid < $lowUID) || ($uid > $highUID);
    $users++;

    next if $homes{$dir};	# Don't do a .newsrc twice
    $homes{$dir} = 1;
    next if (! -r "$dir/.newsrc");
    open(NEWSRC,"$dir/.newsrc") || next; # This shouldn't fail

    $counted = 0;

    while(<NEWSRC>) {
	next if (!/: [0-9]/);
	($group,$arts) = split;
	$group =~ s/://;
	next unless defined($groupcount{$group}); # bogus group
	next if $hits{$group};	# Don't count a group twice
	$hits{$group} = 1;

	$maximum = $groupmax{$group};
	$minimum = $groupmin{$group};
	next if $minimum == $maximum; # No articles if $minimum == $maximum

# We want the last article read from the line in the .newsrc, it is
# a comma septated number or range (i.e ...,415 or ...,3001-3078)

	@arts = split(',',$arts); # Split the line up on ","s
# Spilt the last element on "-" if need be
	@arts = split('-',$arts[$#arts]) if ($arts[$#arts] =~ /-/);
	if (($arts[$#arts] >= $groupmin{$group})
	    && ($arts[$#arts] <= $groupmax{$group})) {
	    $groupcount{$group}++;
	    if (!$counted) {
		$newsreaders++;	# We have found another reader of news
		$counted++;	# only count them once!
	    }
	}
    }
    undef %hits;
    close(NEWSRC);
}

undef %groupmax;
undef %groupmin;
undef %homes;

$i = 0;

while (($group,$count) = each %groupcount) {
    $tosort[$i++] = "$count $group";
}

undef %groupcount;

sub nr {  # test like 'sort -nr' for sort function
    ($anum,$astring) = split(' ',$a);
    ($bnum,$bstring) = split(' ',$b);
    if ($anum != $bnum) { -($anum <=> $bnum); }
    else {-($astring cmp $bstring);}
}
			      
@sorted = sort nr @tosort; # sort most read to least

open(MAIL,"|/bin/mail $summarypath");

print MAIL "Host\t\t$hostname\n";
print MAIL "Users\t\t$users\n";
print MAIL "NetReaders\t$newsreaders\n";
print MAIL "ReportDate\t$dat\n";
print MAIL "SystemType\tnews-perl-arbitron-2.4\n";
print MAIL join("\n",@sorted), "\n"; # output the sorted data

close(MAIL);