#! /usr/bin/perl -w
use lib '/usr/lib/perl'; use INN::Config;

##############################################################################
# perl-nocem - a NoCeM-on-spool implementation for INN 2.x.
# Copyright 2000 by Miquel van Smoorenburg <miquels@cistron.nl>
# Copyright 2001 by Marco d'Itri <md@linux.it>
# This program is licensed under the terms of the GNU General Public License.
#
# List of changes since the original implementation:
#
# 2002: Patch by Steven M. Christey for untrusted printf input.
# 2007: Patch by Christoph Biedl for checking a timeout.
# Documentation improved by Jeffrey M. Vinocur (2002), Russ Allbery (2006)
# and Julien Élie (2007).
# Various bug fixes, code and documentation improvements by Julien Élie
# in 2007-2009, 2013, 2014, 2016, 2018, 2020-2024.
##############################################################################

require 5.00403;
use strict;

use Getopt::Std;
use POSIX qw(locale_h strftime);
use Sys::Hostname;

# Use high resolution timers when available.
# To be effectively used, the time() function is imported by an evaluation
# inside a BEGIN block.
BEGIN {
    eval "use Time::HiRes qw(time);";
}

$0 =~ s!.*/!!;

my $usage = "Usage:
  Usually called as a channel feed in newsfeeds.  A typical entry is:

      nocem!\
        :!*,news.lists.filters\
        :Tc,Wf,Ap:$INN::Config::pathbin/$0 [-b directory] [-c filename] \
                  [-g command] [-G command] [-i filename] [-k keyring] \
                  [-l] [-u] [-v level]

  On command line, for testing purpose, a storage token or a file name is
  expected on standard input:

      grephistory '<Message-ID>' | $0 [options]
      echo '/path/to/a/nocem/message' | $0 [options]


Options:
  -b directory    Back up cancelled articles, storing them into files in the
                  given directory.

  -c filename     Use the specified configuration file (with an absolute path)
                  instead of the default one ($INN::Config::pathetc/nocem.ctl).

  -g command      Full command to run when invoking gpg.  By default,
                  \"$INN::Config::gpg --status-fd=1 --verify
                  --allow-weak-digest-algos\" is used.

  -G command      Full command to run when invoking gpgv.  By default,
                  \"$INN::Config::gpgv --status-fd=1\" is used.
                  Note that gpg will be preferentially used over gpgv when
                  both are available, unless -g is given an empty string.

  -h              Print this message.

  -i filename     Include an external Perl script (with an absolute path) to
                  load functions defining local rules.

  -k keyring      Use the specified keyring (with an absolute path) instead of
                  the default one ($INN::Config::pathetc/pgp/ncmring.gpg).
                  \"--keyring=\" is added with the keyring value to the gpg
                  and gpgv commands unless keyring is an empty string.

  -l              Send logs to perl-nocem.log in $INN::Config::pathlog
                  instead of using syslog.  (Error log level will still
                  additionally be sent to syslog if available.)

  -u              Exclude unprocessed NoCeM notices from daily reports
                  generated by innreport.  Issuers or types of notices not
                  configured in nocem.ctl will still be mentioned in the logs
                  but the related log lines will not be parsed by innreport.

  -v level        Increase log verbosity to that level (1 to 3).  Default is 1.
";

sub HELP_MESSAGE {
    print $usage;
    exit 0;
}

use vars
  qw($opt_b $opt_c $opt_g $opt_G $opt_h $opt_i $opt_k $opt_l $opt_u $opt_v);
getopts("b:c:g:G:hi:k:luv:") || die $usage;

HELP_MESSAGE() if defined($opt_h);

my $defaultPermfile = $INN::Config::pathetc . '/nocem.ctl';
my $permfile = $opt_c || $defaultPermfile;

my $defaultKeyring = $INN::Config::pathetc . '/pgp/ncmring.gpg';
my $keyring = defined($opt_k) ? $opt_k : $defaultKeyring;

my $defaultLoglevel = 0;
my $loglevel = $opt_v || $defaultLoglevel;

my ($working, $got_sighup, $got_sigterm, @ncmperm, $cancel);
my $syslog_available = 0;
my $log_open = 0;
my $nntp_open = 0;
my $last_cancel = 0;
my $socket_timeout = int($INN::Config::peertimeout * 0.95);
my ($hostname) = split(/\./, hostname());

# Not configurable as innreport explicitly parses a file with that name.
my $logfile = $INN::Config::pathlog . '/perl-nocem.log';

my $gpg = defined($opt_g) ? $opt_g : $INN::Config::gpg;
my $gpgv = defined($opt_G) ? $opt_G : $INN::Config::gpgv;
my ($gpgbin, $gpgvbin) = ($gpg, $gpgv);
$gpgbin =~ s/ .*//;
$gpgvbin =~ s/ .*//;

# initialization and main loop ###############################################

eval { require Sys::Syslog; import Sys::Syslog; $syslog_available = 1; };

if ($syslog_available) {
    if ($Sys::Syslog::VERSION lt 0.15) {
        eval "sub Sys::Syslog::_PATH_LOG { '/dev/log' }" if $^O eq 'dec_osf';
        Sys::Syslog::setlogsock('unix')
          if $^O =~ /linux|dec_osf|freebsd|darwin/;
    }
    openlog('nocem', '', $INN::Config::syslog_facility);
}

if (not $gpgbin and not $gpgvbin) {
    logmsg("Paths to both gpg and gpgv binaries unset", 'err');
    exit 1;
}

if ($gpgbin and not -x $gpgbin) {
    logmsg("gpg binary $gpgbin does not exist or is not executable", 'err');
    exit 1;
} elsif ($gpgvbin and not -x $gpgvbin) {
    logmsg("gpgv binary $gpgvbin does not exist or is not executable", 'err');
    exit 1;
}

if ($keyring and not -r $keyring) {
    logmsg("Keyring $keyring does not exist or is not readable", 'err');
    exit 1;
}

if ($loglevel !~ m!^\d+$!) {
    logmsg("-v value is not an integer: $opt_v", 'err');
    exit 1;
}

if ($opt_b and not -w $opt_b) {
    logmsg("Backup directory $opt_b does not exist or is not writable", 'err');
    exit 1;
}

if ($opt_i) {
    if (not -r $opt_i) {
        logmsg("File $opt_i does not exist or is not readable", 'err');
        exit 1;
    }
    if (open(my $localfile_fh, '<', $opt_i)) {
        my $lines = join('', <$localfile_fh>);
        close($localfile_fh);
        eval($lines);
        if ($@) {
            logmsg("Cannot load $opt_i: $@", 'err');
            exit 1;
        }
    } else {
        logmsg("Cannot open local file: $!", 'err');
        exit 1;
    }
}

if ($INN::Config::version and not $INN::Config::version =~ /^INN 2\.[0123]\./)
{
    $cancel = \&cancel_nntp;
} else {
    $cancel = \&cancel_ctlinnd;
}

$SIG{HUP} = \&hup_handler;
$SIG{INT} = \&term_handler;
$SIG{TERM} = \&term_handler;
$SIG{PIPE} = \&term_handler;

logmsg('Starting up');

unless (read_ctlfile()) {
    exit 1;
}

while (<STDIN>) {
    chop;
    $working = 1;
    do_nocem($_);
    $working = 0;
    term_handler() if $got_sigterm;
    hup_handler() if $got_sighup;
}

logmsg('Exiting', 'info');
exit 0;

##############################################################################

# Process one NoCeM message.
sub do_nocem {
    my $token = shift;
    my $start = time;

    # open the article and verify the notice
    my $artfh = open_article($token);
    return if not defined $artfh;
    my ($msgid, $nid, $issuer, $type, $skipped, $nocems) = read_nocem($artfh);
    close $artfh;
    return unless $msgid;

    my $nr = scalar(@$nocems);
    if ($nr > 0) {
        if ($opt_b) {
            backup_articles($nocems, $issuer, $type);
        }
        &$cancel($nocems);
        logmsg("Articles cancelled: " . join(' ', @$nocems));
    }
    my $diff = (time - $start) || 0.01;
    logmsg(
        sprintf(
            "Article %s: processed notice %s by %s for %s "
              . "(%d ids, %d skipped, %.5f s, %.1f/s)",
            $msgid, $nid, $issuer, $type, $nr, $skipped, $diff,
            ($nr + $skipped) / $diff,
        )
    );
}

# - Check if it is a PGP-signed NoCeM message
# - See if we want it
# - Then check PGP signature
sub read_nocem {
    my $artfh = shift;

    # Examine the first 200 lines to see if it is a PGP-signed NoCeM.
    my $ispgp = 0;
    my $isncm = 0;
    my $inhdr = 1;
    my $i = 0;
    my $body = '';
    my $from = '';
    my $subject = '';
    my $hasreferences = 0;
    my $msgid = '<>';

    while (<$artfh>) {
        last if $i++ > 200;
        s/\r\n$/\n/;
        if ($inhdr) {
            if (/^$/) {
                $inhdr = 0;
            } elsif (/^From:\s+(.*)\s*$/i) {
                $from = $1;
            } elsif (/^Message-ID:\s+(<.*>)/i) {
                $msgid = $1;
            } elsif (/^Subject:\s+(.*)$/i) {
                $subject = $1;
            } elsif (/^References:\s+/i) {
                $hasreferences = 1;
            }
        } else {
            $body .= $_;
            $ispgp = 1 if /^-----BEGIN PGP SIGNED MESSAGE-----/;
            if (/^\@\@BEGIN NCM HEADERS/) {
                $isncm = 1;
                last;
            }
        }
    }

    # Must be a PGP-signed NoCeM.
    if (not $ispgp) {
        logmsg("Article $msgid: not PGP-signed", 'info');
        return;
    }
    if (not $isncm) {
        logmsg("Article $msgid: not a NoCeM", 'info');
        return;
    }
    # Subject begins with @@NCM or @@newsgroup.
    if ($subject !~ /^@@\w/) {
        logmsg("Article $msgid: missing tag in Subject header field", 'info');
        return;
    }
    # Must not be a followup.
    if ($hasreferences) {
        logmsg("Article $msgid: has a References header field", 'info');
        return;
    }

    # Read the headers of this NoCeM, and check if it's supported.
    my %hdrs;
    while (<$artfh>) {
        s/\r\n/\n/;
        $body .= $_;
        last if /^\@\@BEGIN NCM BODY/;
        my ($key, $val) = /^([^:]+)\s*:\s*(.*)$/;
        $hdrs{ lc $key } = $val if defined($key);
    }
    foreach (qw(action issuer notice-id type version)) {
        next if $hdrs{$_};
        logmsg("Article $msgid: missing $_ pseudo header field", 'info');
        return;
    }
    return if not supported_nocem($msgid, \%hdrs);

    # Decide if we want it.  If not, count the number of skipped articles.
    if (not want_nocem(\%hdrs)) {
        my $inbody = 1;
        my $skipped = 0;
        while (<$artfh>) {
            s/\r\n$/\n/;
            $inbody = 0 if /^\@\@END NCM BODY/;
            next if not $inbody or /^#/;

            my ($id, $grp) = /^(\S*)\s+(\S+.*)$/;
            next if not $grp;
            $skipped++ if $id;
        }

        # innreport will parse "unwanted notice" lines.
        logmsg(
            sprintf(
                "Article %s: %s notice %s by %s for %s (%d ids)", $msgid,
                $opt_u ? "skipped" : "unwanted", $hdrs{'notice-id'},
                $hdrs{issuer}, $hdrs{type}, $skipped,
            )
        );
        return;
    }

    # We do want it, so read the entire article.  Also copy it to
    # a temp file so that we can check the PGP signature when done.
    my $tmpfile = "$INN::Config::pathtmp/nocem.$$";
    if (not open(OFD, ">$tmpfile")) {
        logmsg("Cannot open temp file $tmpfile: $!", 'err');
        return;
    }
    print OFD $body;
    undef $body;

    # Process NoCeM body.
    my $inbody = 1;
    my @nocems;
    my $skipped = 0;
    my ($lastid, $lastgrp);
    while (<$artfh>) {
        s/\r\n$/\n/;
        print OFD;
        $inbody = 0 if /^\@\@END NCM BODY/;
        next if not $inbody or /^#/;

        my ($id, $grp) = /^(\S*)\s+(\S+.*)$/;
        next if not $grp;
        # Convert the whitespace-separated list of newsgroups to a
        # comma-separated list of newsgrous.
        $grp =~ s/\s+/,/g;
        if ($id) {
            if ($lastid) {
                if (want_cancel_id($msgid, $lastid, $lastgrp, \%hdrs)) {
                    push(@nocems, $lastid);
                } else {
                    $skipped++;
                }
            }
            $lastid = $id;
            $lastgrp = $grp;
        } else {
            $lastgrp .= ',' . $grp;
        }
    }
    if ($lastid) {
        if (want_cancel_id($msgid, $lastid, $lastgrp, \%hdrs)) {
            push(@nocems, $lastid);
        } else {
            $skipped++;
        }
    }
    close OFD;

    # At this point we need to verify the PGP signature.
    my $e = pgp_check($hdrs{issuer}, $hdrs{type}, $msgid, $tmpfile);
    unlink $tmpfile;
    return if not $e;

    return (
        $msgid, $hdrs{'notice-id'}, $hdrs{issuer}, $hdrs{type}, $skipped,
        \@nocems,
    );
}

# Code to discard notices we do not want to take into account.
# It currently only calls a possible local function loaded with the -i flag.
# Some general rules could be added (for instance not to actually cancel
# articles in groups we do not carry).
sub want_cancel_id {
    my ($msgid, $artid, $groups, $hdrs) = @_;
    my $accepted = 1;

    $accepted = local_want_cancel_id(@_) if defined &local_want_cancel_id;

    if (not $accepted) {
        logmsg("Keeping $artid by local rule", 'info');
    }
    return $accepted;
}

# Do we actually want this NoCeM?
sub want_nocem {
    my $hdrs = shift;

    foreach (@ncmperm) {
        my ($issuer, $type) = split(/\001/);
        if ($hdrs->{issuer} =~ /\Q$issuer\E/i) {
            return 1 if '*' eq $type or lc $hdrs->{type} eq $type;
        }
    }
    return 0;
}

sub supported_nocem {
    my ($msgid, $hdrs) = @_;

    if ($hdrs->{version} !~ /^0\.9[0-9]?$/) {
        logmsg(
            "Article $msgid: version $hdrs->{version} not supported",
            'info',
        );
        return 0;
    }
    if ($hdrs->{action} ne 'hide') {
        logmsg(
            "Article $msgid: action $hdrs->{action} not supported",
            'info',
        );
        return 0;
    }
    return 1;
}

# Check the PGP signature on an article.
sub pgp_check {
    my ($issuer, $type, $msgid, $art) = @_;
    my @command;

    if ($gpgbin) {
        if ($opt_g) {
            @command = split(/ /, $gpg);
        } else {
            @command = (
                $INN::Config::gpg, '--status-fd=1',
                '--verify',
            );
            if ($INN::Config::gpg_has_allow_weak_digest_algos_flag) {
                push(@command, '--allow-weak-digest-algos');
            }
        }
    } else {
        if ($opt_G) {
            @command = split(/ /, $gpgv);
        } else {
            @command = ($INN::Config::gpgv, '--status-fd=1');
        }
    }
    if ($keyring) {
        push(@command, '--keyring=' . $keyring);
    }

    # fork and spawn a child
    my $pid = open(PFD, '-|');
    if (not defined $pid) {
        logmsg("pgp_check: cannot fork: $!", 'err');
        return 0;
    }
    if ($pid == 0) {
        open(STDERR, '>&STDOUT');
        exec(@command, $art);
        exit 126;
    }

    # Read the result and check status code.
    local $_ = join('', <PFD>);

    logmsg("Command line was: " . join(' ', @command) . " $art", 'debug');
    logmsg("Full PGP output: >>>$_<<<", 'debug');

    my $status = 0;
    if (not close PFD) {
        if ($? >> 8) {
            $status = $? >> 8;
        } else {
            logmsg("Article $msgid: "
                  . ($gpgbin ? $gpgbin : $gpgvbin)
                  . " killed by signal "
                  . ($? & 255), 'err');
            return 0;
        }
    }

    if (/^\[GNUPG:\]\s+GOODSIG\s+\S+\s+(.*)/m) {
        return 1 if $1 =~ /\Q$issuer\E/i;
        logmsg("Article $msgid: signed by $1 instead of $issuer for $type");
    } elsif (/^\[GNUPG:\]\s+NO_PUBKEY\s+(\S+)/m) {
        logmsg("Article $msgid: $issuer (ID $1) for $type not in keyring");
    } elsif (/^\[GNUPG:\]\s+BADSIG\s+\S+\s+(.*)/m) {
        logmsg("Article $msgid: bad signature from $1 for $type");
    } elsif (/^\[GNUPG:\]\s+BADARMOR/m or /^\[GNUPG:\]\s+UNEXPECTED/m) {
        logmsg("Article $msgid: malformed signature from $issuer for $type");
    } elsif (/^\[GNUPG:\]\s+ERRSIG\s+(\S+)/m) {
        # safety net: we get there if we don't know about some token
        logmsg("Article $msgid: unknown error (ID $1) from $issuer for $type");
    } else {
        # some other error we don't know about happened.
        # 126 is returned by the child if exec fails.
        s/ at \S+ line \d+\.\n$//;
        s/\n/_/;
        logmsg(
            "Article $msgid: "
              . ($gpgbin ? $gpgbin : $gpgvbin)
              . " exited "
              . (($status == 126) ? "($_)" : "with status $status"),
            'err',
        );
    }
    return 0;
}

# Read article.
sub open_article {
    my $token = shift;

    if ($token =~ /^\@.+\@$/) {
        my $pid = open(ART, '-|');
        if ($pid < 0) {
            logmsg('Cannot fork: ' . $!, 'err');
            return undef;
        }
        if ($pid == 0) {
            exec("$INN::Config::newsbin/sm", '-q', $token)
              or logmsg("Cannot exec sm: $!", 'err');
            return undef;
        }
        return *ART;
    } else {
        return *ART if open(ART, $token);
        logmsg("Cannot open article $token: $!", 'err');
    }
    return undef;
}

# Back up cancelled articles.
# Follow Appendix A of RFC 4155 (mbox format) but keep 8-bit characters.
# No need to escape "From " in articles.
sub backup_articles {
    my ($ids, $issuer, $type) = @_;
    my ($fh, $token, @article);

    my $filename = $issuer . "%" . $type;

    # Sanitize file names.
    $filename =~ s/[^A-Za-z0-9\-_+@.%]//g;

    my $backupfile = "$opt_b/$filename";

    # Use traditional ctime output.
    setlocale(LC_TIME, 'C');
    my $now = strftime("%a %b %e %T %Y", gmtime());

    if (not open($fh, ">>", "$backupfile")) {
        logmsg("Cannot open $backupfile: $!", 'err');
        return 0;
    }

    foreach my $mid (@$ids) {
        $token = qx($INN::Config::newsbin/grephistory -e '$mid' 2>/dev/null);
        $token =~ s/\n$//;
        if ($token =~ /^\@.+\@$/) {
            @article = qx($INN::Config::newsbin/sm -S '$token' 2>/dev/null);

            if ($? == 0 and scalar(@article) > 1) {
                logmsg("Backing up $mid into $backupfile", 'info');
                # Remove the first line (rnews header).
                shift(@article);
                print $fh "From $issuer $now\n";
                foreach my $line (@article) {
                    $line =~ s/\r\n?$/\n/;
                    print $fh "$line";
                }
                print $fh "\n";
            } else {
                logmsg("Backing up $mid not possible (not in spool)", 'debug');
            }
        } else {
            logmsg(
                "Backing up $mid not possible (no token in history)",
                'debug',
            );
        }
    }

    close($fh);

    return 1;
}

# Cancel a number of Message-IDs.  We use ctlinnd to do this,
# and we run up to 15 of them at the same time (10 usually).
sub cancel_ctlinnd {
    my @ids = @{ $_[0] };

    while (@ids > 0) {
        my $max = @ids <= 15 ? @ids : 10;
        for (my $i = 1; $i <= $max; $i++) {
            my $msgid = shift @ids;
            my $pid;
            sleep 5 until (defined($pid = fork));
            if ($pid == 0) {
                exec "$INN::Config::pathbin/ctlinnd", '-s', '-t', '180',
                  'cancel', $msgid;
                exit 126;
            }
            logmsg("Cancelling $msgid [$i/$max]", 'debug');
        }
        # Now wait for all children.
        while ((my $pid = wait) > 0) {
            next unless $?;
            if ($? >> 8) {
                logmsg("Child $pid died with status " . ($? >> 8), 'err');
            } else {
                logmsg("Child $pid killed by signal " . ($? & 255), 'err');
            }
        }
    }
}

sub cancel_nntp {
    my $ids = shift;
    my $r;

    if ($nntp_open and time - $socket_timeout > $last_cancel) {
        logmsg('Close socket for timeout', 'debug');
        close(NNTP);
        $nntp_open = 0;
    }
    if (not $nntp_open) {
        use Socket;
        if (not socket(NNTP, PF_UNIX, SOCK_STREAM, 0)) {
            logmsg("socket: $!", 'err');
            goto ERR;
        }
        if (not connect(NNTP, sockaddr_un($INN::Config::pathrun . '/nntpin')))
        {
            logmsg("connect: $!", 'err');
            goto ERR;
        }
        if (($r = <NNTP>) !~ /^200 /) {
            $r =~ s/\r\n$//;
            logmsg("Bad reply from server: $r", 'err');
            goto ERR;
        }
        select NNTP;
        $| = 1;
        select STDOUT;
        print NNTP "MODE CANCEL\r\n";
        if (($r = <NNTP>) !~ /^284 /) {
            $r =~ s/\r\n$//;
            logmsg("MODE CANCEL not supported: $r", 'err');
            goto ERR;
        }
        $nntp_open = 1;
    }
    foreach (@$ids) {
        logmsg("Cancelling $_", 'debug');
        print NNTP "$_\r\n";
        if (($r = <NNTP>) !~ /^289/) {
            $r =~ s/\r\n$//;
            logmsg("Cannot cancel $_: $r", 'err');
            goto ERR;
        }
    }
    $last_cancel = time;
    return;

  ERR:
    # discard unusable socket
    close(NNTP);
    logmsg('Switching to ctlinnd...', 'err');
    cancel_ctlinnd($ids);
    $cancel = \&cancel_ctlinnd;
}

sub read_ctlfile {
    unless (open(CTLFILE, $permfile)) {
        logmsg("Cannot open $permfile: $!", 'err');
        return 0;
    }
    while (<CTLFILE>) {
        chop;
        s/^\s+//;
        s/\s+$//;
        next if /^#/ or /^$/;
        my ($issuer, $type) = split(/:/, lc $_);
        if (not(defined($issuer) and defined($type))) {
            logmsg(
                "Cannot parse $permfile line <<$_>>; "
                  . "syntax is <<issuer:type>>.",
                'err',
            );
            next;
        }
        $type =~ s/\s//g;
        foreach (split(/,/, $type)) {
            push(@ncmperm, "$issuer\001$_");
        }
    }
    close CTLFILE;
    return 1;
}

sub logmsg {
    my ($msg, $lvl) = @_;

    # Default is logging at notice level.
    $lvl ||= 'notice';

    # Write the message across several log lines.
    $msg =~ s/\r/ /g;
    my @logs = split('\n', $msg);

    if ($opt_l or not $syslog_available) {
        if ($log_open == 0) {
            open(LOG, ">>$logfile") or die "Cannot open log: $!";
            $log_open = 1;
            select LOG;
            $| = 1;
            select STDOUT;
        }
        # Only log by default messages of level "err" or "notice".
        return if ($lvl eq 'info' and $loglevel < 2);
        return if ($lvl eq 'debug' and $loglevel < 3);

        my $now = strftime("%Y-%m-%d %T %z", localtime());
        foreach my $logline (@logs) {
            print LOG "$now $hostname nocem[$$]: $logline\n";
        }
    }

    if ($syslog_available) {
        # Always write errors to syslog so that scanlogs emphasizes them
        # in daily Usenet reports.
        if ($lvl eq 'err' or not $opt_l) {
            foreach my $logline (@logs) {
                syslog($lvl, '%s', $logline);
            }
        }
    }

    return;
}

sub hup_handler {
    $got_sighup = 1;
    return if $working;
    close LOG;
    $log_open = 0;
}

sub term_handler {
    $got_sigterm = 1;
    return if $working;
    logmsg('Exiting because of signal');
    exit 1;
}

__END__

=head1 NAME

perl-nocem - A NoCeM-on-spool implementation for INN 2.x

=head1 SYNOPSIS

B<perl-nocem> [B<-hlu>] [B<-b> I<directory>] [B<-c> I<filename>] [B<-g>
I<command>] [B<-G> I<command>] [B<-i> I<filename>] [B<-k> I<keyring>]
[B<-v> I<level>]

=head1 DESCRIPTION

NoCeM, which is pronounced I<No See 'Em>, is a protocol enabling
authenticated third-parties to issue notices which can be used
to cancel unwanted articles (like spam and articles in moderated
newsgroups which were not approved by their moderators).  It can
also be used by readers as a I<third-party killfile>.  It is
intended to eventually replace the protocol for third-party cancel
messages which can too easily be forged as cancel messages are not
authenticated, contrary to NoCeM notices.

B<perl-nocem> processes third-party, PGP-signed article cancellation
notices.  It is possible to honour a selective set of NoCeM notices,
and not all of them.  You can configure in F<nocem.ctl> in I<pathetc>
the list of NoCeM issuers you trust and add the corresponding
public PGP keys to your NoCeM keyring (by default, no PGP keys are
installed, so nobody is trusted).  It is up to you to decide whether
you wish to honour their notices, depending on the criteria they use.
You are encouraged to regularly have a look at the official L<NoCeM
Registry|https://rosalind.home.xs4all.nl/nocemreg/nocemreg.html>
to check for possible new NoCeM issuers to add and see an
overview of their policy.  The L<daily summary of NoCeM messages
sent|https://www.novabbs.com/SEARCH/search_nocem.php?stats=daily> may also
be useful.

B<perl-nocem> expects either storage tokens or file names on its standard
input.  It will then read and process the corresponding article.  It appends
all status messages to F<perl-nocem.log> in I<pathlog> if the syslog facility
is not available or the B<-l> flag is explicitly used; otherwise, the syslog
facility is used in which case status messages are appended to a file usually
named F<news.notice> in I<pathlog>.  (Some logs will be written only if debug
level is set up to be logged via syslog.)

=head1 CONFIGURATION

Processing NoCeM notices is easy to set up:

=over 4

=item 1.

If not already done, install GnuPG, or an equivalent implementation of the
OpenPGP standard, to be able to verify the signature of NoCeM messages.
It will provide the B<gpg> and B<gpgv> programs.  If GnuPG was already
installed when INN was configured, then the paths to these programs were taken
into account.  Otherwise, you can use the B<-g> and B<-G> flags to set (or
even override) the commands B<perl-nocem> will use to run these programs.

All still active NoCeM issuers use rather modern PGP keys accepted by both
GnuPG 1.x and 2.x versions.  It is no longer needed to explicitly use B<gpg1>
to process NoCeM notices.

=item 2.

Import the public keys of the NoCeM issuers you trust in order to check the
authenticity of their notices.  You can run the following command:

    gpg --no-default-keyring --allow-non-selfsigned-uid \
        --primary-keyring <pathetc>/pgp/ncmring.gpg --no-options \
        --no-permission-warning --batch --import <key-file>
    chmod 644 <pathetc>/pgp/ncmring.gpg

where <pathetc> is the value of the I<pathetc> parameter set in F<inn.conf>
and <key-file> the file containing the public key(s) to import.  The keyring
is located in F<< <pathetc>/pgp/ncmring.gpg >> by default; you only have
to create the directory F<< <pathetc>/pgp >> before using B<gpg> (it will
automatically generate the F<ncmring.gpg> file) and make sure the news user
can read this file, once generated.  You can use another location and file
name for the keyring, and then run B<perl-nocem> with the B<-k> flag.

The public keys of NoCeM issuers can be found in the L<web site of The NoCeM
Registry|https://rosalind.home.xs4all.nl/nocemreg/nocemreg.html> where you can
even download a unique file which contains all the public keys.

=item 3.

Create or update the F<nocem.ctl> configuration file in I<pathetc> to
indicate the NoCeM issuers and the types of notices you want to follow.
This permission file contains lines like:

    bleachbot@httrack.com:spam,site
    pgpmoose@killfile.org:pgpmoose-forged-moderation

This will remove all articles for which the issuer (first part of the line,
before the colon C<:>) has issued NoCeM notices of a type present in the
comma-separated list of types specified after the colon (using C<*> is
possible, and means that all types are accepted).

Blank lines and lines beginning with a hash sign (C<#>) are ignored.  Case is
insensitive.  Any entry with no corresponding public PGP key in the keyring
will be skipped.

You will also find information about the issuers on the web site of I<The
NoCeM Registry>.  Note that INN is shipped with an up-to-date F<nocem.ctl>
file already configured with the current NoCeM issuers.  (Only the public PGP
keys installed at the previous step are not included, so as to leave you the
choice of whom to trust, and download the most recent ones, in case they have
changed.)

=item 4.

Add to the F<newsfeeds> file in I<pathetc> an entry like this one in
order to feed B<perl-nocem> with the NoCeM messages (cross)posted to
news.lists.filters, the global newsgroup where notices should be sent:

    nocem!\
        :!*,news.lists.filters\
        :Tc,Wf,Ap:<pathbin>/perl-nocem

with the correct path to B<perl-nocem>, located in <pathbin>, and any optional
flag you want to use.  Then, run C<inncheck> to ensure the syntax of the
modified F<newsfeeds> file is correct, and reload it (via C<ctlinnd reload
newsfeeds 'NoCeM channel feed'>).

Note that you should at least carry news.lists.filters on your news
server (or other newsgroups where NoCeM notices are sent) if you wish
to process them.

=item 5.

Everything should now work.  However, do not hesitate to manually test
B<perl-nocem> with a NoCeM message, using either:

    grephistory '<Message-ID>' | perl-nocem -l -v 2
    echo '/path/to/a/nocem/message' | perl-nocem -l -v 2

B<perl-nocem> expects either storage tokens or file names on its standard
input (B<grephistory> returns the storage token of an article identified by
its Message-ID).

Check the logs of that test in F<perl-nocem.log> in I<pathlog>.

You can also check the list of installed PGP public keys with the following
command, adapted to the location of the NoCeM keyring:

    gpg --no-default-keyring --list-keys \
        --primary-keyring <pathetc>/pgp/ncmring.gpg

=back

=head1 OPTIONS

=over 4

=item B<-b> I<directory>

Back up cancelled articles into files in I<directory>.  The directory should
exist and be writable by the news user.  Files are named F<issuer%type>, and
contain the articles actually removed from your news spool by the given NoCeM
issuer for the given type of NoCeM notices.  Cancelled articles which are not
stored in your news spool when the notice is processed cannot be backed up.

This flag is primarily meant for auditing possible false positives.  As the
backups are not automatically purged, you should prune them from time to time
after having inspected their contents.

=item B<-c> I<filename>

By default, B<perl-nocem> reads a configuration file named F<nocem.ctl> in
I<pathetc>.  This flag permits reading another configuration file, specified
with an absolute path.

=item B<-g> I<command>

By default, B<perl-nocem> runs the gpg(1) binary whose path was determined
when INN was configured.  If GnuPG wasn't installed at that time, or if you
want to use another program, this flag will be helpful.  Please ensure that
at least an equivalent of the default options given to B<gpg> is present in
I<command>; otherwise, B<perl-nocem> may malfunction.

Assuming C</bin/gpg> corresponds to the B<gpg> binary, the default is:

    -g "/bin/gpg --status-fd=1 --verify --allow-weak-digest-algos"

=item B<-G> I<command>

By default, B<perl-nocem> runs the gpgv(1) binary whose path was determined
when INN was configured.  If GnuPG wasn't installed at that time, or if you
want to use another program, this flag will be helpful.  Please ensure that
at least an equivalent of the default option given to B<gpgv> is present in
I<command>; otherwise, B<perl-nocem> may malfunction.

Assuming C</bin/gpgv> corresponds to the B<gpgv> binary, the default is:

    -G "/bin/gpgv --status-fd=1"

Note that B<gpg> will be preferentially used over B<gpgv> when both are
available, unless B<-g> is given an empty string.

=item B<-h>

Print to standard output a usage message and exit.

=item B<-i> I<filename>

When this flag is used, B<perl-nocem> includes an external Perl script
named I<filename>, specified with an absolute path.  It permits loading a
C<local_want_cancel_id> function with local rules to fine-tune within a NoCeM
notice which articles get cancelled.  All the articles present in a NoCeM
notice are otherwise cancelled by default.

This function is called for every article in the notice and has access
to several variables: the Message-ID C<$msgid> of the NoCeM message, the
Message-ID C<$artid> of the article to cancel, the comma-separated list
of newsgroups C<$groups> to which the article to cancel was posted, the
hash reference C<$hdrs> to the pseudo header fields of the NoCeM notice,
in lowercase value.  The article is kept if the function returns C<0>, and
cancelled otherwise.

Here is an illustration of what this function can do, when defined in
I<filename> and the B<-i> flag is used:

    sub local_want_cancel_id {
        my ($msgid, $artid, $groups, $hdrs) = @_;

        my $carried = 0;

        # Walk through the newsgroups the article was posted to.
        foreach my $group (split(/,/, $groups)) {
            # Keep it if posted to news.software.nntp.
            return 0 if $group eq "news.software.nntp";

            # Keep it if posted to fr.* and the issuer is "john".
            return 0
              if $group =~ /^fr\./ and $hdrs->{issuer} eq "john";

            # The article has been posted to at least a newsgroup
            # in the fr.* or news.* hierarchy.  You may use here
            # a regular expression corresponding to the newsgroups
            # pattern you ask your feeds to send you.
            $carried = 1 if $group =~ /^(fr|news)\./;
        }

        # In case the server only carries fr.* and news.*, this
        # rule permits discarding NoCeM notices related to
        # newsgroups not carried by the server, and therefore
        # neither treating nor remembering in the history file
        # Message-IDs of articles which won't reach the server.
        return 0 if not $carried;

        # Keep it if only posted to news.admin.net-abuse.usenet
        # and the type of the notice is "spam".
        return 0
          if $groups eq "news.admin.net-abuse.usenet"
          and $hdrs->{type} eq "spam";

        # Cancel it!
        return 1;
    }

If any syntax problem occurs when Perl loads the function, B<perl-nocem> will
die and report the reason in the logs.  You'll then have to fix the function.

=item B<-k> I<keyring>

By default, B<perl-nocem> verifies the signatures of NoCeM messages
with the PGP public keys present in a keyring named F<ncmring.gpg> in the
I<pathetc>/pgp directory.  This flag permits using another keyring, specified
with an absolute path.

A B<--keyring=> flag with the I<keyring> value is then passed to the gpg(1)
and gpgv(1) commands unless I<keyring> is an empty string.

=item B<-l>

By default, B<perl-nocem> send logs to syslog(3).  In case the syslog facility
is not available or this flag is used, logs are sent to F<perl-nocem.log>
in I<pathlog>.  Error log level will still additionally be sent to syslog if
available.

=item B<-u>

By default, B<innreport> will show unprocessed NoCeM notices in daily
reports it generates.  When this flag is used, issuers or types of notices
not configured in F<nocem.ctl> will still be mentioned in the logs but the
corresponding log lines will not be parsed by B<innreport>.

=item B<-v> I<level>

Increase log verbosity to that I<level>, from 1 to 3.  Default is C<1>.

=back

=head1 FILES

=over 4

=item I<pathbin>/perl-nocem

The Perl script itself used to process NoCeM messages.

=item I<pathetc>/nocem.ctl

The configuration file which specifies the NoCeM notices to be processed.
Another file can be specified with the B<-c> flag.

=item I<pathetc>/pgp/ncmring.gpg

The keyring which contains the public keys of trusted NoCeM issuers.  Another
file can be specified with the B<-k> flag.

=item I<pathlog>/perl-nocem.log

The log file used when the syslog facility is not available or the B<-l> flag
is used.

=back

=head1 NOTES

The accuracy of the newsgroups following the Message-IDs to cancel in
the NoCeM body is not checked, nor is the Newsgroups pseudo header field
if present.  Well, as we already trust the issuer of the notice about the
Message-IDs he marks as spam, let's also be confident about the listed
newsgroups.

=head1 HISTORY

Copyright 2000 by Miquel van Smoorenburg <miquels@cistron.nl>.

Copyright 2001 by Marco d'Itri <md@linux.it>.

=head1 SEE ALSO

gpg(1), gpgv(1), grephistory(1), inn.conf(5), innreport(8), newsfeeds(5),
pgp(1).

=cut
