#!/usr/local/bin/perl
'di';
'ig00';
#
#
# dburst - burst digests conforming to RFC 1153, 934, or 1521 into
#         an mbox file, and run a mail reader on that mailbox.
#         While we're at it, we'll read in a killfile and use that
#         to filter messages out of the mbox file.
#
# This program is its own manual page. Install a link from it to
# /usr/man/man1/dburst.1 or whereever seems right. It will run
# under perl4.036 or perl5
#
# By Alan Schwartz (c) 1996
#
# Usage: dburst < digest
#        or, from mailers, |dburst
#
require 'getopts.pl';

$mailer = $ENV{'MAILER'} ? $ENV{'MAILER'} : 'elm';
$mailer .= ' -f';

&Getopts("f:o:");

# Load the kill file
&loadkillfile($opt_f);

# Create the output file
if ($opt_o eq "-") {
  open(TMP,">&STDOUT");
} else {
  $tmpfile = $opt_o ? $opt_o : "/tmp/burst.$$";
  die "Unable to create $tmpfile\n" unless open(TMP,">$tmpfile");
}

# Read the message headers and preamble and try to figure out what kind of
# digest this is.
# 1. If there's a MIME multipart/digest Content-Type, it's RFC 1521
# 2. If the subject header is "listname digest" and a number, and 
#    there's a line of 70 hyphens, it's probably RFC 1153
# 3. Otherwise, let's assume it's RFC 934
#
while (<STDIN>) {
  if (/^\s+\S/) {
   # Continuation line
   $headers[$#headers] .= " $_";
  } else {
   push(@headers,$_);
  }
  print TMP;
  last if /^$/;
}
foreach (@headers) {
  if (/^Content-Type:\s*multipart\/digest;\s*boundary="(.*)"/i) {
    $rfc = 1521; 
    $boundary = "--$1(--)?";
    last;
  }
  if (/^Subject:/) {
    $rfc = 934 unless /digest.*\d/i;
  }
  # Listproc uses 1153, as does L-Soft LISTSERV
  $rfc = 1153 if (/^X-Listprocessor/ || /^X-LSV-ListID/);
    
}

# MIME is easy, just do it.
if ($rfc != 1521) {

# Now, we'd still like to know if it's really rfc 934 or 1153,
# but they can be made very similar to one another, and often are,
# so we'll do the best we can - if they're similar enough, it won't
# matter anyway.
# If 934, the first message boundary will begin with "-\S"
#  If we see "- -\S", that's probably 934, too, since it escapes like that.
# If 1153, the first boundary will be 70 hyphens, period.
# So find the first line beginning with a hyphen:
#  (a) if it's a row of 70 hyphens, 1153 for sure
#  (b) if it's a row of any other number of hyphens, 934 for sure
#  (d) if it's anything else, well, guess 934.
#
# When you retrieve archives from a LISTSERV, it starts them and separates
# them with a huge row of equal signs, so let's look for that, too.
#
$seventyhyphens = "-" x 70;
while (<STDIN>) {
  print TMP;
  if (/^$seventyhyphens$/o) {
   $rfc = 1153;
   $boundary = "-" x 30;
   last;
  } elsif (/^-\S/) {
   $rfc = 934;
   chop($boundary = $_);
   last;
  } elsif (/^===============================/) {
   $rfc = "LISTSERV DIGEST";
   chop($boundary = $_);
   last;
  }
}   

if (!$rfc) {
  # Oh, drat. We couldn't figure it out. Let's cop out
  close(TMP);
  unlink($tmpfile) unless $opt_o;
  die "Unable to determine type of digest. Sorry.\n";
}
}

# Ok, we've done the preamble and we've got a $boundary set.
# Let's go through the message and split out the individual
# messages. We're making an assumption that RFC 934 boundaries
# don't change through the message - the RFC allows that, but 
# no one's foolish enough to build digests like that, right?
#
while (<STDIN>) { 
  next if /^$/;  # Skip blank lines at the beginning of the message
  undef(@headers);
  undef(@body);
  if (/^$boundary$/) {
    while (<STDIN>) {
      last unless /^$/;
    } 
  }
  if ($rfc == 1153 && /^Topic No\./) {
    # Topic number, skip it
    while (<STDIN>) {
      last unless /^$/;
    } 
  }
  if (/:/) {
    push(@headers,$_);
    # message header
    while (<STDIN>) {
      last if /^$/;
      push(@headers,$_);
    }
  } else {
    # This is probably the epilogue, which doesn't have headers
    push(@body,$_);
    print TMP "\n";
  }
  # message body
  while (<STDIN>) {
    last if /^$boundary$/o;
    s/^- -/-/ if $rfc == 934;
    push(@body,$_);
  }
  undef $action;
  if ($hdrprog) {
    eval $hdrprog;
    die $@ if $@;
    next if $action eq "k";
  }
  if ($bodyprog) { 
    eval $bodyprog;
    die $@ if $@;
    next if $action eq "k";
  }
  push(@headers,substr($action,1,length($action))."\n") if $action =~ /^\+/;
  &dummyheader;  # Put in an SMTP From
  print TMP @headers;
  print TMP "\n";
  print TMP @body;
}

# Well, I think we're done. Let's run the mailer
close(TMP);
exec ("$mailer $tmpfile <&2 ; /bin/rm -f $tmpfile") unless $opt_o;


# The kill file format:
# Lines beginning with #'s are comments
# Other lines are kill directives in the format:
# /regexp/location:action
# location may be "h" (headers), "b" (body) or "a" (all). If not given,
# defaults to "h"
# action may be "j" or "k" (junk/kill) or "+" (set urgent status)
#
# We build an eval'able program for searching in headers and body
# from this.
#
sub loadkillfile {
  $killfile = $_[0] ? $_[0] : "$ENV{'HOME'}/.burstrc";
  return unless open(KILL,$killfile);
  $actions{"k"}++;
  $actions{"+"}++;
  while (<KILL>) {
    next if /^#/ || /^$/;
    if (m#/(.*)/(.)?:(.+)#) {
      $pat = $1; $loc = $2; $act = $3;
      $loc = "h" unless $loc;
      $act = "k" if $act =~ /^kj/i;
      warn "Unknown action $act for /$pat/\n", next 
         unless $actions{substr($act,0,1)};
      if ($loc =~ /[ba]/i) {
	$bodyprog .= "\$action = \'$act\', last PROG if /$pat/;\n";
      }
      if ($loc =~ /[ha]/i) {
	$hdrprog .= "\$action = \'$act\', last PROG if /$pat/;\n";
      }
    } else {
      warn "Unknown killfile directive: $_";
    }
  } 
  close (KILL);
  $hdrprog = "PROG: foreach (\@headers) { $hdrprog }" if $hdrprog; 
  $bodyprog = "PROG: foreach (\@body) { $bodyprog }" if $bodyprog; 
}


# A fake SMTP header
sub dummyheader {
  print TMP "From dummy Wed Feb  29 12:12:12 1990\n";
}


##############################################################################

	# These next few lines are legal in both Perl and nroff.

.00;			# finish .ig
 
'di			\" finish diversion--previous line must be blank
.nr nl 0-1		\" fake up transition to first page again
.nr % 0			\" start at page 1
'; __END__ ############# From here on it's a standard manual page ############
.TH DBURST 1 "April 25, 1996"
.AT 3
.SH NAME
dburst \- burst a mail digest, filter messages, and read as a mailbox
.SH SYNOPSIS
.B dburst [-f killfile] [-o filename]
.SH DESCRIPTION
.I dburst
accepts a mail digest as standard input, and bursts it into a 
temporary mailbox file. It then runs \fIelm\fP (or the program
in the user's MAILER environment variable) on the temporary mailbox.
.I dburst
can recognize digests formatted according to RFC 1153, RFC 934, or
RFC 1521. A convenient way to use \fIdburst\fP from, say, \fIelm\fP(1),
is to simply pipe the digest to \fIdburst\fP.
.LP
The "-o filename" argument bursts the digest into the specified filename,
and does not run the mailer on the file. A filename of "-" refers to
standard output, so \fIdburst\fB can be used as a pipe.
.SH KILLFILES
In addition to bursting digests, \fIdburst\fP supports a "killfile"
which instructs the program to do special processing on messages in 
the digest which match a \fIperl\fP(1) regular expression. The killfile
can be given on the command line using the \fB-f\fP argument; it defaults
to the file \fB.burstrc\fP in the user's home directory.
.LP
Blank lines and lines beginning with "#" in the killfile are ignored.
Other lines should in in this format:
.LP
/\fIpattern\fB/\fIlocation\fP:\fIaction\fP
.LP
The \fIpattern\fB can be any regular expression. Matching is
case-sensitive. The \fIlocation\fP can 
be "h" (match in the headers of the messages), "b" (match in the body
of the messages), or "a" (match in both header and body). If \fIlocation\fP
is omitted, "h" is assumed.
.LP
Two \fIaction\fPs are currently supported: killing messages and adding headers
to messages. To kill a message so that it will not appear in the temporary
mailbox, use "k" or "j" for \fIaction\fP. To add a header to matching
messages, use "+Header: Value" for \fIaction\fP.
.SH KILLFILE EXAMPLE
.nf
# A sample killfile might look like this:

# I don't want to read messages from thatguy@thatplace
/From:.*thatguy@thatplace/:k

# I don't want to read any messages that have the word 
# "snugglebunnies" anywhere in the headers or body.
/[Ss]nugglebunnies/a:k

# I'd like messages that contain myname to appear as "Urgent"
# in elm:
/myname/a:+Priority: U
.fi
.SH ENVIRONMENT
The MAILER environment variable gives the name of the program that
should be run on the temporary mailbox. The program must accept
the "-f mailbox" switch. Elm, pine, and BSD mail work fine.
.SH FILES
$HOME/.burstrc        Default killfile location
.SH AUTHOR
Alan Schwartz
.SH "SEE ALSO"
Internet RFC's 934, 1153, 1521
.br
elm(1), perl(1)
.ex