#! /bin/sh
# check-spool -- find orphaned news articles (those without history entries)
# Author: tale@isc.org (David Lawrence)

# This script works by making two lists in TMPDIR.
# First the articles (and other random files, like core files) are
#     listed in SPOOL.LIST.
# Then the files that the history file knows about are listed in HIST.LIST.
# Then the two lists are compared, with SPOOL.LIST files that are not in
#     HIST.LIST saved to ORPHANS.LIST and also listed on the standard output.
# HIST.LIST and SPOOL.LIST are removed, but ORPHANS.LIST is left behind
# for further use.

#
# Note that because no locking is done, and the system can keep receiving
# and expiring news the whole time the script is running, it is possible
# that the output will include files that existed when SPOOL.LIST was
# made but no longer existed when HIST.LIST was made because an intervening
# expire ran.  Since most people run a script like this to look for stray
# files to remove, these false positives aren't of any consequence.
#
# Because the find is done before the history scan, there should never be
# articles reported which are really valid, having arrived while the script
# was running.

### INN set up.
##  =()<. @<_PATH_SHELLVARS>@>()=
. /var/news/innshellvars

### Uncomment for C News set up (and comment the innshellvars line above)
## =()<#NEWSCONFIG=${NEWSCONFIG-@<NEWSCONFIG>@}>()=
#NEWSCONFIG=${NEWSCONFIG-/var/news/bin/config}
#SPOOL=$NEWSARTS
#TMPDIR=${TMPDIR-/var/tmp}

# Every path listed below is relative to the spool directory.  Stop right
# here if SPOOL is unset/empty or cannot be entered: an unquoted, unchecked
# cd would leave the scan running from some other directory (a bare "cd"
# goes to $HOME) and its files would then be reported as orphans.
cd "${SPOOL:?SPOOL is not set}" || exit 1

# List files of all subdirectories of the spool
# EXCEPT any top level entry that has a dot in its name, or lost+found.
# In the output, ignore any .overview files.  (Ideally you should have
# .overview files some place other than the article tree.)
#
# list_spool_files
#   Runs in the current directory (expected to be the spool) and writes one
#   relative path per line, unsorted, on the standard output.  Top level
#   regular files (core files and the like) are listed too.
#
# The top level entries come from a glob and are filtered with case
# patterns rather than with "ls | egrep": in an extended regular expression
# the "+" of lost+found is a repetition operator, so the old pattern never
# matched the literal directory name and lost+found was scanned anyway.
# The glob also copes with names containing white space.
list_spool_files() {
    for entry in *; do
        case $entry in
            *.* | *lost+found*) continue ;;
        esac
        # An empty spool leaves the pattern unexpanded; skip what is not there.
        [ -e "$entry" ] || continue
        find "$entry" -type f -print
    done | grep -v '\.overview'
}

# Fall back to /tmp when the configuration did not provide TMPDIR, rather
# than trying to write SPOOL.LIST into the root directory.
TMPDIR=${TMPDIR:-/tmp}
list_spool_files > "$TMPDIR/SPOOL.LIST"

# The remaining steps use file names relative to TMPDIR, so failing to get
# there must be fatal.
cd "$TMPDIR" || exit 1
sort -o SPOOL.LIST SPOOL.LIST

# Search the history file to see which articles it knows about.
#
# history_paths FILE
#   Print the third tab-separated field of every line of FILE that has a
#   non-empty one, with each dot turned into a slash and each space into a
#   newline.  A field such as "comp.lang.c/12 alt.test/7" therefore comes
#   out as the two lines "comp/lang/c/12" and "alt/test/7", the same form
#   find uses for paths relative to the spool.
#
#   The tab separator and the newline are written as \t and \n escapes so
#   that they survive editors that rewrite white space.
history_paths() {
    gawk -F'\t' '$3 != "" {print $3}' "$1" | tr '. ' '/\n'
}

# Honour HISTORY when the configuration sourced above provides it
# (NOTE(review): presumably innshellvars sets it -- confirm); otherwise
# fall back to the path this script has always used.
history_paths "${HISTORY-/news/lib/history}" | sort > HIST.LIST

# Keep only the lines that appear in SPOOL.LIST but not in HIST.LIST (comm
# columns 1 and 3 are suppressed).  tee shows them on the standard output
# and saves them in ORPHANS.LIST at the same time.
comm -1 -3 HIST.LIST SPOOL.LIST |
    tee ORPHANS.LIST

# The two intermediate lists are no longer needed; ORPHANS.LIST stays.
for scratch in HIST.LIST SPOOL.LIST; do
    rm -f "$scratch"
done

exit 0