SOURCES: cleanfeed.8 (NEW) - taken from old sources

blues blues at pld-linux.org
Tue Aug 12 12:27:12 CEST 2008


Author: blues                        Date: Tue Aug 12 10:27:12 2008 GMT
Module: SOURCES                       Tag: HEAD
---- Log message:
- taken from old sources

---- Files affected:
SOURCES:
   cleanfeed.8 (NONE -> 1.1)  (NEW)

---- Diffs:

================================================================
Index: SOURCES/cleanfeed.8
diff -u /dev/null SOURCES/cleanfeed.8:1.1
--- /dev/null	Tue Aug 12 12:27:13 2008
+++ SOURCES/cleanfeed.8	Tue Aug 12 12:27:07 2008
@@ -0,0 +1,1386 @@
+.rn '' }`
+''' $RCSfile$$Revision$$Date$
+'''
+''' $Log$
+''' Revision 1.1  2008/08/12 10:27:07  blues
+''' - taken from old sources
+'''
+'''
+.de Sh
+.br
+.if t .Sp
+.ne 5
+.PP
+\fB\\$1\fR
+.PP
+..
+.de Sp
+.if t .sp .5v
+.if n .sp
+..
+.de Ip
+.br
+.ie \\n(.$>=3 .ne \\$3
+.el .ne 3
+.IP "\\$1" \\$2
+..
+.de Vb
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve
+.ft R
+
+.fi
+..
+'''
+'''
+'''     Set up \*(-- to give an unbreakable dash;
+'''     string Tr holds user defined translation string.
+'''     Bell System Logo is used as a dummy character.
+'''
+.tr \(*W-|\(bv\*(Tr
+.ie n \{\
+.ds -- \(*W-
+.ds PI pi
+.if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+.if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+.ds L" ""
+.ds R" ""
+'''   \*(M", \*(S", \*(N" and \*(T" are the equivalent of
+'''   \*(L" and \*(R", except that they are used on ".xx" lines,
+'''   such as .IP and .SH, which do an additional level of
+'''   double-quote interpretation
+.ds M" """
+.ds S" """
+.ds N" """""
+.ds T" """""
+.ds L' '
+.ds R' '
+.ds M' '
+.ds S' '
+.ds N' '
+.ds T' '
+'br\}
+.el\{\
+.ds -- \(em\|
+.tr \*(Tr
+.ds L" ``
+.ds R" ''
+.ds M" ``
+.ds S" ''
+.ds N" ``
+.ds T" ''
+.ds L' `
+.ds R' '
+.ds M' `
+.ds S' '
+.ds N' `
+.ds T' '
+.ds PI \(*p
+'br\}
+.\"	If the F register is turned on, we'll generate
+.\"	index entries out stderr for the following things:
+.\"		TH	Title 
+.\"		SH	Header
+.\"		Sh	Subsection 
+.\"		Ip	Item
+.\"		X<>	Xref  (embedded
+.\"	Of course, you have to process the output yourself
+.\"	in some meaningful fashion.
+.if \nF \{
+.de IX
+.tm Index:\\$1\t\\n%\t"\\$2"
+..
+.nr % 0
+.rr F
+.\}
+.TH cleanfeed 8 "Version 0.95.7b" "26/Aug/98" "Cleanfeed - Because spam sucks"
+.UC
+.if n .hy 0
+.if n .na
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.de CQ          \" put $1 in typewriter font
+.ft CW
+'if n "\c
+'if t \\&\\$1\c
+'if n \\&\\$1\c
+'if n \&"
+\\&\\$2 \\$3 \\$4 \\$5 \\$6 \\$7
+'.ft R
+..
+.\" @(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2
+.	\" AM - accent mark definitions
+.bd B 3
+.	\" fudge factors for nroff and troff
+.if n \{\
+.	ds #H 0
+.	ds #V .8m
+.	ds #F .3m
+.	ds #[ \f1
+.	ds #] \fP
+.\}
+.if t \{\
+.	ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+.	ds #V .6m
+.	ds #F 0
+.	ds #[ \&
+.	ds #] \&
+.\}
+.	\" simple accents for nroff and troff
+.if n \{\
+.	ds ' \&
+.	ds ` \&
+.	ds ^ \&
+.	ds , \&
+.	ds ~ ~
+.	ds ? ?
+.	ds ! !
+.	ds /
+.	ds q
+.\}
+.if t \{\
+.	ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+.	ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+.	ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+.	ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+.	ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+.	ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
+.	ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
+.	ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.	ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
+.\}
+.	\" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
+.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
+.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
+.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+.ds oe o\h'-(\w'o'u*4/10)'e
+.ds Oe O\h'-(\w'O'u*4/10)'E
+.	\" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+.	\" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+.	ds : e
+.	ds 8 ss
+.	ds v \h'-1'\o'\(aa\(ga'
+.	ds _ \h'-1'^
+.	ds . \h'-1'.
+.	ds 3 3
+.	ds o a
+.	ds d- d\h'-1'\(ga
+.	ds D- D\h'-1'\(hy
+.	ds th \o'bp'
+.	ds Th \o'LP'
+.	ds ae ae
+.	ds Ae AE
+.	ds oe oe
+.	ds Oe OE
+.\}
+.rm #[ #] #H #V #F C
+.SH "NAME"
+Cleanfeed \- spam filter for Usenet news servers
+.SH "SYNOPSIS"
+\fBINN:\fR Installed as \fBfilter_innd.pl\fR, location is configured into
+INN at compile time.
+.PP
+\fBHighwind servers:\fR <command line> \-program cleanfeed \-body
+.PP
+\fBNNTPRelay\fR: ExternalFilter=c:/perl/bin/perl.exe c:/news/cleanfeed.pl
+.SH "DESCRIPTION"
+A spam filter for Usenet servers.  \fBCleanfeed\fR blocks spam on the way
+into your server, before it is written to disk or propagated to outbound
+feeds.  It can also block binaries in non-binary newsgroups and includes
+several other features to keep your newsfeed clean.
+.PP
+Cleanfeed currently works with INN, Cyclone, Typhoon, Breeze, and
+NNTPRelay servers.  See my webpage (listed at the end of this document)
+for pointers to information about using Cleanfeed with CNews, Diablo,
+Collabra, or INN versions earlier than 1.5.1.
+.SH "USAGE"
+For all versions, place the \fIcleanfeed.conf\fR configuration file
+somewhere, then edit the Cleanfeed source file and change the
+\fB$config_dir\fR option at the top to point to the directory where
+the config file lives.
+.Ip "\fB\s-1INN\s0\fR" 4
+Install the filter file (called cleanfeed) as \fIfilter_innd.pl\fR, and
+cleanfeed.conf, in the location you specified in \fIconfig.data\fR (\s-1INN\s0
+1.7.2 and earlier) or when configuring \s-1INN\s0 2.x (usually the bin/filter
+directory under the installation root).  Make sure both files are readable
+by the news user.  Once in place, the filter is loaded with the command
+\fBctlinnd reload filter.perl meow\fR.  Filtering can be turned on with
+\fBctlinnd perl y\fR and turned off with \fBctlinnd perl n\fR.
+.Ip "\fBCyclone/Typhoon/Breeze\fR" 4
+Add the \fB\-program\fR <file> and \fB\-body\fR options to the \fIbin/start\fR
+script, where <file> is the location and name of the Cleanfeed
+program. Restart the server.  Cleanfeed will run as an external process
+(standalone mode).  \s-1IMPORTANT\s0: make sure both cleanfeed and cleanfeed.conf
+are readable by the news user!  Double-check the permissions as this is
+a fairly common mistake!
+.Ip "\fBNNTPRelay\fR" 4
+Find the ExternalFilter directive in \fIconfig.txt\fR and make it look like:
+.Sp
+ExternalFilter=c:/perl/bin/perl.exe c:/news/cleanfeed.pl
+.Sp
+Cleanfeed will run as an external process (standalone mode).
+.PP
+More detailed installation instructions are provided later in this
+document.
+.SH "CONFIGURATION OPTIONS"
+Configuration is accomplished by setting the various options in the
+\fIcleanfeed.conf\fR configuration file.  This file is evaluated as Perl
+code, so comments can be included in the usual Perl # syntax.  A
+sample default file is included with the distribution.
+.PP
+If you would rather not use \fIcleanfeed.conf\fR, you can set its
+location to \*(L"undef\*(R" in the source and edit the configuration
+variables directly in the source file.
+.PP
+\fIcleanfeed.conf\fR has two sections (which define perl hashes):
+\fB%config_local\fR and \fB%config_append\fR.  Entries in \fB%config_local\fR
+will override the default settings of the same name in the Cleanfeed
+source.  Entries in \fB%config_append\fR can be used to add to most of
+the default regular expressions, for items such as \fBbadguys\fR,
+\fBbin_allowed\fR, \fBpoison_groups\fR, etc.  Settings in \fB%config_append\fR
+for these items will be appended to the default regexps, separated by
+\*(L"|\*(R" (or).
+.PP
+If you want to completely override the default regexps for these options,
+rather than just add to the defaults, you can add an entry for them into
+the \fB%config_local\fR section of \fIcleanfeed.conf\fR.
+.PP
+All of this is done quite blindly, so if you do anything odd, be careful.
+(Cleanfeed will remove the common mistake of including two \*(L"|\*(R" (or) signs
+in a row.)  All config options are exposed to \fB%config_local\fR, including
+any that may not be present in the sample file.  Only the defined list of
+options are exposed to \fB%config_append\fR.
+.PP
+Options that are on/off or yes/no should be set to 1 for on/yes, or 0
+for off/no.
+.PP
+First, you need to tell Cleanfeed which news server software you are
+using.  At the top of the file, set the appropriate variable to 1.  For
+INN, set \fB$inn\fR; for Cyclone, Typhoon, or Breeze, set \fB$highwind\fR; and
+for NNTPRelay, set \fB$nntprelay\fR.  Ensure the other two (the ones you're
+not using) are set to 0.
+.Sh "\fBGeneral Settings\fR"
+.Ip "\fBaggressive\fR" 4
+Set this to 0 to disable all content-based filters.  Helpful to please
+paranoid lawyers, or paranoid customers.
+.Ip "\fBactive_file\fR" 8
+Set this to the full path to an active file, to allow Cleanfeed to know
+what groups are moderated.  This is normally your server's active file,
+but it doesn't have to be; it is possible, for example, to run Cyclone
+with no active file, but give one to Cleanfeed anyway.
+.Sh "\fB\s-1MD5\s0 Body Filter Settings\fR"
+.Ip "\fBdo_md5\fR" 8
+When turned on, the \s-1MD5\s0 \s-1EMP\s0 checks will be done.  This should be left
+on unless you have a really good reason to turn it off.  If you're
+running Hippo along with Cleanfeed, you might feel Cleanfeed's \s-1MD5\s0
+checks are redundant and want to turn them off, for example.  It
+would probably be better to leave it on with the history turned
+down, instead.
+.Ip "\fBmd5maxmultiposts\fR" 8
+Start rejecting articles after we have seen this many copies, according
+to the \s-1MD5\s0 checksum filter.
+.Ip "\fBMD5History\fR" 8
+How many articles to remember for \s-1MD5-\s0based \s-1EMP\s0 comparison.  Since the \s-1MD5\s0
+filter is not prone to false positives, setting this higher is a good idea
+to catch more spam, if you have the \s-1RAM\s0 to spare.
+.Ip "\fBMD5maxlife\fR" 8
+When a spam is identified by the \s-1MD5\s0 \s-1EMP\s0 filter, it is saved for continual
+rejection. \fBMD5maxlife\fR specifies how long, in hours, to keep a saved
+\s-1MD5\s0 id which is no longer getting any hits.  (A spam id which is still
+getting matches will be saved regardless of age.)  24 hours works well.
+.Ip "\fBfuzzy_md5\fR" 8
+When turned on, the message bodies will be munged up a bit before \s-1MD5\s0
+checksums are generated.  Whitespace and other non-alphanumeric
+characters are stripped and letters are forced to lowercase, as well
+as a couple other bits of treachery to try to defeat the \*(L"hashbuster\*(R"
+spam-bots.  This adds a bit of \*(L"fuzziness\*(R" to the \s-1MD5\s0 filter, and
+results in a performance hit as well.
+.Sp
+Since the smarter spammers have discovered hashbusting, I recommend
+that this be turned on.
+.Ip "\fBfuzzy_max_length\fR" 8
+Sets the maximum number of lines for an article body to be subject to
+the \fBfuzzy_md5\fR munging (above).  This keeps extremely large articles
+out of those nasty regular expressions.
+.Ip "\fBmd5_skips_followups\fR" 8
+Determines whether the \s-1MD5\s0 filter checks articles with References
+headers.  The default is to skip them.  Setting this option to 0
+will result in all articles passing through the \s-1MD5\s0 filter, which
+can result in a major performance hit, but does close another hole
+in the filter.  If you turn this off, you should increase \fBMD5History\fR
+as well to avoid shortening your \*(L"window\*(R".
+.Ip "\fBMD5HistSize\fR" 8
+The maximum allowed size of the \s-1EMP\s0 memory for the \s-1MD5-\s0checksum \s-1EMP\s0 filter.
+Use this as a \*(L"sanity check\*(R" to prevent a sudden burst of spam from eating
+up all of your memory.  It should be set high enough so that you normally
+never hit this number; use \fBMD5maxlife\fR to expire the hash instead.
+.Sh "\fBHeader-Based \s-1EMP\s0 Filter Settings\fR"
+.Ip "\fBdo_phl\fR" 8
+Turns on the \s-1NNTP\s0\-Posting-Host/Lines \s-1EMP\s0 filter.  This filter identifies
+spam by identical posting-host headers and article sizes in a short period
+of time.  You really don't want to turn this off.
+.Ip "\fBdo_fsl\fR" 8
+Turns on the From/Subject/Lines \s-1EMP\s0 filter.  This filter identifies spam
+by identical From and Subject headers and article sizes in a short period
+of time.  This is the one that gets the least number of hits these days,
+so you won't lose much by shutting it off.
+.Ip "\fBmaxmultiposts\fR" 8
+Start rejecting articles after we have seen this many copies, according
+to the header-based \s-1EMP\s0 filter.  Since false positives are somewhat more
+likely with this filter than with \s-1MD5\s0, this should be set appropriately
+higher to reduce the odds.
+.Ip "\fBArticleHistory\fR" 8
+How many ids to remember for header-based \s-1EMP\s0 comparison.  Setting this
+higher will catch more spam because there will be a larger \*(L"window\*(R" to
+look at.  Larger settings will also consume more memory and have a (small)
+impact on performance, as well as slightly increase the chance of a false
+positive (since the sample size will be larger).  Most articles will
+actually take up two entries in this history because there are two
+different header-based filters.
+.Ip "\fBEMPmaxlife\fR" 8
+Same as \fBMD5maxlife\fR but for the header-based \s-1EMP\s0 filter.
+.Ip "\fBEMPHistSize\fR" 8
+Same as \fBMD5HistSize\fR but for the header-based \s-1EMP\s0 filter.  If you are
+running the header-based filter but not the \s-1MD5\s0 filter for whatever
+reason, set this high.
+.Sh "\fBExcessive Crosspost Settings\fR"
+.Ip "\fBmaxgroups\fR" 8
+Reject articles crossposted so that followups will be to more than
+this many newsgroups.
+.Ip "\fBlow_xpost_maxgroups\fR" 8
+Specify a special, lower crosspost limit for certain groups, specified
+by regular expression in \fBlow_xpost_groups\fR (below).  Useful for being
+more strict in groups plagued by crossposting, such as sex, binaries,
+and jobs groups.  (Replaces the old \fBtfjmaxgroups\fR option.)
+.Sh "\fBMisplaced Binaries Filter\fR"
+.Ip "\fBblock_binaries\fR" 8
+Enables blocking of binary posts in non-binary newsgroups.  Which newsgroups
+allow binaries is configured with \fBbin_allowed\fR (below).
+.Ip "\fBmax_encoded_lines\fR" 8
+Sets the number of uuencoded or base64-encoded lines to allow before
+considering a post to be a binary.  This should be set high enough to pass
+regular \s-1PGP\s0 signatures.  (Those satanic Netscape crypto-sigs can die along
+with the other binaries.)  Default is 15 lines, which may be a little low if
+you are lenient, which you're not.
+.Ip "\fBbinaries_in_mod_groups\fR" 8
+If set, binaries are allowed in spite of \fBblock_binaries\fR if they are
+posted only to moderated groups (requires \fBactive_file\fR).
+.Sh "\fB\s-1HTML\s0\fR"
+.Ip "\fBblock_mime_html\fR" 8
+Enables blocking of \s-1MIME\s0\-encapsulated \s-1HTML\s0 posts.  This does \s-1NOT\s0 affect
+straight text/html or multipart/alternative posts of the type created by
+misconfigured Netscape and Microsoft \*(L"newsreaders\*(R", but \s-1ONLY\s0 posts which
+are \s-1MIME\s0\-encapsulated \s-1HTML\s0, a favorite format of sex spammers which
+often sneaks in under the \s-1EMP\s0 radar.
+.Ip "\fBblock_html\fR" 8
+Enables blocking of \s-1HTML\s0 and multipart/alternative posts.  You can specify
+group patterns where \s-1HTML\s0 is allowed by setting html_allowed (below).
+.Sh "\fBCancel Message Filtering\fR"
+.Ip "\fBblock_late_cancels\fR" 8
+If turned on, cancels for recently rejected articles will be rejected.
+Set the window with \fBMIDmaxlife\fR (below).  This will result in a
+\fIhuge\fR number of rejections if you have multiple full feeds and you
+aren't backlogging.  If you are concerned about your downstream sites
+receiving the cancels, leave this off. If you need a performance boost,
+turn it on.
+.Ip "\fBMIDmaxlife\fR" 8
+How long to remember rejected message-ids so cancels for these posts can
+later be rejected.  Specified in hours.  This only has an effect if
+\fBblock_late_cancels\fR is enabled (above).
+.Sh "\fBDisabling Other Filters\fR"
+.Ip "\fBdo_scoring_filter\fR" 8
+Enables the (new) \*(L"scoring\*(R" filter.  You probably want to leave this on,
+even if you need to turn off \fBaggressive\fR mode (turning off \fBaggressive\fR
+mode will disable the content-based parts of the scoring filter).
+.Ip "\fBdo_mid_filter\fR (\s-1INN\s0 only)" 8
+Enables the message-id filter.  This requires an additional patch to
+\s-1INN\s0 1.7.2, which is included with Cleanfeed (but optional).  The patch
+adds a new Perl hook to check message-id's during the \s-1NNTP\s0 \s-1CHECK\s0
+transaction, and decide whether to refuse the article.  There is a
+patch for this for \s-1INN\s0 2.0 which may get incorporated into the \s-1INN\s0
+distribution at some point.  The default is off.
+.Ip "\fBdo_bot_checks\fR" 8
+Enables the filters that check for spam bot signatures.  The only reason
+you would ever want to turn this off is if you've written your own
+version, or something.  Otherwise, leave it on.
+.Ip "\fBdo_supersedes_filter\fR" 8
+Enables the Excessive Supersedes filter, to catch rogue Supersedes
+attacks.  This filter begins dropping articles with Supersedes headers
+if too many appear from the same posting-host in a short time.  Moderated
+groups are given a higher limit (if \fBactive_file\fR is set), as is
+news.answers.  Default is on.
+.Ip "\fBcheck_supersedes_path\fR" 8
+If set, \fBbad_cancel_paths\fR will also be applied to Supersedes articles.
+Articles with Supersedes headers, where a path element matches the regexp
+in \fBbad_cancel_paths\fR, will be dropped.  Default is on.
+.Ip "\fBdrop_useless_controls\fR" 8
+If set, all control messages of types sendsys, senduuname, and version
+will be dropped.  These are no longer useful and are a hole for
+denial-of-service attacks due to the way \s-1INN\s0 and some other servers
+handle them.  On by default.
+.Ip "\fBdrop_ihave_sendme\fR" 8
+If set, control messages of types ihave and sendme will be dropped.
+See \fBdrop_useless_controls\fR.  If you use these types of control messages,
+turn this off.  If you're not sure, then you're not using them.
+.Ip "\fBdrop_control_with_supersedes\fR" 8
+Drops any and all control messages which contain a Supersedes header.
+Since control messages are not passed through the same filters as regular
+messages, a rogue Supersedes attack can use control messages to avoid
+filtering; this option closes this hole.  Legitimate control messages
+don't have Supersedes headers.  On by default.
+.Sh "\fBHash-Trimming\fR"
+.Ip "\fBtrimcycles\fR" 8
+The \s-1EMP\s0 memories are trimmed every \fBtrimcycles\fR times through the filter.
+.Ip "\fBEMPstarttrimming\fR" 8
+Tells the filter not to waste time trimming the \s-1EMP\s0 memories until they
+have this many entries.  Just a minor performance enhancement during
+the first hours the filter is running or when you first start \fBinnd\fR.
+.Sh "\fBLogging\fR"
+.Ip "\fBverbose\fR" 8
+When turned on, verbose logging to news.notice will happen; spam domains
+will be listed, etc.  When off, only general messages will be logged,
+making the news.daily summaries less interesting but much shorter and
+more to the point.  (There is, alas, no way to shut off news.notice
+logging entirely.)  (news.notice only applies to \s-1INN\s0.)  Note that this
+will not reduce the number of log entries, but only their verbosity.
+.Ip "\fBlogfile\fR (Standalone Mode)" 8
+If set to the path to a file, this will enable logging of message-ids
+of all articles processed by the filter.  Rejections will be logged
+with the reason for rejection.  Note that this will create a very large
+logfile which you will need to rotate or delete (see \fBmax_log_size\fR,
+below).
+.Ip "\fBreportfile\fR (Standalone Mode)" 8
+If set to the path to a file, this will enable generation of a simple
+report of articles accepted and rejected.  The report file will contain
+one entry per line with the start time, end time, number of articles
+accepted, and number of articles rejected, tab-separated.
+.Ip "\fBlog_accepts\fR (Standalone Mode)" 8
+When using the above logfiles, this setting determines whether articles
+accepted should be logged.  When disabled, only rejections will be logged.
+.Ip "\fBmax_log_size\fR (Standalone Mode)" 8
+The size at which to rotate the \fBlogfile\fR.  This will be replaced by
+time-based rotation at some point.
+.Ip "\fBstatfile\fR" 8
+If this is set to the full path of a file, a crude stats file will be
+written each time the filter is reloaded with \fBctlinnd reload
+filter.perl meow\fR (for \s-1INN\s0) or whenever the Cleanfeed process receives a
+\s-1SIGUSR1\s0 (for standalone mode).  The file shows how many entries are
+present in each of the \s-1EMP\s0 histories, \s-1MID\s0 history and excessive
+supersedes history; timer information if enabled (see \fBtimer_info\fR);
+and the contents of all configuration settings.  Posting-hosts for
+each supersedes entry will be listed, along with their counts; these
+are not being rejected unless they are over the threshold.  The
+default for this is undef, which disables creation of the stat file.
+.Sp
+More comprehensive stats are planned for the future.
+.Sh "\fBTiming Info\fR"
+.Ip "\fBtimer_info\fR" 8
+When enabled, Cleanfeed will generate timing statistics telling you
+how many articles per second are being examined by the filter and
+being accepted by the filter.  This information will appear in the
+statfile if this is enabled, and in the output of \s-1INN\s0's \fBctlinnd mode\fR
+if the \fImode.patch\fR is applied to \s-1INN\s0.  Note that the accepted/second
+rate is not necessarily the rate at which your server is accepting
+articles; articles can be rejected by the server after Cleanfeed
+passes them, for example if they are posted to groups not in your
+active file.
+.Ip "\fBtimer_interval\fR" 8
+The period over which to average timing information, in seconds.  The
+default is 600 seconds, or 10 minutes.
+.Sh "\fBDebugging\fR"
+.Ip "\fBdebug_batch_directory\fR" 8
+Specifies a directory where debugging \*(L"batchfiles\*(R" can be written.
+See the Hacker's Guide in this document for more information.
+.Ip "\fBdebug_batch_size\fR" 8
+The maximum size of a debugging batchfile before it gets rotated.
+Rotation is done by renaming the file to file.1, file.2, etc.,
+using the lowest number that doesn't already exist.
+.Sh "\fBRegular Expressions\fR"
+You can add to most of these regular expressions in the \fB%config_append\fR
+section of \fIcleanfeed.conf\fR; settings you add there will be added to
+the defaults, rather than overriding them.  If you want to completely
+override the default settings you can add entries for these to the
+\fB%config_local\fR section instead.
+.Ip "\fBbin_allowed\fR" 8
+This is a regular expression telling the anti-binary filter in which
+newsgroups binaries are allowed.  If all groups in the Newsgroups header
+match this pattern, binaries are allowed through the filter.  (This
+obviously has no effect when the binary filter is disabled.)  If the
+binary filter is enabled and this is set to a null string (by overriding
+the default in the local config) the result will be blocking all binaries
+regardless of where they are posted.
+.Ip "\fBpoison_groups\fR" 8
+If any groups in the Newsgroups header match this regexp, the article
+will be rejected.  Thus you can reject crossposts to certain groups even
+if they are also posted to groups you carry.
+.Ip "\fBhtml_allowed\fR" 8
+This is a regular expression telling the anti-\s-1HTML\s0 filter in which
+newsgroups \s-1HTML\s0 and multipart/alternative posts are allowed.  This
+only has an effect if \fBblock_html\fR is turned on (above).  The default
+(to allow \s-1HTML\s0 in microsoft.* groups) can be added to in \fIcleanfeed.conf\fR.
+.Sp
+If you don't want to allow \s-1HTML\s0 anywhere, not even the microsoft.*
+groups, override this setting in the local configuration and set it
+to a null string or undef.
+.Ip "\fBmd5exclude\fR" 8
+If an article is posted only to groups matching this regexp, the \s-1MD5\s0 \s-1EMP\s0
+filter will not be applied.  Useful for \*(L"test\*(R" groups where it's okay
+for lots of the posts to be the same.
+.Ip "\fBallexclude\fR" 8
+If an article is posted only to groups matching this regexp, \s-1NO\s0 checks
+are applied at all.
+.Ip "\fBlow_xpost_groups\fR" 8
+If a group matches this regular expression, it gets a special crosspost
+limit, set in \fBlow_xpost_maxgroups\fR, rather than the general crosspost
+limit set in \fBmaxgroups\fR.  This is useful for groups plagued by excessive
+crossposting, such as sex, binaries, and jobs groups.  The default is
+to limit crossposts to 6 groups in test, forsale, and jobs groups.
+Setting this to a null string, or undef, will disable this feature.
+.Ip "\fBbadguys\fR" 8
+This is a monster regular expression containing domains of known spammers.
+Only the \*(L"middle\*(R" part of the domains are listed; these are checked as
+email addresses in From headers by appending a list of top-level domains
+to the end, and as URLs by prepending http:// and an optional \*(L"www.\*(R".  If
+you modify this list, be \fIvery\fR careful not to end up with \*(L"||\*(R" in there
+(two \*(L"or\*(R" signs in a row); this will match every single post that comes
+through, which is Bad.
+.Ip "\fBbaddomainpat\fR" 8
+If a post contains a \s-1URL\s0 for a site whose domain name matches this
+pattern (in .com, .net, and .nu TLDs only) the post will be rejected.
+For example, there are hundreds of spamming porn sites whose domain names
+begin or end with \*(L"xxx\*(R".  This prevents us from having to keep up with
+their nonsense.  Yes, it's a little aggressive, but it works.
+.Ip "\fBexempt\fR" 8
+Regular expression of \s-1NNTP\s0\-Posting-Hosts that are exempt from the
+posting-host-based \s-1EMP\s0 filter.  This is for high-output systems where
+all posts contain the same \s-1NNTP\s0\-Posting-Host header, such as \s-1AOL\s0, which
+if not exempted would end up hitting the posting-host \s-1EMP\s0 filter with
+all of their posts.  There aren't many of these out there; a \*(L"regular\*(R"
+multi-user system does not present a problem because the filter doesn't
+kick in until it sees a large number of posts from the same posting-host
+and also of the same length, in a short period of time.
+.Ip "\fBsupersedes_exempt\fR" 8
+Regular expression of \s-1NNTP\s0\-Posting-Hosts that are exempt from the
+excessive supersedes filter.  Generally this will be systems which
+post a lot of FAQs.
+.Ip "\fBbad_cancel_paths\fR" 8
+Cancel messages will be rejected if the Path header contains elements
+matching this regular expression.  Also applied to the \s-1NNTP\s0\-Posting-Host.
+If \fBcheck_supersedes_path\fR is set, this will also be checked against
+the Path header of articles with Supersedes headers.  This list contains
+sites which are or have recently been the source of rogue cancel attacks.
+.Ip "\fBrefuse_messageids\fR (\s-1INN\s0 only)" 8
+If you have \fBdo_mid_filter\fR (above) enabled, and you have the optional
+message-id patch applied to \s-1INN\s0 (or otherwise have obtained the hook
+for filter_messageid in \s-1INN\s0 2.0), this regular expression will be applied
<<Diff was trimmed, longer than 597 lines>>


More information about the pld-cvs-commit mailing list