########### Sample BOGOFILTER Configuration File ###########################

# $Id: bogofilter.cf.example 6811 2009-02-21 20:32:50Z relson $

#   Default settings (as defined in the bogofilter source code)
#   have a single hash mark at the beginning of the line.
#   Alternate values have two hash marks.

#   Comment lines MUST have their hash mark in the leftmost column.
#   Comments can be added at the end of any line (after whitespace and a '#').
#   Blank lines are allowed.

########### General Settings ########################################

#### BOGOFILTER_DIR
#
#   directory for wordlists
#
#bogofilter_dir=~/.bogofilter
bogofilter_dir=/var/spool/bogofilter

#### name/location of user config file
#
#user_config_file=~/.bogofilter.cf
##user_config_file=~/.bogofilterrc
user_config_file=~/.bogofilter/config

#### TRANSACTIONS: enable/disable database transactions
#
#   boolean indicating whether transactions
#   should be enabled (yes) or disabled (no)
#
db_transaction=no       # default
##db_transaction=yes    # (alternate)

#### WORDLIST: define additional word lists
#
#   char type:  'r' (regular) or 'i' (ignore)
#   char *name: name of list, e.g. "system", "user", "ignore"
#   char *path: absolute path to file or
#               file name (relative to bogofilter_dir)
#   int order - once found, skip higher numbered lists
#
##wordlist i,ignore,~/ignorelist.db,1
##wordlist r,wordlist,~/wordlist.db,2

#### SPAM_HEADER_NAME
#
#   used in reporting spamicity and
#   in removing already existing headers
#
spam_header_name=X-Bogosity

#### SPAM_HEADER_PLACE
#
#   used in placing the SPAM_HEADER_NAME line
#
#spam_header_place=DomainKey-Signature

#### SPAM_SUBJECT_TAG
#
#   tag added to "Subject: " line for identifying spam or unsure
#   default is to add nothing.
#
##spam_subject_tag=***SPAM***
##unsure_subject_tag=???UNSURE???
#### STATS_IN_HEADER
#
#   non-zero (default): put spamicity info in message header
#   zero:               put spamicity info in message body
#   can use "bool" values of True, False, Yes, No, 1, or 0
#
stats_in_header=Yes     # default
##stats_in_header=No    # (alternate)

#### DB_CACHESIZE
#
#   non-zero: set this as DB cache size (in Mbytes)
#   zero:     use DB default cache size (.25 Mbyte in 4.0.14)
#
#   note that Berkeley DB increases any buffer size below 500 MB
#   by 25%!
#   This helps most when doing massive changes to the data base that
#   involve a lot of overwrites, such as registering mail boxes,
#   whereas it is mostly a waste of memory for read-only
#   applications such as scoring.
#   WARNING: If you set this too large, bogofilter will fail.
#
db_cachesize=0          # default
##db_cachesize=16       # (alternate)

#### DB_LOG_AUTOREMOVE
#
#   boolean indicating whether auto-removing of
#   logs should be enabled (yes) or disabled (no)
#
#db_log_autoremove=yes  # default
##db_log_autoremove=no  # (alternate)

#### TIMESTAMP
#
#   enables or disables token timestamps
#
timestamp=Yes

#### Format of spamicity output
#
#   for two-state output the third entry is not needed and not used
#
spamicity_tags = Spam, Ham, Unsure
spamicity_formats = %0.6f, %0.6f, %0.6f
#
##spamicity_tags = Yes, No, Unsure
##spamicity_formats = %0.6f, %0.6f, %0.6f

#### Format of SPAM_HEADER
#
#   formatting characters:
#
#   h - spam_header_name, e.g. "X-Bogosity"
#
#   c - classification, e.g. Yes/No, Spam/Ham/Unsure, +/-/?
#
#   D - date, fixed ISO-8601 format for Universal Time ("GMT")
#
#   e - spamicity as 'e' format
#   f - spamicity as 'f' format
#   g - spamicity as 'g' format
#
#   A - IP address (from first Received: statement having one)
#       Not guaranteed to be the originating address of the message.
#   I - Message ID
#   Q - Queue ID (from first id tag found in Received: headers)
#
#   l - logging tag (from '-l' option)
#
#   o - spam_cutoff, ex. cutoff=%o
#
#   p - spamicity value
#   d - if ham or unsure, the spamicity
#       if spam, difference of spamicity from 1.0
#
#   r - runtype
#   w - word count
#   m - message count
#
#   u - username - this will either be the login from getlogin(),
#       if that is empty, the pw_name obtained from
#       the password database, or the user id
#       prefixed by #, for instance, #1003
#
#   v - version
#
#   customizable messages:
#
#   header_format     - the "X-Bogosity" line that '-p' adds to
#                       the message header and '-v' outputs.
#   terse_format      - an abbreviated form of header_format;
#                       selected by command line option '-t'
#   log_header_format - written to syslog by '-u' option
#                       when classifying messages.
#   log_update_format - written to syslog by '-u' option
#                       when registering messages.
#
#
header_format = %h: %c, tests=bogofilter, spamicity=%p, version=%v
terse_format = %1.1c %f
#log_header_format = %h: %c, spamicity=%p, version=%v
#log_update_format = register-%r, %w words, %m messages

##log_header_format = %h: %c, spamicity=%f, ipaddr=%A, queueID=%Q, msgID=%I, version=%v

#### TERSE
#
#   if enabled, format the X-Bogosity using the 'terse_format' specification.
#
terse=no                # default
##terse=yes             # (alternate)

########### Tokenizer Settings ######################################

#### BLOCK ON SUBNETS
#
#   convert IPADDRs into a special token, url:1.2.3.4,
#   and also return url:1.2.3, url:1.2, and url:1
#   to allow identifying spammers by ip address / subnets.
#
#block_on_subnets=no

#### CHARSET handling
#
#   specify default charset
#
charset_default=iso-8859-1      # default
##charset_default=us-ascii      # (alternate)
##charset_default=cp866         # for Russian

#### REPLACE_NONASCII_CHARACTERS
#
#   replace non-7bit chars with '?'
#
#replace_nonascii_characters=N  # default
##replace_nonascii_characters=Y # (alternate)

#### UNICODE handling
#
#   boolean indicating whether raw storage (no) or unicode (yes)
#   is the default encoding for the wordlist
#
#unicode=yes            # default
##unicode=no            # (alternate)

#### lexer parameters
#
#   minimum and maximum lengths for single tokens
#
#min-token-len=3        # default
#max-token-len=30       # default
#
#   count and length for multi-word tokens
#   Note: if length not specified, defaults to
#         multi-token-count * max-token-len (approx)
#
#multi-token-count=1    # default
#max-multi-token-len=0  # default

########### Classification Constants Settings #######################
#
#   See man page for a more detailed description of the parameters.

#### MINIMUM DEVIATION
#
#   if token spamicity closer to EVEN_ODDS (0.5)
#   than MIN_DEV, don't use the word in the
#   spamicity calculation
#
#min_dev=0.375          # default

#### Robinson Constants
#
#   floating point values for
#   Robinson S and X coefficients.
#
#robs=0.0178            # default
#robx=0.52              # default

#### CUTOFF Values
#
#   both ham_cutoff and spam_cutoff are allowed.
#   setting ham_cutoff to a non-zero value will
#   enable tri-state results (Spam/Ham/Unsure).
#
#ham_cutoff = 0.45      # default
#spam_cutoff= 0.99      # default
#
#   for two-state classification:
#
##ham_cutoff = 0.00     # (alternate)
##spam_cutoff = 0.99    # default

#### Effective Size Factor Values
#
#ns_esf = 1.000         # default
#sp_esf = 1.000         # default

#### Auto-update threshold
#
#   Skip autoupdating if the spamicity is within this value
#   of 0.000000 (surely ham) or 1.000000 (surely spam).
#
## thresh_update=0.01   # (optional)

#### token count parameters
#
#   coerce the number of tokens used to score a message
#   Note: zero means no coercing
#
##token_count=0         # default
##token_count_min=0     # default
##token_count_max=0     # default