# Load the FuzzyOcr plugin and declare the rules it can fire.
loadplugin FuzzyOcr FuzzyOcr.pm

# Main rule: evaluated through the plugin's fuzzyocr_check() eval function.
body FUZZY_OCR eval:fuzzyocr_check()
describe FUZZY_OCR Mail contains an image with common spam text inside

# The rules below are bound to dummy_check(); presumably the plugin raises
# them itself while FUZZY_OCR is being evaluated -- confirm against FuzzyOcr.pm.
body FUZZY_OCR_WRONG_CTYPE eval:dummy_check()
describe FUZZY_OCR_WRONG_CTYPE Mail contains an image with wrong content-type set
body FUZZY_OCR_CORRUPT_IMG eval:dummy_check()
describe FUZZY_OCR_CORRUPT_IMG Mail contains a corrupted image
body FUZZY_OCR_KNOWN_HASH eval:dummy_check()
describe FUZZY_OCR_KNOWN_HASH Mail contains an image with known hash

# Rules with a higher priority value run later, so FUZZY_OCR is evaluated
# after rules left at the default priority.
priority FUZZY_OCR 900

########### Plugin Configuration #############

#### Logging options #####
# Verbosity level (see manual). Attention: Don't set to 0, but to 0.0 for quiet operation. (Default value: 1)
#focr_verbose 2
#
# Logfile (make sure it is writable by the plugin) (Default value: /etc/mail/spamassassin/FuzzyOcr.log)
#focr_logfile /etc/mail/spamassassin/FuzzyOcr.log
##########################

##### Wordlists #####
# Here we define the words to scan for (Default value: /etc/mail/spamassassin/FuzzyOcr.words)
#focr_global_wordlist /etc/mail/spamassassin/FuzzyOcr.words
#
# This is the path RELATIVE to the respective home directory for the personalized list.
# This list is merged with the global word list on execution (Default value: .spamassassin/fuzzyocr.words)
# If focr_personal_wordlist begins with '/', the option is treated as a fixed path and HOME is not searched.
#focr_personal_wordlist .spamassassin/fuzzyocr.words
#####################

# These parameters can be used to change other detection settings.
# If you leave these commented out, the defaults will be used.
# Do not use " " around any parameters!
#
##### Location of helper applications (path + binary) (Default values: /usr/bin/) #####
#focr_bin_giffix /usr/bin/giffix
#focr_bin_giftext /usr/bin/giftext
#focr_bin_gifinter /usr/bin/gifinter
#focr_bin_giftopnm /usr/bin/giftopnm
#focr_bin_jpegtopnm /usr/bin/jpegtopnm
#focr_bin_pngtopnm /usr/bin/pngtopnm
#focr_bin_bmptopnm /usr/bin/bmptopnm
#focr_bin_ppmhist /usr/bin/ppmhist
#focr_bin_gocr /usr/bin/gocr
#
#focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin
#
############################################################################################

##### Scansets, comma separated (Default value: $gocr -i -, $gocr -l 180 -d 2 -i -) #####
# Each scanset consists of one or more commands which make text out of pnm input.
# Each scanset is run separately on the PNM data; results are combined in scoring.
# The line below is the active setting: three gocr passes over $pfile.
focr_scansets $gocr -i $pfile, $gocr -l 180 -d 2 -i $pfile, $gocr -l 140 -d 2 -i $pfile
#
# To use only one scan with default values, uncomment the next line instead
#focr_scansets $gocr -i $pfile
#
# Some examples of more advanced sets
# This one first uses the standard scan, then a scanset which first reduces the image to 3 colors and then scans it with custom settings,
# and then it scans again with only these custom settings.
# NOTE: This is for advanced users only, if you have questions how to use this, ask on the ML or on IRC
#focr_scansets $gocr -i $pfile, pnmnorm $pfile 2>$efile | pnmquant 3 2>>$efile | pnmnorm 2>>$efile | $gocr -l 180 -d 2 -i -, $gocr -l 180 -d 2 -i $pfile
#########################################################################################

##### Various Score/Scan settings #####
# Timeout for the plugin, in seconds. (Maximum runtime of the plugin) (Default value: 10)
#focr_timeout 15
#
# Default detection threshold (see manual) (Default value: 0.3) (Can be changed on a per word basis in the wordlist).
#focr_threshold 0.3
#
# This is the score for a hit after focr_counts_required matches
#focr_base_score 5
#
# This is the additional score for every additional match after focr_counts_required matches (Default value: 1)
#focr_add_score 0.375
#
# This is the score to give for a wrong content-type (e.g. JPEG image but content type says GIF) (Default value: 1.5)
#focr_wrongctype_score 1.5
#
# This is the score to give for a corrupted image (This currently affects only GIF images) (Default value: 2.5)
#focr_corrupt_score 2.5
#
# This is the score to give for a corrupted unfixable image (This currently affects only GIF images) (Default value: 5)
#focr_corrupt_unfixable_score 5
#
# This is used to disable the OCR engine if the message already has more points than this value (Default value: 10)
#focr_autodisable_score 20
#
# Minimum number of matches before the rule scores (Default value: 2)
#focr_counts_required 3
#
# Specifies how many frames an animated gif must contain so that the second (less resource consuming) animated gif test is used. (Default value: 5)
#focr_gif_max_frames 5
#
# For animated-gif images:
# Specifies the threshold for the delay of a frame in an animated-gif image (Default value: 100)
# + If the delay is greater than the threshold, the frame is considered; otherwise, the frame is skipped.
#focr_anim_delay 100
#
# Specifies the number of frames to consider (Default value: 2)
# + Keeps at least the largest N frames.
#focr_anim_max_frames 2
#
#######################################

##### Image Hash Database settings (Experimental, disabled by default) #####
#
# Set this to 1 or 2 to enable the Image Hash database feature (Default value: 0.0)
# Value = 1 ... use digest_hash only
# Value = 2 ... use digest_db w/digest_hash import
#focr_enable_image_hashing 2
#
# The score is saved with the hash in the database, so no extra scoring for a db hit is required.
#
# If the image hash database feature is enabled, specify the file here to use as database
# (Default value: /etc/mail/spamassassin/FuzzyOcr.hashdb)
#focr_digest_db /etc/mail/spamassassin/FuzzyOcr.hashdb
#
# If the image hash db feature is enabled, specify the file here to use as database
# (Default value: /etc/mail/spamassassin/FuzzyOcr.db)
#focr_db_hash /etc/mail/spamassassin/FuzzyOcr.db
#
# If the image hash db feature is enabled, specify the file here to use as database
# (Default value: /etc/mail/spamassassin/FuzzyOcr.safe.db)
#focr_db_safe /etc/mail/spamassassin/FuzzyOcr.safe.db
#
# Expire records from focr_digest_db after this many days (Default value: 35)
#focr_db_max_days 15
#
# Automatically add hashes of spam images recognized by OCR to the Image Hash database. To disable, set to 0.0 (Default value: 1)
#focr_hashing_learn_scanned 1
#
# Keep files that generate errors
# 0 = always cleanup
# 1 = keep only if error
# 2 = always keep
#focr_keep_bad_images 1
#
# Score images whose global word count is below focr_counts_required using focr_add_score * word count as score.
#focr_score_ham 1
######################################################################