Only in FuzzyOcr-2.3e: CHANGES diff -rbc FuzzyOcr-2.3d/FuzzyOcr.cf FuzzyOcr-2.3e/FuzzyOcr.cf *** FuzzyOcr-2.3d/FuzzyOcr.cf Fri Sep 8 10:43:45 2006 --- FuzzyOcr-2.3e/FuzzyOcr.cf Tue Sep 12 08:28:36 2006 *************** *** 55,70 **** ##### Scansets, comma seperated (Default value: $gocr -i -, $gocr -l 180 -d 2 -i -) ##### # Each scanset consists of one or more commands which make text out of pnm input. # Each scanset is run seperately on the PNM data, results are combined in scoring. ! #focr_scansets $gocr -i -, $gocr -l 180 -d 2 -i - # # To use only one scan with default values, uncomment the next line instead ! #focr_scansets $gocr -i - # # Some example for more advanced sets ! # Thisone uses the first the standard scan, then a scanset which first reduces the image to 3 colors and then scans it with custom settings ! # and then it scans again only with these custom settings # NOTE: This is for advanced users only, if you have questions how to use this, ask on the ML or on IRC ! #focr_scansets $gocr -i -, pnmnorm 2>$errfile | pnmquant 3 2>>$errfile | pnmnorm 2>>$errfile | $gocr -l 180 -d 2 -i -, $gocr -l 180 -d 2 -i - ######################################################################################### ##### Various Score/Scan settings ##### --- 55,71 ---- ##### Scansets, comma seperated (Default value: $gocr -i -, $gocr -l 180 -d 2 -i -) ##### # Each scanset consists of one or more commands which make text out of pnm input. # Each scanset is run seperately on the PNM data, results are combined in scoring. ! #focr_scansets $gocr -i $pfile, $gocr -l 180 -d 2 -i $pfile # # To use only one scan with default values, uncomment the next line instead ! #focr_scansets $gocr -i $pfile # # Some example for more advanced sets ! # This one uses the first the standard scan, then a scanset which first reduces the image ! # to 3 colors and then scans it with custom settings and then it scans again only with ! # these custom settings # NOTE: This is for advanced users only, if you have questions how to use this, ask on the ML or on IRC ! #focr_scansets $gocr -i $pfile, pnmnorm $pfile 2>$efile| pnmquant 3 2>>$efile | pnmnorm 2>>$efile | $gocr -l 180 -d 2 -i -, $gocr -l 180 -d 2 -i $pfile ######################################################################################### ##### Various Score/Scan settings ##### diff -rbc FuzzyOcr-2.3d/FuzzyOcr.pm FuzzyOcr-2.3e/FuzzyOcr.pm *** FuzzyOcr-2.3d/FuzzyOcr.pm Mon Sep 11 11:08:03 2006 --- FuzzyOcr-2.3e/FuzzyOcr.pm Tue Sep 12 08:28:48 2006 *************** *** 90,96 **** our @pgm_opts = qw/personal_wordlist global_wordlist logfile threshold counts_required verbose timeout gif_max_frames ! db_hash db_max_days path_bin scansets keep_bad_images enable_image_hashing digest_db hashing_learn_scanned/; our @paths = qw(/usr/local/netpbm/bin /usr/local/bin /usr/bin); --- 90,96 ---- our @pgm_opts = qw/personal_wordlist global_wordlist logfile threshold counts_required verbose timeout gif_max_frames ! db_safe db_hash db_max_days path_bin scansets keep_bad_images enable_image_hashing digest_db hashing_learn_scanned/; our @paths = qw(/usr/local/netpbm/bin /usr/local/bin /usr/bin); *************** *** 358,477 **** else { return $_[0] } } - sub reorder { - my $tmp = join( '', @_ ); - return split( '\n', $tmp ); - } - - sub pipe_io { - $SIG{PIPE} = 'IGNORE'; - my $pipecmd = shift; - my $input = shift; - my $filecount = 0; - my $silent = 0; - my $ignerror = 0; - my $tmpdir; - my @stdout = (); - my @stderr = (); - my ( $tmpfile, $tfilepath ) = Mail::SpamAssassin::Util::secure_tmpfile(); - my ( $errfile, $efilepath ) = Mail::SpamAssassin::Util::secure_tmpfile(); - close($tmpfile); - close($errfile); - if ($tmpfile eq $errfile) { - debuglog("Got same tmpfile twice! Aborting pipe_io() to avoid deadlocking"); - return ( 1, \@stdout, \@stderr ); - unlink($tmpfile); - } - - if($pipecmd =~ /\$tmpdir/) { - $tmpdir = Mail::SpamAssassin::Util::secure_tmpdir(); - $pipecmd =~ s/\$tmpdir/$tmpdir/g; - $filecount = shift; - } else { - $silent = shift; - $ignerror = shift; - } - - $pipecmd =~ s/\$errfile/$errfile/g; - my $pipe_pid = open( PIPE_IN, "| $pipecmd 1>$tmpfile 2>>$errfile" ); - - unless ($pipe_pid) { - unless($silent) { - handle_error( $err_msges[0], ( $pipecmd, $? >> 8, $!, $tmpfile ) ); - } - unlink($tmpfile); - unlink($errfile); - return ( $?, \@stdout, \@stderr ); - } - flock( PIPE_IN, LOCK_EX ); - print PIPE_IN $input; - flock( PIPE_IN, LOCK_UN ); - close(PIPE_IN); - if ($? and not $ignerror) { - unless($silent) { - handle_error( $err_msges[1], ( $pipecmd, $? >> 8, $!, $tmpfile ) ); - } - unlink($tmpfile); - unlink($errfile); - return ( $?, \@stdout, \@stderr ); - } - if ($filecount) { - my $tsize = 0; - my $tcount = 0; - foreach my $nr (0..$filecount-1) { - my $filesize = 0; - if ($nr < 10) { - $filesize = -s "$tmpdir/out0$nr.gif"; - } else { - $filesize = -s "$tmpdir/out$nr.gif"; - } - if ($filesize > $tsize) { - $tsize = $filesize; - $tcount = $nr; - } - } - if ($tcount < 10) { - open( PIPE_OUT, "< $tmpdir/out0$tcount.gif" ); - } else { - open( PIPE_OUT, "< $tmpdir/out$tcount.gif" ); - } - flock( PIPE_OUT, LOCK_EX ); - @stdout = ; - flock( PIPE_OUT, LOCK_UN ); - close PIPE_OUT; - foreach my $nr (0..$filecount) { - if ($nr < 10) { - unlink("$tmpdir/out0$nr.gif"); - } else { - unlink("$tmpdir/out$nr.gif"); - } - } - rmdir($tmpdir); - } else { - unless (open( PIPE_OUT, "< $tmpfile" ) - and open( PIPE_ERR, "< $errfile" ) ) - { - unless($silent) { - handle_error( $err_msges[1], ( $pipecmd, $? >> 8, $!, $tmpfile ) ); - } - unlink($tmpfile); - unlink($errfile); - return ( $?, \@stdout, \@stderr ); - } - flock( PIPE_OUT, LOCK_EX ); - flock( PIPE_ERR, LOCK_EX ); - @stdout = ; - @stderr = ; - flock( PIPE_OUT, LOCK_UN ); - flock( PIPE_ERR, LOCK_UN ); - close(PIPE_OUT); - close(PIPE_ERR); - } - unlink($tmpfile) if (-e $tmpfile); - unlink($errfile) if (-e $errfile); - return ( 0, \@stdout, \@stderr ); - } - sub handle_error { my ( $err_msg, @var_vals ) = @_; debuglog(sprintf( $err_msg, @var_vals )); --- 358,363 ---- *************** *** 731,737 **** sub wrong_ctype { my ( $format, $ctype ) = @_; ! if ($Score{wctypescore}) { my $debuginfo = ""; if ( $Option{verbose} > 0 ) { $debuginfo = --- 617,623 ---- sub wrong_ctype { my ( $format, $ctype ) = @_; ! if ($Score{wrongctype}) { my $debuginfo = ""; if ( $Option{verbose} > 0 ) { $debuginfo = *************** *** 740,748 **** } for my $set ( 0 .. 3 ) { $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_CTYPE"} = ! sprintf( "%0.3f", $Score{wctypescore} ); } ! $pms->_handle_hit( "FUZZY_OCR_WRONG_CTYPE", $Score{wctypescore}, "BODY: ", $pms->{conf}->{descriptions}->{FUZZY_OCR_WRONG_CTYPE} . "\n$debuginfo" ); } } --- 626,634 ---- } for my $set ( 0 .. 3 ) { $pms->{conf}->{scoreset}->[$set]->{"FUZZY_OCR_WRONG_CTYPE"} = ! sprintf( "%0.3f", $Score{wrongctype} ); } ! $pms->_handle_hit( "FUZZY_OCR_WRONG_CTYPE", $Score{wrongctype}, "BODY: ", $pms->{conf}->{descriptions}->{FUZZY_OCR_WRONG_CTYPE} . "\n$debuginfo" ); } } *************** *** 885,891 **** my $ptype = 0; my $tfile = $file; my $pfile = $file . ".pnm"; ! my $efile = $file . ".stderr"; if ( substr($$pic{header},0,3) eq "\x47\x49\x46" ) { debuglog("Found GIF header name=\"$$pic{fname}\""); --- 771,777 ---- my $ptype = 0; my $tfile = $file; my $pfile = $file . ".pnm"; ! my $efile = $file . ".err"; if ( substr($$pic{header},0,3) eq "\x47\x49\x46" ) { debuglog("Found GIF header name=\"$$pic{fname}\""); *************** *** 1142,1151 **** my $scan = $scanset; $scan =~ s/\$gocr/$App{gocr}/; $scan =~ s/\$pfile/$pfile/; debuglog("Trying: $scanset"); my @ocrdata; $retcode = $t->run_and_catch(sub { ! @ocrdata = qx($scan 2>$efile); }); if ($retcode) { chomp $retcode; --- 1028,1039 ---- my $scan = $scanset; $scan =~ s/\$gocr/$App{gocr}/; $scan =~ s/\$pfile/$pfile/; + $scan =~ s/\$efile/$efile/; + unlink $efile if -e $efile; debuglog("Trying: $scanset"); my @ocrdata; $retcode = $t->run_and_catch(sub { ! @ocrdata = qx($scan 2>>$efile); }); if ($retcode) { chomp $retcode;