Improve spam checking and fix an error in spooling.
This commit is contained in:
parent
d985138dd1
commit
8ecdbdae6f
|
@ -443,8 +443,8 @@ function message_post($subject, $from, $newsgroups, $ref, $body, $encryptthis =
|
|||
/*
|
||||
* SPAM CHECK
|
||||
*/
|
||||
if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true)) {
|
||||
$spam_result_array = check_spam($subject, $from, $newsgroups, $ref, $body, $msgid);
|
||||
if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true) && ($CONFIG['enable_nntp'] != true)) {
|
||||
$spam_result_array = check_spam($subject, $from, $newsgroups, $ref, $body, $msgid, true);
|
||||
$res = $spam_result_array['res'];
|
||||
$spamresult = $spam_result_array['spamresult'];
|
||||
$spamcheckerversion = $spam_result_array['spamcheckerversion'];
|
||||
|
|
|
@ -1305,13 +1305,39 @@ function is_multibyte($s)
|
|||
return mb_strlen($s, 'utf-8') < strlen($s);
|
||||
}
|
||||
|
||||
function check_spam($subject, $from, $newsgroups, $ref, $body, $msgid)
|
||||
function check_spam($subject, $from, $newsgroups, $ref, $body, $msgid, $useheaders = false)
|
||||
{
|
||||
global $msgid_generate, $msgid_fqdn, $spooldir, $logdir;
|
||||
global $CONFIG;
|
||||
$spamdir = $spooldir . '/spam';
|
||||
if (! is_dir($spamdir)) {
|
||||
mkdir($spamdir);
|
||||
}
|
||||
$logfile = $logdir . '/spam.log';
|
||||
$spamfile = tempnam($spooldir, 'spam-');
|
||||
file_put_contents($spamfile, $body);
|
||||
if ($useheaders) {
|
||||
// Add headers
|
||||
$head = '';
|
||||
if (trim($subject) != '') {
|
||||
$head .= 'Subject: ' . $subject . "\r\n";
|
||||
}
|
||||
if (trim($from) != '') {
|
||||
$head .= 'From: ' . $from . "\r\n";
|
||||
}
|
||||
if (trim($newsgroups) != '') {
|
||||
$head .= 'Newsgroups: ' . $newsgroups . "\r\n";
|
||||
}
|
||||
if (trim($ref) != '') {
|
||||
$head .= 'References: ' . $ref . "\r\n";
|
||||
}
|
||||
if (trim($msgid) != '') {
|
||||
$head .= 'Message-ID: ' . $msgid . "\r\n";
|
||||
}
|
||||
$message = $head . "\r\n" . $body;
|
||||
} else {
|
||||
$message = $body;
|
||||
}
|
||||
file_put_contents($spamfile, $message);
|
||||
$spamcommand = $CONFIG['spamc'] . ' -E < ' . $spamfile;
|
||||
ob_start();
|
||||
passthru($spamcommand, $res);
|
||||
|
@ -1337,9 +1363,10 @@ function check_spam($subject, $from, $newsgroups, $ref, $body, $msgid)
|
|||
}
|
||||
unlink($spamfile);
|
||||
if ($res === 1) {
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $spamresult . "\n", FILE_APPEND);
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " identified spam: " . $from . " " . $newsgroups . " " . $msgid, FILE_APPEND);
|
||||
file_put_contents($spamdir . '/' . $msgid, $spamresult);
|
||||
} else {
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " Checked: " . $from . " " . $newsgroups . " " . $msgid . "\n------------\n", FILE_APPEND);
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " clean message: " . $from . " " . $newsgroups . " " . $msgid, FILE_APPEND);
|
||||
}
|
||||
return array(
|
||||
'res' => $res,
|
||||
|
@ -1898,6 +1925,43 @@ function get_next_article_number($group)
|
|||
return $local;
|
||||
}
|
||||
|
||||
/**
 * Report whether a Message-ID is already recorded for a newsgroup.
 *
 * Looks for a row matching both the Message-ID and the newsgroup, first in
 * the 'overview' table of articles-overview.db3 and, only if nothing is
 * found there, in the 'history' table of history.db3. Both database files
 * are located under the global $spooldir.
 *
 * @param string $msgid Message-ID to look up.
 * @param string $group Newsgroup the Message-ID is checked against.
 * @return bool true if either database holds a matching row, false otherwise.
 */
function check_duplicate_msgid($msgid, $group)
{
    global $spooldir;

    // Shared lookup for both databases: true as soon as one matching row is
    // seen. The statement is local to the closure, so it is released before
    // the caller drops the database handle.
    $has_msgid = function ($dbh, $table) use ($msgid, $group) {
        $stmt = $dbh->prepare("SELECT msgid FROM $table WHERE msgid=:msgid AND newsgroup=:newsgroup");
        $stmt->bindParam(':msgid', $msgid);
        $stmt->bindParam(':newsgroup', $group);
        $stmt->execute();
        while ($row = $stmt->fetch()) {
            // Re-check in PHP (as the original did), but as a strict string
            // comparison so numeric-looking ids are never juggled.
            if ((string) $row['msgid'] === (string) $msgid) {
                return true;
            }
        }
        return false;
    };

    // Overview database first.
    $table = 'overview';
    $dbh = overview_db_open($spooldir . '/articles-overview.db3', $table);
    $found = $has_msgid($dbh, $table);
    $dbh = null;
    if ($found) {
        // Already known; no need to open the history database at all.
        return true;
    }

    // Fall back to the history database.
    $table = 'history';
    $dbh = history_db_open($spooldir . '/history.db3', $table);
    $found = $has_msgid($dbh, $table);
    $dbh = null;

    return $found;
}
|
||||
|
||||
function insert_article_from_array($this_article, $check_duplicates = true)
|
||||
{
|
||||
global $CONFIG, $config_name, $spooldir, $logdir;
|
||||
|
@ -1906,47 +1970,24 @@ function insert_article_from_array($this_article, $check_duplicates = true)
|
|||
$grouppath = $path . preg_replace('/\./', '/', $group);
|
||||
|
||||
if ($check_duplicates) {
|
||||
// Create list of message-ids
|
||||
$database = $spooldir . '/articles-overview.db3';
|
||||
$table = 'overview';
|
||||
$dbh = overview_db_open($database, $table);
|
||||
$stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup");
|
||||
$stmt->bindParam(':newsgroup', $group);
|
||||
$stmt->execute();
|
||||
while ($row = $stmt->fetch()) {
|
||||
$msgids[$row['msgid']] = true;
|
||||
}
|
||||
$dbh = null;
|
||||
|
||||
// Check history database for deleted message-ids
|
||||
$database = $spooldir . '/history.db3';
|
||||
$table = 'history';
|
||||
$dbh = history_db_open($database, $table);
|
||||
$stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup");
|
||||
$stmt->bindParam(':newsgroup', $group);
|
||||
$stmt->execute();
|
||||
while ($row = $stmt->fetch()) {
|
||||
$msgids[$row['msgid']] = true;
|
||||
}
|
||||
$dbh = null;
|
||||
|
||||
if ($msgids[$this_article['mid']] == true) {
|
||||
echo "\nDuplicate Message-ID for: " . $group . ":" . $this_article['local'];
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $this_article['local'], FILE_APPEND);
|
||||
if (check_duplicate_msgid($this_article['mid'], $group)) {
|
||||
echo "\n(newsportal)Duplicate Message-ID for: " . $group . ":" . $this_article['mid'];
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $this_article['mid'], FILE_APPEND);
|
||||
return "441 Insert failed (duplicate)\r\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Open articles Database
|
||||
if ($CONFIG['article_database'] == '1') {
|
||||
$article_dbh = article_db_open($spooldir . '/' . $group . '-articles.db3');
|
||||
$article_sql = 'INSERT OR IGNORE INTO articles(newsgroup, number, msgid, date, name, subject, article, search_snippet) VALUES(?,?,?,?,?,?,?,?)';
|
||||
$article_sql = 'INSERT INTO articles(newsgroup, number, msgid, date, name, subject, article, search_snippet) VALUES(?,?,?,?,?,?,?,?)';
|
||||
$article_stmt = $article_dbh->prepare($article_sql);
|
||||
}
|
||||
// Open overview database
|
||||
$database = $spooldir . '/articles-overview.db3';
|
||||
$table = 'overview';
|
||||
$overview_dbh = overview_db_open($database, $table);
|
||||
$overview_sql = 'INSERT OR IGNORE INTO overview(newsgroup, number, msgid, date, datestring, name, subject, refs, bytes, lines, xref) VALUES(?,?,?,?,?,?,?,?,?,?,?)';
|
||||
$overview_sql = 'INSERT INTO overview(newsgroup, number, msgid, date, datestring, name, subject, refs, bytes, lines, xref) VALUES(?,?,?,?,?,?,?,?,?,?,?)';
|
||||
$overview_stmt = $overview_dbh->prepare($overview_sql);
|
||||
|
||||
// Overview
|
||||
|
@ -1985,7 +2026,7 @@ function insert_article_from_array($this_article, $check_duplicates = true)
|
|||
touch($grouppath . "/" . $this_article['local'], $article_date);
|
||||
}
|
||||
|
||||
echo "\nRetrieved: " . $group . " " . $this_article['local'];
|
||||
echo "\nSpooling: " . $group . " " . $this_article['local'];
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Spooling: " . $group . ":" . $this_article['local'], FILE_APPEND);
|
||||
$status = "spooled";
|
||||
$statusdate = time();
|
||||
|
|
|
@ -395,7 +395,7 @@ function process_post($message, $group)
|
|||
* SPAM CHECK
|
||||
*/
|
||||
if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true)) {
|
||||
$spam_result_array = check_spam($subject[1], $from[1], $newsgroups, $references, $body, $msgid);
|
||||
$spam_result_array = check_spam($subject[1], $from[1], $newsgroups, $references, $body, $msgid, true);
|
||||
$res = $spam_result_array['res'];
|
||||
$spamresult = $spam_result_array['spamresult'];
|
||||
$spamcheckerversion = $spam_result_array['spamcheckerversion'];
|
||||
|
|
|
@ -149,7 +149,7 @@ if ($CONFIG['remote_server'] != '') {
|
|||
if ($ns2) {
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Updating threads for: " . $name[0] . "...", FILE_APPEND);
|
||||
echo 'Use ns2: ' . $ns2 . "\n";
|
||||
thread_load_newsserver($ns2, $name[0], 0);
|
||||
// thread_load_newsserver($ns2, $name[0], 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -216,31 +216,6 @@ function get_articles($ns, $group)
|
|||
if ($article < $detail[2]) {
|
||||
$article = $detail[2];
|
||||
}
|
||||
|
||||
// Create list of message-ids
|
||||
$database = $spooldir . '/articles-overview.db3';
|
||||
$table = 'overview';
|
||||
$dbh = overview_db_open($database, $table);
|
||||
$stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup");
|
||||
$stmt->bindParam(':newsgroup', $group);
|
||||
$stmt->execute();
|
||||
while ($row = $stmt->fetch()) {
|
||||
$msgids[$row['msgid']] = true;
|
||||
}
|
||||
$dbh = null;
|
||||
|
||||
// Check history database for deleted message-ids
|
||||
$database = $spooldir . '/history.db3';
|
||||
$table = 'history';
|
||||
$dbh = history_db_open($database, $table);
|
||||
$stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup");
|
||||
$stmt->bindParam(':newsgroup', $group);
|
||||
$stmt->execute();
|
||||
while ($row = $stmt->fetch()) {
|
||||
$msgids[$row['msgid']] = true;
|
||||
}
|
||||
$dbh = null;
|
||||
|
||||
// Get overview from server
|
||||
$server_overview = array();
|
||||
$re = false;
|
||||
|
@ -277,9 +252,9 @@ function get_articles($ns, $group)
|
|||
if ($CONFIG['enable_nntp'] != true) {
|
||||
$local = $article;
|
||||
}
|
||||
if ($msgids[$overview_msgid[$article]] == true) {
|
||||
echo "\nDuplicate Message-ID for: " . $group . ":" . $local;
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $article, FILE_APPEND);
|
||||
if (check_duplicate_msgid($overview_msgid[$article], $group)) {
|
||||
echo "\n(spoolnews)Duplicate Message-ID for: " . $group . ":" . $overview_msgid[$article];
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $overview_msgid[$article], FILE_APPEND);
|
||||
$article ++;
|
||||
continue;
|
||||
}
|
||||
|
@ -312,6 +287,9 @@ function get_articles($ns, $group)
|
|||
}
|
||||
if ($is_header == 1) {
|
||||
$response = str_replace("\t", " ", $response);
|
||||
if (strpos($response, ': ') !== false) {
|
||||
$ref = 0;
|
||||
}
|
||||
// Find article date
|
||||
if (stripos($response, "Date: ") === 0) {
|
||||
$finddate = explode(': ', $response, 2);
|
||||
|
@ -323,28 +301,24 @@ function get_articles($ns, $group)
|
|||
if (preg_match($msgid_filter, $mid[1])) {
|
||||
$banned = "msgid_filter";
|
||||
}
|
||||
$ref = 0;
|
||||
}
|
||||
if (stripos($response, "From: ") === 0) {
|
||||
$from = explode(': ', $response, 2);
|
||||
if (preg_match($from_filter, $from[1])) {
|
||||
$banned = "from_filter";
|
||||
}
|
||||
$ref = 0;
|
||||
}
|
||||
if (stripos($response, "Path: ") === 0) {
|
||||
$msgpath = explode(': ', $response, 2);
|
||||
if (preg_match($path_filter, $msgpath[1])) {
|
||||
$banned = "path_filter";
|
||||
}
|
||||
$ref = 0;
|
||||
}
|
||||
if (stripos($response, "Subject: ") === 0) {
|
||||
$subject = explode('Subject: ', $response, 2);
|
||||
if (preg_match($subject_filter, $subject[1])) {
|
||||
$banned = "subject_filter";
|
||||
}
|
||||
$ref = 0;
|
||||
}
|
||||
if (stripos($response, "Newsgroups: ") === 0) {
|
||||
$response = str_ireplace($group, $group, $response);
|
||||
|
@ -367,14 +341,12 @@ function get_articles($ns, $group)
|
|||
$current_article['xref'] .= ' ' . $agroup . ':' . $artnum;
|
||||
}
|
||||
}
|
||||
$ref = 0;
|
||||
}
|
||||
if (stripos($response, "Xref: ") === 0) {
|
||||
if (isset($CONFIG['enable_nntp']) && $CONFIG['enable_nntp'] == true) {
|
||||
$is_xref = true;
|
||||
}
|
||||
$xref = $response;
|
||||
$ref = 0;
|
||||
}
|
||||
if (stripos($response, "Content-Type: ") === 0) {
|
||||
preg_match('/.*charset=.*/', $response, $te);
|
||||
|
@ -385,10 +357,8 @@ function get_articles($ns, $group)
|
|||
$references = $this_references[1];
|
||||
$ref = 1;
|
||||
}
|
||||
if ((stripos($response, ':') === false) && (strpos($response, '>'))) {
|
||||
if ($ref == 1) {
|
||||
$references = $references . $response;
|
||||
}
|
||||
if (preg_match('/^\s/', $response) && $ref == 1) {
|
||||
$references = $references . $response;
|
||||
}
|
||||
} else {
|
||||
$body .= $response . "\n";
|
||||
|
@ -400,7 +370,7 @@ function get_articles($ns, $group)
|
|||
$response = fgets($ns, 1200);
|
||||
if ($response == false) {
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Lost connection to " . $CONFIG['remote_server'] . ":" . $CONFIG['remote_port'] . " retrieving article " . $article, FILE_APPEND);
|
||||
unlink($grouppath . "/" . $local);
|
||||
unlink($articleHandle);
|
||||
break;
|
||||
// continue;
|
||||
}
|
||||
|
@ -412,7 +382,7 @@ function get_articles($ns, $group)
|
|||
$bytes = $bytes + ($lines * 2);
|
||||
// Don't spool article if $banned != 0
|
||||
if ($banned != false) {
|
||||
unlink($grouppath . "/" . $local);
|
||||
unlink($articleHandle);
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Skipping: " . $CONFIG['remote_server'] . " " . $group . ":" . $article . " banned in " . $banned, FILE_APPEND);
|
||||
$article ++;
|
||||
} else {
|
||||
|
@ -420,11 +390,11 @@ function get_articles($ns, $group)
|
|||
if (strpos($subject[1], $nocem_check) !== false) {
|
||||
$is_from = address_decode($from[1], 'nowhere');
|
||||
$nocem_file = tempnam($spooldir . "/nocem", $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]");
|
||||
copy($grouppath . "/" . $local, $nocem_file);
|
||||
copy($articleHandle, $nocem_file);
|
||||
chmod($nocem_file, 0644);
|
||||
if ($save_nocem_messages == true) {
|
||||
$saved_nocem_file = tempnam($nocem_dir, $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]-");
|
||||
copy($grouppath . "/" . $local, $saved_nocem_file);
|
||||
copy(articleHandle, $saved_nocem_file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -432,20 +402,20 @@ function get_articles($ns, $group)
|
|||
if (strpos($subject[1], $bbsmail_check) !== false) {
|
||||
$bbsmail_file = preg_replace('/@@RSL /', '', $subject[1]);
|
||||
$bbsmail_filename = $spooldir . "/bbsmail/in/bbsmail-" . $bbsmail_file;
|
||||
copy($grouppath . "/" . $local, $bbsmail_filename);
|
||||
copy($articleHandle, $bbsmail_filename);
|
||||
}
|
||||
}
|
||||
$this_article = file_get_contents($articleHandle);
|
||||
if ($CONFIG['article_database'] == '1') {
|
||||
$this_article = file_get_contents($grouppath . "/" . $local);
|
||||
unlink($articleHandle);
|
||||
// CREATE SEARCH SNIPPET
|
||||
$this_snippet = get_search_snippet($body, $content_type[1]);
|
||||
} else {
|
||||
if ($article_date > time()) {
|
||||
$article_date = time();
|
||||
}
|
||||
touch($grouppath . "/" . $local, $article_date);
|
||||
touch($articleHandle, $article_date);
|
||||
}
|
||||
|
||||
$current_article['mid'] = $mid[1];
|
||||
$current_article['epochdate'] = $article_date;
|
||||
$current_article['stringdate'] = $finddate[1];
|
||||
|
@ -460,14 +430,14 @@ function get_articles($ns, $group)
|
|||
// Check Spam
|
||||
$res = 0;
|
||||
if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true) && ($OVERRIDES['disable_spamassassin_spooling'] !== true)) {
|
||||
$spam_result_array = check_spam($subject[1], $from[1], $groupnames[1], $references, $body, $mid[1]);
|
||||
$spam_result_array = check_spam($subject[1], $from[1], $groupnames[1], $references, $this_article, $mid[1]);
|
||||
$res = $spam_result_array['res'];
|
||||
$spamresult = $spam_result_array['spamresult'];
|
||||
$spamcheckerversion = $spam_result_array['spamcheckerversion'];
|
||||
$spamlevel = $spam_result_array['spamlevel'];
|
||||
}
|
||||
if ($res === 1) {
|
||||
unlink($grouppath . "/" . $local);
|
||||
unlink($articleHandle);
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Skipping: " . $CONFIG['remote_server'] . " " . $group . ":" . $article . " Exceeds Spam Score", FILE_APPEND);
|
||||
// $orig_newsgroups = $newsgroups;
|
||||
// $newsgroups = $CONFIG['spamgroup'];
|
||||
|
@ -475,6 +445,7 @@ function get_articles($ns, $group)
|
|||
$i --;
|
||||
$local --;
|
||||
} else {
|
||||
$pass = false;
|
||||
foreach ($allgroups as $agroup) {
|
||||
$agroup = trim($agroup);
|
||||
if ((! testGroup($agroup)) || $agroup == '') {
|
||||
|
@ -483,11 +454,16 @@ function get_articles($ns, $group)
|
|||
$current_article['group'] = $agroup;
|
||||
if ($group == $agroup) {
|
||||
$current_article['local'] = $local;
|
||||
insert_article_from_array($current_article, false);
|
||||
} else {
|
||||
$current_article['local'] = get_next_article_number($agroup);
|
||||
insert_article_from_array($current_article, false);
|
||||
}
|
||||
$tmp = insert_article_from_array($current_article, true);
|
||||
if ($tmp[0] != "4") {
|
||||
$pass = true;
|
||||
}
|
||||
}
|
||||
if (! $pass) {
|
||||
$i --;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue