Improved spam checking and fixed an error in spooling.

This commit is contained in:
Retro_Guy 2023-12-26 09:05:40 -07:00
parent d985138dd1
commit 8ecdbdae6f
4 changed files with 105 additions and 88 deletions

View File

@ -443,8 +443,8 @@ function message_post($subject, $from, $newsgroups, $ref, $body, $encryptthis =
/* /*
* SPAM CHECK * SPAM CHECK
*/ */
if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true)) { if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true) && ($CONFIG['enable_nntp'] != true)) {
$spam_result_array = check_spam($subject, $from, $newsgroups, $ref, $body, $msgid); $spam_result_array = check_spam($subject, $from, $newsgroups, $ref, $body, $msgid, true);
$res = $spam_result_array['res']; $res = $spam_result_array['res'];
$spamresult = $spam_result_array['spamresult']; $spamresult = $spam_result_array['spamresult'];
$spamcheckerversion = $spam_result_array['spamcheckerversion']; $spamcheckerversion = $spam_result_array['spamcheckerversion'];

View File

@ -1305,13 +1305,39 @@ function is_multibyte($s)
return mb_strlen($s, 'utf-8') < strlen($s); return mb_strlen($s, 'utf-8') < strlen($s);
} }
function check_spam($subject, $from, $newsgroups, $ref, $body, $msgid) function check_spam($subject, $from, $newsgroups, $ref, $body, $msgid, $useheaders = false)
{ {
global $msgid_generate, $msgid_fqdn, $spooldir, $logdir; global $msgid_generate, $msgid_fqdn, $spooldir, $logdir;
global $CONFIG; global $CONFIG;
$spamdir = $spooldir . '/spam';
if (! is_dir($spamdir)) {
mkdir($spamdir);
}
$logfile = $logdir . '/spam.log'; $logfile = $logdir . '/spam.log';
$spamfile = tempnam($spooldir, 'spam-'); $spamfile = tempnam($spooldir, 'spam-');
file_put_contents($spamfile, $body); if ($useheaders) {
// Add headers
$head = '';
if (trim($subject) != '') {
$head .= 'Subject: ' . $subject . "\r\n";
}
if (trim($from) != '') {
$head .= 'From: ' . $from . "\r\n";
}
if (trim($newsgroups) != '') {
$head .= 'Newsgroups: ' . $newsgroups . "\r\n";
}
if (trim($ref) != '') {
$head .= 'References: ' . $ref . "\r\n";
}
if (trim($msgid) != '') {
$head .= 'Message-ID: ' . $msgid . "\r\n";
}
$message = $head . "\r\n" . $body;
} else {
$message = $body;
}
file_put_contents($spamfile, $message);
$spamcommand = $CONFIG['spamc'] . ' -E < ' . $spamfile; $spamcommand = $CONFIG['spamc'] . ' -E < ' . $spamfile;
ob_start(); ob_start();
passthru($spamcommand, $res); passthru($spamcommand, $res);
@ -1337,9 +1363,10 @@ function check_spam($subject, $from, $newsgroups, $ref, $body, $msgid)
} }
unlink($spamfile); unlink($spamfile);
if ($res === 1) { if ($res === 1) {
file_put_contents($logfile, "\n" . format_log_date() . " " . $spamresult . "\n", FILE_APPEND); file_put_contents($logfile, "\n" . format_log_date() . " identified spam: " . $from . " " . $newsgroups . " " . $msgid, FILE_APPEND);
file_put_contents($spamdir . '/' . $msgid, $spamresult);
} else { } else {
file_put_contents($logfile, "\n" . format_log_date() . " Checked: " . $from . " " . $newsgroups . " " . $msgid . "\n------------\n", FILE_APPEND); file_put_contents($logfile, "\n" . format_log_date() . " clean message: " . $from . " " . $newsgroups . " " . $msgid, FILE_APPEND);
} }
return array( return array(
'res' => $res, 'res' => $res,
@ -1898,6 +1925,43 @@ function get_next_article_number($group)
return $local; return $local;
} }
/**
 * Report whether a Message-ID is already recorded for a newsgroup.
 *
 * Looks for a row matching both the Message-ID and the newsgroup in the
 * `overview` table of $spooldir/articles-overview.db3 and, only if nothing
 * is found there, in the `history` table of $spooldir/history.db3.
 *
 * @param string $msgid Message-ID to look up.
 * @param string $group Newsgroup the Message-ID is checked against.
 * @return bool true when a matching row exists in either database,
 *              false otherwise.
 */
function check_duplicate_msgid($msgid, $group)
{
    global $spooldir;

    // Shared lookup used for both databases. The statement handle is local
    // to the closure, so it is released before the caller drops $dbh.
    $row_exists = function ($dbh, $table) use ($msgid, $group) {
        // Only msgid is inspected, so do not pull whole rows with SELECT *.
        $stmt = $dbh->prepare("SELECT msgid FROM $table WHERE msgid=:msgid AND newsgroup=:newsgroup");
        $stmt->bindValue(':msgid', $msgid);
        $stmt->bindValue(':newsgroup', $group);
        $stmt->execute();
        while ($row = $stmt->fetch()) {
            // Strict string comparison: a loose == would treat distinct
            // numeric-looking strings (e.g. "1e3" and "1000") as equal.
            if ((string) $row['msgid'] === (string) $msgid) {
                return true; // first hit is enough
            }
        }
        return false;
    };

    // Articles currently present in the overview database.
    $dbh = overview_db_open($spooldir . '/articles-overview.db3', 'overview');
    $found = $row_exists($dbh, 'overview');
    $dbh = null;
    if ($found) {
        // Already known; no need to open the history database at all.
        return true;
    }

    // History database, consulted only when the overview had no match.
    $dbh = history_db_open($spooldir . '/history.db3', 'history');
    $found = $row_exists($dbh, 'history');
    $dbh = null;

    return $found;
}
function insert_article_from_array($this_article, $check_duplicates = true) function insert_article_from_array($this_article, $check_duplicates = true)
{ {
global $CONFIG, $config_name, $spooldir, $logdir; global $CONFIG, $config_name, $spooldir, $logdir;
@ -1906,47 +1970,24 @@ function insert_article_from_array($this_article, $check_duplicates = true)
$grouppath = $path . preg_replace('/\./', '/', $group); $grouppath = $path . preg_replace('/\./', '/', $group);
if ($check_duplicates) { if ($check_duplicates) {
// Create list of message-ids if (check_duplicate_msgid($this_article['mid'], $group)) {
$database = $spooldir . '/articles-overview.db3'; echo "\n(newsportal)Duplicate Message-ID for: " . $group . ":" . $this_article['mid'];
$table = 'overview'; file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $this_article['mid'], FILE_APPEND);
$dbh = overview_db_open($database, $table);
$stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup");
$stmt->bindParam(':newsgroup', $group);
$stmt->execute();
while ($row = $stmt->fetch()) {
$msgids[$row['msgid']] = true;
}
$dbh = null;
// Check history database for deleted message-ids
$database = $spooldir . '/history.db3';
$table = 'history';
$dbh = history_db_open($database, $table);
$stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup");
$stmt->bindParam(':newsgroup', $group);
$stmt->execute();
while ($row = $stmt->fetch()) {
$msgids[$row['msgid']] = true;
}
$dbh = null;
if ($msgids[$this_article['mid']] == true) {
echo "\nDuplicate Message-ID for: " . $group . ":" . $this_article['local'];
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $this_article['local'], FILE_APPEND);
return "441 Insert failed (duplicate)\r\n"; return "441 Insert failed (duplicate)\r\n";
} }
} }
// Open articles Database // Open articles Database
if ($CONFIG['article_database'] == '1') { if ($CONFIG['article_database'] == '1') {
$article_dbh = article_db_open($spooldir . '/' . $group . '-articles.db3'); $article_dbh = article_db_open($spooldir . '/' . $group . '-articles.db3');
$article_sql = 'INSERT OR IGNORE INTO articles(newsgroup, number, msgid, date, name, subject, article, search_snippet) VALUES(?,?,?,?,?,?,?,?)'; $article_sql = 'INSERT INTO articles(newsgroup, number, msgid, date, name, subject, article, search_snippet) VALUES(?,?,?,?,?,?,?,?)';
$article_stmt = $article_dbh->prepare($article_sql); $article_stmt = $article_dbh->prepare($article_sql);
} }
// Open overview database // Open overview database
$database = $spooldir . '/articles-overview.db3'; $database = $spooldir . '/articles-overview.db3';
$table = 'overview'; $table = 'overview';
$overview_dbh = overview_db_open($database, $table); $overview_dbh = overview_db_open($database, $table);
$overview_sql = 'INSERT OR IGNORE INTO overview(newsgroup, number, msgid, date, datestring, name, subject, refs, bytes, lines, xref) VALUES(?,?,?,?,?,?,?,?,?,?,?)'; $overview_sql = 'INSERT INTO overview(newsgroup, number, msgid, date, datestring, name, subject, refs, bytes, lines, xref) VALUES(?,?,?,?,?,?,?,?,?,?,?)';
$overview_stmt = $overview_dbh->prepare($overview_sql); $overview_stmt = $overview_dbh->prepare($overview_sql);
// Overview // Overview
@ -1985,7 +2026,7 @@ function insert_article_from_array($this_article, $check_duplicates = true)
touch($grouppath . "/" . $this_article['local'], $article_date); touch($grouppath . "/" . $this_article['local'], $article_date);
} }
echo "\nRetrieved: " . $group . " " . $this_article['local']; echo "\nSpooling: " . $group . " " . $this_article['local'];
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Spooling: " . $group . ":" . $this_article['local'], FILE_APPEND); file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Spooling: " . $group . ":" . $this_article['local'], FILE_APPEND);
$status = "spooled"; $status = "spooled";
$statusdate = time(); $statusdate = time();

View File

@ -395,7 +395,7 @@ function process_post($message, $group)
* SPAM CHECK * SPAM CHECK
*/ */
if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true)) { if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true)) {
$spam_result_array = check_spam($subject[1], $from[1], $newsgroups, $references, $body, $msgid); $spam_result_array = check_spam($subject[1], $from[1], $newsgroups, $references, $body, $msgid, true);
$res = $spam_result_array['res']; $res = $spam_result_array['res'];
$spamresult = $spam_result_array['spamresult']; $spamresult = $spam_result_array['spamresult'];
$spamcheckerversion = $spam_result_array['spamcheckerversion']; $spamcheckerversion = $spam_result_array['spamcheckerversion'];

View File

@ -149,7 +149,7 @@ if ($CONFIG['remote_server'] != '') {
if ($ns2) { if ($ns2) {
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Updating threads for: " . $name[0] . "...", FILE_APPEND); file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Updating threads for: " . $name[0] . "...", FILE_APPEND);
echo 'Use ns2: ' . $ns2 . "\n"; echo 'Use ns2: ' . $ns2 . "\n";
thread_load_newsserver($ns2, $name[0], 0); // thread_load_newsserver($ns2, $name[0], 0);
} }
} }
} }
@ -216,31 +216,6 @@ function get_articles($ns, $group)
if ($article < $detail[2]) { if ($article < $detail[2]) {
$article = $detail[2]; $article = $detail[2];
} }
// Create list of message-ids
$database = $spooldir . '/articles-overview.db3';
$table = 'overview';
$dbh = overview_db_open($database, $table);
$stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup");
$stmt->bindParam(':newsgroup', $group);
$stmt->execute();
while ($row = $stmt->fetch()) {
$msgids[$row['msgid']] = true;
}
$dbh = null;
// Check history database for deleted message-ids
$database = $spooldir . '/history.db3';
$table = 'history';
$dbh = history_db_open($database, $table);
$stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup");
$stmt->bindParam(':newsgroup', $group);
$stmt->execute();
while ($row = $stmt->fetch()) {
$msgids[$row['msgid']] = true;
}
$dbh = null;
// Get overview from server // Get overview from server
$server_overview = array(); $server_overview = array();
$re = false; $re = false;
@ -277,9 +252,9 @@ function get_articles($ns, $group)
if ($CONFIG['enable_nntp'] != true) { if ($CONFIG['enable_nntp'] != true) {
$local = $article; $local = $article;
} }
if ($msgids[$overview_msgid[$article]] == true) { if (check_duplicate_msgid($overview_msgid[$article], $group)) {
echo "\nDuplicate Message-ID for: " . $group . ":" . $local; echo "\n(spoolnews)Duplicate Message-ID for: " . $group . ":" . $overview_msgid[$article];
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $article, FILE_APPEND); file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $overview_msgid[$article], FILE_APPEND);
$article ++; $article ++;
continue; continue;
} }
@ -312,6 +287,9 @@ function get_articles($ns, $group)
} }
if ($is_header == 1) { if ($is_header == 1) {
$response = str_replace("\t", " ", $response); $response = str_replace("\t", " ", $response);
if (strpos($response, ': ') !== false) {
$ref = 0;
}
// Find article date // Find article date
if (stripos($response, "Date: ") === 0) { if (stripos($response, "Date: ") === 0) {
$finddate = explode(': ', $response, 2); $finddate = explode(': ', $response, 2);
@ -323,28 +301,24 @@ function get_articles($ns, $group)
if (preg_match($msgid_filter, $mid[1])) { if (preg_match($msgid_filter, $mid[1])) {
$banned = "msgid_filter"; $banned = "msgid_filter";
} }
$ref = 0;
} }
if (stripos($response, "From: ") === 0) { if (stripos($response, "From: ") === 0) {
$from = explode(': ', $response, 2); $from = explode(': ', $response, 2);
if (preg_match($from_filter, $from[1])) { if (preg_match($from_filter, $from[1])) {
$banned = "from_filter"; $banned = "from_filter";
} }
$ref = 0;
} }
if (stripos($response, "Path: ") === 0) { if (stripos($response, "Path: ") === 0) {
$msgpath = explode(': ', $response, 2); $msgpath = explode(': ', $response, 2);
if (preg_match($path_filter, $msgpath[1])) { if (preg_match($path_filter, $msgpath[1])) {
$banned = "path_filter"; $banned = "path_filter";
} }
$ref = 0;
} }
if (stripos($response, "Subject: ") === 0) { if (stripos($response, "Subject: ") === 0) {
$subject = explode('Subject: ', $response, 2); $subject = explode('Subject: ', $response, 2);
if (preg_match($subject_filter, $subject[1])) { if (preg_match($subject_filter, $subject[1])) {
$banned = "subject_filter"; $banned = "subject_filter";
} }
$ref = 0;
} }
if (stripos($response, "Newsgroups: ") === 0) { if (stripos($response, "Newsgroups: ") === 0) {
$response = str_ireplace($group, $group, $response); $response = str_ireplace($group, $group, $response);
@ -367,14 +341,12 @@ function get_articles($ns, $group)
$current_article['xref'] .= ' ' . $agroup . ':' . $artnum; $current_article['xref'] .= ' ' . $agroup . ':' . $artnum;
} }
} }
$ref = 0;
} }
if (stripos($response, "Xref: ") === 0) { if (stripos($response, "Xref: ") === 0) {
if (isset($CONFIG['enable_nntp']) && $CONFIG['enable_nntp'] == true) { if (isset($CONFIG['enable_nntp']) && $CONFIG['enable_nntp'] == true) {
$is_xref = true; $is_xref = true;
} }
$xref = $response; $xref = $response;
$ref = 0;
} }
if (stripos($response, "Content-Type: ") === 0) { if (stripos($response, "Content-Type: ") === 0) {
preg_match('/.*charset=.*/', $response, $te); preg_match('/.*charset=.*/', $response, $te);
@ -385,10 +357,8 @@ function get_articles($ns, $group)
$references = $this_references[1]; $references = $this_references[1];
$ref = 1; $ref = 1;
} }
if ((stripos($response, ':') === false) && (strpos($response, '>'))) { if (preg_match('/^\s/', $response) && $ref == 1) {
if ($ref == 1) { $references = $references . $response;
$references = $references . $response;
}
} }
} else { } else {
$body .= $response . "\n"; $body .= $response . "\n";
@ -400,7 +370,7 @@ function get_articles($ns, $group)
$response = fgets($ns, 1200); $response = fgets($ns, 1200);
if ($response == false) { if ($response == false) {
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Lost connection to " . $CONFIG['remote_server'] . ":" . $CONFIG['remote_port'] . " retrieving article " . $article, FILE_APPEND); file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Lost connection to " . $CONFIG['remote_server'] . ":" . $CONFIG['remote_port'] . " retrieving article " . $article, FILE_APPEND);
unlink($grouppath . "/" . $local); unlink($articleHandle);
break; break;
// continue; // continue;
} }
@ -412,7 +382,7 @@ function get_articles($ns, $group)
$bytes = $bytes + ($lines * 2); $bytes = $bytes + ($lines * 2);
// Don't spool article if $banned != 0 // Don't spool article if $banned != 0
if ($banned != false) { if ($banned != false) {
unlink($grouppath . "/" . $local); unlink($articleHandle);
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Skipping: " . $CONFIG['remote_server'] . " " . $group . ":" . $article . " banned in " . $banned, FILE_APPEND); file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Skipping: " . $CONFIG['remote_server'] . " " . $group . ":" . $article . " banned in " . $banned, FILE_APPEND);
$article ++; $article ++;
} else { } else {
@ -420,11 +390,11 @@ function get_articles($ns, $group)
if (strpos($subject[1], $nocem_check) !== false) { if (strpos($subject[1], $nocem_check) !== false) {
$is_from = address_decode($from[1], 'nowhere'); $is_from = address_decode($from[1], 'nowhere');
$nocem_file = tempnam($spooldir . "/nocem", $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]"); $nocem_file = tempnam($spooldir . "/nocem", $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]");
copy($grouppath . "/" . $local, $nocem_file); copy($articleHandle, $nocem_file);
chmod($nocem_file, 0644); chmod($nocem_file, 0644);
if ($save_nocem_messages == true) { if ($save_nocem_messages == true) {
$saved_nocem_file = tempnam($nocem_dir, $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]-"); $saved_nocem_file = tempnam($nocem_dir, $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]-");
copy($grouppath . "/" . $local, $saved_nocem_file); copy(articleHandle, $saved_nocem_file);
} }
} }
} }
@ -432,20 +402,20 @@ function get_articles($ns, $group)
if (strpos($subject[1], $bbsmail_check) !== false) { if (strpos($subject[1], $bbsmail_check) !== false) {
$bbsmail_file = preg_replace('/@@RSL /', '', $subject[1]); $bbsmail_file = preg_replace('/@@RSL /', '', $subject[1]);
$bbsmail_filename = $spooldir . "/bbsmail/in/bbsmail-" . $bbsmail_file; $bbsmail_filename = $spooldir . "/bbsmail/in/bbsmail-" . $bbsmail_file;
copy($grouppath . "/" . $local, $bbsmail_filename); copy($articleHandle, $bbsmail_filename);
} }
} }
$this_article = file_get_contents($articleHandle);
if ($CONFIG['article_database'] == '1') { if ($CONFIG['article_database'] == '1') {
$this_article = file_get_contents($grouppath . "/" . $local); unlink($articleHandle);
// CREATE SEARCH SNIPPET // CREATE SEARCH SNIPPET
$this_snippet = get_search_snippet($body, $content_type[1]); $this_snippet = get_search_snippet($body, $content_type[1]);
} else { } else {
if ($article_date > time()) { if ($article_date > time()) {
$article_date = time(); $article_date = time();
} }
touch($grouppath . "/" . $local, $article_date); touch($articleHandle, $article_date);
} }
$current_article['mid'] = $mid[1]; $current_article['mid'] = $mid[1];
$current_article['epochdate'] = $article_date; $current_article['epochdate'] = $article_date;
$current_article['stringdate'] = $finddate[1]; $current_article['stringdate'] = $finddate[1];
@ -460,14 +430,14 @@ function get_articles($ns, $group)
// Check Spam // Check Spam
$res = 0; $res = 0;
if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true) && ($OVERRIDES['disable_spamassassin_spooling'] !== true)) { if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true) && ($OVERRIDES['disable_spamassassin_spooling'] !== true)) {
$spam_result_array = check_spam($subject[1], $from[1], $groupnames[1], $references, $body, $mid[1]); $spam_result_array = check_spam($subject[1], $from[1], $groupnames[1], $references, $this_article, $mid[1]);
$res = $spam_result_array['res']; $res = $spam_result_array['res'];
$spamresult = $spam_result_array['spamresult']; $spamresult = $spam_result_array['spamresult'];
$spamcheckerversion = $spam_result_array['spamcheckerversion']; $spamcheckerversion = $spam_result_array['spamcheckerversion'];
$spamlevel = $spam_result_array['spamlevel']; $spamlevel = $spam_result_array['spamlevel'];
} }
if ($res === 1) { if ($res === 1) {
unlink($grouppath . "/" . $local); unlink($articleHandle);
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Skipping: " . $CONFIG['remote_server'] . " " . $group . ":" . $article . " Exceeds Spam Score", FILE_APPEND); file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Skipping: " . $CONFIG['remote_server'] . " " . $group . ":" . $article . " Exceeds Spam Score", FILE_APPEND);
// $orig_newsgroups = $newsgroups; // $orig_newsgroups = $newsgroups;
// $newsgroups = $CONFIG['spamgroup']; // $newsgroups = $CONFIG['spamgroup'];
@ -475,6 +445,7 @@ function get_articles($ns, $group)
$i --; $i --;
$local --; $local --;
} else { } else {
$pass = false;
foreach ($allgroups as $agroup) { foreach ($allgroups as $agroup) {
$agroup = trim($agroup); $agroup = trim($agroup);
if ((! testGroup($agroup)) || $agroup == '') { if ((! testGroup($agroup)) || $agroup == '') {
@ -483,11 +454,16 @@ function get_articles($ns, $group)
$current_article['group'] = $agroup; $current_article['group'] = $agroup;
if ($group == $agroup) { if ($group == $agroup) {
$current_article['local'] = $local; $current_article['local'] = $local;
insert_article_from_array($current_article, false);
} else { } else {
$current_article['local'] = get_next_article_number($agroup); $current_article['local'] = get_next_article_number($agroup);
insert_article_from_array($current_article, false);
} }
$tmp = insert_article_from_array($current_article, true);
if ($tmp[0] != "4") {
$pass = true;
}
}
if (! $pass) {
$i --;
} }
} }