From 8ecdbdae6f87cad7d9458e198540c2930b0cb679 Mon Sep 17 00:00:00 2001 From: Retro_Guy Date: Tue, 26 Dec 2023 09:05:40 -0700 Subject: [PATCH] Improved spam checking and fix error in spooling. --- Rocksolid_Light/rocksolid/lib/post.inc.php | 4 +- Rocksolid_Light/rocksolid/newsportal.php | 109 ++++++++++++------ .../rslight/scripts/rslight-lib.php | 2 +- Rocksolid_Light/rslight/scripts/spoolnews.php | 78 +++++-------- 4 files changed, 105 insertions(+), 88 deletions(-) diff --git a/Rocksolid_Light/rocksolid/lib/post.inc.php b/Rocksolid_Light/rocksolid/lib/post.inc.php index 60d4709..c3f94ad 100644 --- a/Rocksolid_Light/rocksolid/lib/post.inc.php +++ b/Rocksolid_Light/rocksolid/lib/post.inc.php @@ -443,8 +443,8 @@ function message_post($subject, $from, $newsgroups, $ref, $body, $encryptthis = /* * SPAM CHECK */ - if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true)) { - $spam_result_array = check_spam($subject, $from, $newsgroups, $ref, $body, $msgid); + if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true) && ($CONFIG['enable_nntp'] != true)) { + $spam_result_array = check_spam($subject, $from, $newsgroups, $ref, $body, $msgid, true); $res = $spam_result_array['res']; $spamresult = $spam_result_array['spamresult']; $spamcheckerversion = $spam_result_array['spamcheckerversion']; diff --git a/Rocksolid_Light/rocksolid/newsportal.php b/Rocksolid_Light/rocksolid/newsportal.php index 020efca..f6a24e6 100644 --- a/Rocksolid_Light/rocksolid/newsportal.php +++ b/Rocksolid_Light/rocksolid/newsportal.php @@ -1305,13 +1305,39 @@ function is_multibyte($s) return mb_strlen($s, 'utf-8') < strlen($s); } -function check_spam($subject, $from, $newsgroups, $ref, $body, $msgid) +function check_spam($subject, $from, $newsgroups, $ref, $body, $msgid, $useheaders = false) { global $msgid_generate, $msgid_fqdn, $spooldir, $logdir; global $CONFIG; + $spamdir = $spooldir . '/spam'; + if (! is_dir($spamdir)) { + mkdir($spamdir); + } $logfile = $logdir . 
'/spam.log'; $spamfile = tempnam($spooldir, 'spam-'); - file_put_contents($spamfile, $body); + if ($useheaders) { + // Add headers + $head = ''; + if (trim($subject) != '') { + $head .= 'Subject: ' . $subject . "\r\n"; + } + if (trim($from) != '') { + $head .= 'From: ' . $from . "\r\n"; + } + if (trim($newsgroups) != '') { + $head .= 'Newsgroups: ' . $newsgroups . "\r\n"; + } + if (trim($ref) != '') { + $head .= 'References: ' . $ref . "\r\n"; + } + if (trim($msgid) != '') { + $head .= 'Message-ID: ' . $msgid . "\r\n"; + } + $message = $head . "\r\n" . $body; + } else { + $message = $body; + } + file_put_contents($spamfile, $message); $spamcommand = $CONFIG['spamc'] . ' -E < ' . $spamfile; ob_start(); passthru($spamcommand, $res); @@ -1337,9 +1363,10 @@ function check_spam($subject, $from, $newsgroups, $ref, $body, $msgid) } unlink($spamfile); if ($res === 1) { - file_put_contents($logfile, "\n" . format_log_date() . " " . $spamresult . "\n", FILE_APPEND); + file_put_contents($logfile, "\n" . format_log_date() . " identified spam: " . $from . " " . $newsgroups . " " . $msgid, FILE_APPEND); + file_put_contents($spamdir . '/' . $msgid, $spamresult); } else { - file_put_contents($logfile, "\n" . format_log_date() . " Checked: " . $from . " " . $newsgroups . " " . $msgid . "\n------------\n", FILE_APPEND); + file_put_contents($logfile, "\n" . format_log_date() . " clean message: " . $from . " " . $newsgroups . " " . $msgid, FILE_APPEND); } return array( 'res' => $res, @@ -1898,6 +1925,43 @@ function get_next_article_number($group) return $local; } +function check_duplicate_msgid($msgid, $group) +{ + global $spooldir, $logdir; + + $found = false; + + $database = $spooldir . 
'/articles-overview.db3'; + $table = 'overview'; + $dbh = overview_db_open($database, $table); + $stmt = $dbh->prepare("SELECT * FROM $table WHERE msgid=:msgid AND newsgroup=:newsgroup"); + $stmt->bindParam(':msgid', $msgid); + $stmt->bindParam(':newsgroup', $group); + $stmt->execute(); + while ($row = $stmt->fetch()) { + if($row['msgid'] == $msgid) { + $found = true; + } + } + $dbh = null; + + $database = $spooldir . '/history.db3'; + $table = 'history'; + $dbh = history_db_open($database, $table); + $stmt = $dbh->prepare("SELECT * FROM $table WHERE msgid=:msgid AND newsgroup=:newsgroup"); + $stmt->bindParam(':msgid', $msgid); + $stmt->bindParam(':newsgroup', $group); + $stmt->execute(); + while ($row = $stmt->fetch()) { + if($row['msgid'] == $msgid) { + $found = true; + } + } + $dbh = null; + + return $found; +} + function insert_article_from_array($this_article, $check_duplicates = true) { global $CONFIG, $config_name, $spooldir, $logdir; @@ -1906,47 +1970,24 @@ function insert_article_from_array($this_article, $check_duplicates = true) $grouppath = $path . preg_replace('/\./', '/', $group); if ($check_duplicates) { - // Create list of message-ids - $database = $spooldir . '/articles-overview.db3'; - $table = 'overview'; - $dbh = overview_db_open($database, $table); - $stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup"); - $stmt->bindParam(':newsgroup', $group); - $stmt->execute(); - while ($row = $stmt->fetch()) { - $msgids[$row['msgid']] = true; - } - $dbh = null; - - // Check history database for deleted message-ids - $database = $spooldir . '/history.db3'; - $table = 'history'; - $dbh = history_db_open($database, $table); - $stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup"); - $stmt->bindParam(':newsgroup', $group); - $stmt->execute(); - while ($row = $stmt->fetch()) { - $msgids[$row['msgid']] = true; - } - $dbh = null; - - if ($msgids[$this_article['mid']] == true) { - echo "\nDuplicate Message-ID for: " . 
$group . ":" . $this_article['local']; - file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $this_article['local'], FILE_APPEND); + if (check_duplicate_msgid($this_article['mid'], $group)) { + echo "\n(newsportal)Duplicate Message-ID for: " . $group . ":" . $this_article['mid']; + file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $this_article['mid'], FILE_APPEND); return "441 Insert failed (duplicate)\r\n"; } } + // Open articles Database if ($CONFIG['article_database'] == '1') { $article_dbh = article_db_open($spooldir . '/' . $group . '-articles.db3'); - $article_sql = 'INSERT OR IGNORE INTO articles(newsgroup, number, msgid, date, name, subject, article, search_snippet) VALUES(?,?,?,?,?,?,?,?)'; + $article_sql = 'INSERT INTO articles(newsgroup, number, msgid, date, name, subject, article, search_snippet) VALUES(?,?,?,?,?,?,?,?)'; $article_stmt = $article_dbh->prepare($article_sql); } // Open overview database $database = $spooldir . '/articles-overview.db3'; $table = 'overview'; $overview_dbh = overview_db_open($database, $table); - $overview_sql = 'INSERT OR IGNORE INTO overview(newsgroup, number, msgid, date, datestring, name, subject, refs, bytes, lines, xref) VALUES(?,?,?,?,?,?,?,?,?,?,?)'; + $overview_sql = 'INSERT INTO overview(newsgroup, number, msgid, date, datestring, name, subject, refs, bytes, lines, xref) VALUES(?,?,?,?,?,?,?,?,?,?,?)'; $overview_stmt = $overview_dbh->prepare($overview_sql); // Overview @@ -1985,7 +2026,7 @@ function insert_article_from_array($this_article, $check_duplicates = true) touch($grouppath . "/" . $this_article['local'], $article_date); } - echo "\nRetrieved: " . $group . " " . $this_article['local']; + echo "\nSpooling: " . $group . " " . $this_article['local']; file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Spooling: " . $group . ":" . 
$this_article['local'], FILE_APPEND); $status = "spooled"; $statusdate = time(); diff --git a/Rocksolid_Light/rslight/scripts/rslight-lib.php b/Rocksolid_Light/rslight/scripts/rslight-lib.php index b1eb78f..0cf6006 100644 --- a/Rocksolid_Light/rslight/scripts/rslight-lib.php +++ b/Rocksolid_Light/rslight/scripts/rslight-lib.php @@ -395,7 +395,7 @@ function process_post($message, $group) * SPAM CHECK */ if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true)) { - $spam_result_array = check_spam($subject[1], $from[1], $newsgroups, $references, $body, $msgid); + $spam_result_array = check_spam($subject[1], $from[1], $newsgroups, $references, $body, $msgid, true); $res = $spam_result_array['res']; $spamresult = $spam_result_array['spamresult']; $spamcheckerversion = $spam_result_array['spamcheckerversion']; diff --git a/Rocksolid_Light/rslight/scripts/spoolnews.php b/Rocksolid_Light/rslight/scripts/spoolnews.php index b73b003..3b0e2f1 100644 --- a/Rocksolid_Light/rslight/scripts/spoolnews.php +++ b/Rocksolid_Light/rslight/scripts/spoolnews.php @@ -149,7 +149,7 @@ if ($CONFIG['remote_server'] != '') { if ($ns2) { file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Updating threads for: " . $name[0] . "...", FILE_APPEND); echo 'Use ns2: ' . $ns2 . "\n"; - thread_load_newsserver($ns2, $name[0], 0); + // thread_load_newsserver($ns2, $name[0], 0); } } } @@ -216,31 +216,6 @@ function get_articles($ns, $group) if ($article < $detail[2]) { $article = $detail[2]; } - - // Create list of message-ids - $database = $spooldir . '/articles-overview.db3'; - $table = 'overview'; - $dbh = overview_db_open($database, $table); - $stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup"); - $stmt->bindParam(':newsgroup', $group); - $stmt->execute(); - while ($row = $stmt->fetch()) { - $msgids[$row['msgid']] = true; - } - $dbh = null; - - // Check history database for deleted message-ids - $database = $spooldir . 
'/history.db3'; - $table = 'history'; - $dbh = history_db_open($database, $table); - $stmt = $dbh->prepare("SELECT msgid FROM $table WHERE newsgroup=:newsgroup"); - $stmt->bindParam(':newsgroup', $group); - $stmt->execute(); - while ($row = $stmt->fetch()) { - $msgids[$row['msgid']] = true; - } - $dbh = null; - // Get overview from server $server_overview = array(); $re = false; @@ -277,9 +252,9 @@ function get_articles($ns, $group) if ($CONFIG['enable_nntp'] != true) { $local = $article; } - if ($msgids[$overview_msgid[$article]] == true) { - echo "\nDuplicate Message-ID for: " . $group . ":" . $local; - file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . $article, FILE_APPEND); + if (check_duplicate_msgid($overview_msgid[$article], $group)) { + echo "\n(spoolnews)Duplicate Message-ID for: " . $group . ":" . $overview_msgid[$article]; + file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Duplicate Message-ID for: " . $group . ":" . 
$overview_msgid[$article], FILE_APPEND); $article ++; continue; } @@ -312,6 +287,9 @@ function get_articles($ns, $group) } if ($is_header == 1) { $response = str_replace("\t", " ", $response); + if (strpos($response, ': ') !== false) { + $ref = 0; + } // Find article date if (stripos($response, "Date: ") === 0) { $finddate = explode(': ', $response, 2); @@ -323,28 +301,24 @@ function get_articles($ns, $group) if (preg_match($msgid_filter, $mid[1])) { $banned = "msgid_filter"; } - $ref = 0; } if (stripos($response, "From: ") === 0) { $from = explode(': ', $response, 2); if (preg_match($from_filter, $from[1])) { $banned = "from_filter"; } - $ref = 0; } if (stripos($response, "Path: ") === 0) { $msgpath = explode(': ', $response, 2); if (preg_match($path_filter, $msgpath[1])) { $banned = "path_filter"; } - $ref = 0; } if (stripos($response, "Subject: ") === 0) { $subject = explode('Subject: ', $response, 2); if (preg_match($subject_filter, $subject[1])) { $banned = "subject_filter"; } - $ref = 0; } if (stripos($response, "Newsgroups: ") === 0) { $response = str_ireplace($group, $group, $response); @@ -367,14 +341,12 @@ function get_articles($ns, $group) $current_article['xref'] .= ' ' . $agroup . ':' . $artnum; } } - $ref = 0; } if (stripos($response, "Xref: ") === 0) { if (isset($CONFIG['enable_nntp']) && $CONFIG['enable_nntp'] == true) { $is_xref = true; } $xref = $response; - $ref = 0; } if (stripos($response, "Content-Type: ") === 0) { preg_match('/.*charset=.*/', $response, $te); @@ -385,10 +357,8 @@ function get_articles($ns, $group) $references = $this_references[1]; $ref = 1; } - if ((stripos($response, ':') === false) && (strpos($response, '>'))) { - if ($ref == 1) { - $references = $references . $response; - } + if (preg_match('/^\s/', $response) && $ref == 1) { + $references = $references . $response; } } else { $body .= $response .
"\n"; @@ -400,7 +370,7 @@ function get_articles($ns, $group) $response = fgets($ns, 1200); if ($response == false) { file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Lost connection to " . $CONFIG['remote_server'] . ":" . $CONFIG['remote_port'] . " retrieving article " . $article, FILE_APPEND); - unlink($grouppath . "/" . $local); + unlink($articleHandle); break; // continue; } @@ -412,7 +382,7 @@ function get_articles($ns, $group) $bytes = $bytes + ($lines * 2); // Don't spool article if $banned != 0 if ($banned != false) { - unlink($grouppath . "/" . $local); + unlink($articleHandle); file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Skipping: " . $CONFIG['remote_server'] . " " . $group . ":" . $article . " banned in " . $banned, FILE_APPEND); $article ++; } else { @@ -420,11 +390,11 @@ function get_articles($ns, $group) if (strpos($subject[1], $nocem_check) !== false) { $is_from = address_decode($from[1], 'nowhere'); $nocem_file = tempnam($spooldir . "/nocem", $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]"); - copy($grouppath . "/" . $local, $nocem_file); + copy($articleHandle, $nocem_file); chmod($nocem_file, 0644); if ($save_nocem_messages == true) { $saved_nocem_file = tempnam($nocem_dir, $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]-"); - copy($grouppath . "/" . $local, $saved_nocem_file); + copy($articleHandle, $saved_nocem_file); } } } @@ -432,20 +402,20 @@ function get_articles($ns, $group) if (strpos($subject[1], $bbsmail_check) !== false) { $bbsmail_file = preg_replace('/@@RSL /', '', $subject[1]); $bbsmail_filename = $spooldir . "/bbsmail/in/bbsmail-" . $bbsmail_file; - copy($grouppath . "/" . $local, $bbsmail_filename); + copy($articleHandle, $bbsmail_filename); } } + $this_article = file_get_contents($articleHandle); if ($CONFIG['article_database'] == '1') { - $this_article = file_get_contents($grouppath . "/" .
$local); + unlink($articleHandle); // CREATE SEARCH SNIPPET $this_snippet = get_search_snippet($body, $content_type[1]); } else { if ($article_date > time()) { $article_date = time(); } - touch($grouppath . "/" . $local, $article_date); + touch($articleHandle, $article_date); } - $current_article['mid'] = $mid[1]; $current_article['epochdate'] = $article_date; $current_article['stringdate'] = $finddate[1]; @@ -460,14 +430,14 @@ function get_articles($ns, $group) // Check Spam $res = 0; if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true) && ($OVERRIDES['disable_spamassassin_spooling'] !== true)) { - $spam_result_array = check_spam($subject[1], $from[1], $groupnames[1], $references, $body, $mid[1]); + $spam_result_array = check_spam($subject[1], $from[1], $groupnames[1], $references, $this_article, $mid[1]); $res = $spam_result_array['res']; $spamresult = $spam_result_array['spamresult']; $spamcheckerversion = $spam_result_array['spamcheckerversion']; $spamlevel = $spam_result_array['spamlevel']; } if ($res === 1) { - unlink($grouppath . "/" . $local); + unlink($articleHandle); file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Skipping: " . $CONFIG['remote_server'] . " " . $group . ":" . $article . " Exceeds Spam Score", FILE_APPEND); // $orig_newsgroups = $newsgroups; // $newsgroups = $CONFIG['spamgroup']; @@ -475,6 +445,7 @@ function get_articles($ns, $group) $i --; $local --; } else { + $pass = false; foreach ($allgroups as $agroup) { $agroup = trim($agroup); if ((! 
testGroup($agroup)) || $agroup == '') { @@ -483,11 +454,16 @@ function get_articles($ns, $group) $current_article['group'] = $agroup; if ($group == $agroup) { $current_article['local'] = $local; - insert_article_from_array($current_article, false); } else { $current_article['local'] = get_next_article_number($agroup); - insert_article_from_array($current_article, false); } + $tmp = insert_article_from_array($current_article, true); + if ($tmp[0] != "4") { + $pass = true; + } + } + if (! $pass) { + $i --; } }