From 2dda14f55a3374c75c6eeabdf4cc05a936b964cc Mon Sep 17 00:00:00 2001 From: Retro_Guy Date: Thu, 12 Dec 2024 05:53:25 -0700 Subject: [PATCH] Add feature to import articles from mbox files to maintenance.php. --- .../rslight/scripts/maintenance.php | 377 +++++++++++++++++- 1 file changed, 376 insertions(+), 1 deletion(-) diff --git a/Rocksolid_Light/rslight/scripts/maintenance.php b/Rocksolid_Light/rslight/scripts/maintenance.php index d9feeac..34b9688 100644 --- a/Rocksolid_Light/rslight/scripts/maintenance.php +++ b/Rocksolid_Light/rslight/scripts/maintenance.php @@ -36,7 +36,7 @@ if ($argv[1] != '-newsection') { /* Change to non root user */ change_identity($uinfo["uid"], $uinfo["gid"]); $processUser = posix_getpwuid(posix_geteuid()); - + if ($processUser['name'] != $CONFIG['webserver_user']) { echo "You are running as user: " . $processUser['name'] . "\n"; echo 'Please run this script as: ' . $CONFIG['webserver_user'] . "\n"; @@ -52,6 +52,12 @@ if ($argv[1] != '-newsection') { exit(); } + if(isset($argv[2])) { + $config_name = get_section_by_group($argv[2]); + } else { + $config_name = null; + } + $logfile = $logdir . '/spoolnews.log'; $lockfile = $lockdir . '/' . $config_name . '-spoolnews.lock'; @@ -117,6 +123,17 @@ if ($argv[1][0] == '-') { echo "Creating section: " . $argv[2] . "\n"; echo create_section($argv[2]); break; + case "-import-mbox-to-articles": + if (!isset($argv[2]) || !isset($argv[3])) { + echo "Please provide a group name followed by full path to mbox file\n"; + exit; + } + if(!isset($config_name)) { + echo "Please add " . $argv[2] . " to groups.txt for a section\n"; + exit; + } + mbox_import_articles($argv[2], $argv[3]); + break; case "-clean": clean_spool(); break; @@ -130,6 +147,9 @@ if ($argv[1][0] == '-') { echo "-clear-diskcache: Remove all cache files if using Disk Caching\n"; echo "-import: Import articles from a .db3 file (-import alt.test-articles)\n"; echo " You must first add group name to /
/groups.txt manually\n"; + echo "-import-mbox-to-articles: Import articles from a mbox file\n"; + echo " -import-mbox-to-articles alt.test /path/to/file/alt.test.mbox\n"; + echo " You must first add group name to /
/groups.txt manually\n";
         echo "-newsection: Create a new section for groups\n";
         echo "-refill: Go back x articles and retrieve missing from remote server\n";
         echo " -refill alt.test 3000 will retrive missing articles for alt.test\n";
@@ -426,6 +446,415 @@ function remove_articles($group)
     @unlink($spooldir . '/' . $group . '-overboard.dat');
 }
 
+/**
+ * Import every message of an mbox file into the spool of one group.
+ *
+ * Lines are collected between "From " separator lines and each collected
+ * message is handed to mbox_get_one_article(). The script always ends
+ * here: after the import, or earlier when a precondition fails.
+ *
+ * @param string $group Group name the messages are imported into.
+ * @param string $mbox  Path of the mbox file to read.
+ */
+function mbox_import_articles($group, $mbox)
+{
+    global $spooldir;
+
+    // Refuse to import on top of an existing article database.
+    $database = $spooldir . '/' . $group . '-articles.db3';
+    if (file_exists($database)) {
+        echo $database . ' already exists. Please "-remove ' . $group . '" to import this mbox data' . "\n";
+        echo "Exiting...\n";
+        exit;
+    }
+
+    // Open the mbox before article_db_open() runs, so an unusable path is
+    // reported before anything is done with $database.
+    $file = file_exists($mbox) ? fopen($mbox, 'r') : false;
+    if ($file === false) {
+        echo 'Cannot open ' . $mbox . "\n";
+        echo "Exiting...\n";
+        exit;
+    }
+
+    // The returned handle is not used below.
+    // NOTE(review): presumably this call creates the database - confirm.
+    $new_article_dbh = article_db_open($database);
+
+    $article = 0;
+    $mbox_article = array();
+
+    while (($mbox_line = fgets($file)) !== false) {
+        // Ignore blank lines ahead of the first "From " separator.
+        if ($article == 0 && trim($mbox_line) == '') {
+            continue;
+        }
+        if (preg_match("/^From /", $mbox_line)) {
+            // A separator closes the message collected so far.
+            if ($article > 0) {
+                mbox_get_one_article($group, $mbox_article);
+                echo "Retrieving " . $article . "\n";
+                $mbox_article = array();
+            }
+            $article++;
+            continue;
+        }
+        $mbox_article[] = rtrim($mbox_line);
+    }
+    fclose($file);
+
+    // Flush the final message, unless nothing was collected for it.
+    if (count($mbox_article) > 0) {
+        mbox_get_one_article($group, $mbox_article);
+        echo "Retrieving " . $article . "\n";
+    }
+    exit;
+}
+
+/**
+ * Parse one message taken from an mbox file and spool it as an article.
+ *
+ * Header lines are scanned for Message-ID, From, Path, Subject, References,
+ * Date, Injection-Date, Supersedes, Newsgroups and Content-Type. A new Xref
+ * header is generated and the lines are written to a spool file named after
+ * the next article number of $group. The article is dropped when a header
+ * filter matches or check_article_integrity() reports a problem; otherwise
+ * it is passed to insert_article_from_array() once per accepted group of
+ * its Newsgroups header.
+ *
+ * @param string $group         Group the message is imported into.
+ * @param array  $article_array Lines of one message.
+ */
+function mbox_get_one_article($group, $article_array)
+{
+    global $CONFIG, $config_name, $rslight_gpg, $spooldir, $logfile, $debug_log;
+    // These six are read below, so they have to be imported as well.
+    // NOTE(review): presumably all of them are set at file scope - confirm.
+    global $OVERRIDES, $spamlog, $nocem_check, $save_nocem_messages, $nocem_dir, $bbsmail_check;
+
+    // Dates already handed out; static so the collision check below sees
+    // the articles of earlier calls.
+    static $dates_used = array();
+
+    // Nothing to parse, e.g. two consecutive "From " separator lines.
+    if (empty($article_array)) {
+        return;
+    }
+
+    $grouppath = $spooldir . '/articles/' . preg_replace('/\./', '/', $group);
+    if (!is_dir($grouppath)) {
+        mkdir($grouppath, 0777, true);
+    }
+    $msgid_filter = get_config_value('header_filters.conf', 'Message-ID');
+    $subject_filter = get_config_value('header_filters.conf', 'Subject');
+    $from_filter = get_config_value('header_filters.conf', 'From');
+    $path_filter = get_config_value('header_filters.conf', 'Path');
+
+    // Collects the fields passed to insert_article_from_array().
+    $current_article = array();
+
+    $local = get_next_article_number($group);
+    $articleHandle = $grouppath . "/" . $local;
+    // Lines are appended below, so start without a leftover file.
+    if (file_exists($articleHandle)) {
+        unlink($articleHandle);
+    }
+
+    // Defaults for everything a message may leave out.
+    $mid = array(null, null);
+    $from = array(null, null);
+    $finddate = array(null, null);
+    $content_type = array();
+    $allgroups = array();
+    $newsgroups = null;
+    $subject = null;
+    $references = null;
+    $this_snippet = null;
+    $artdate = false;
+    $injectiondate = false;
+    $lines = 0;
+    $bytes = 0;
+    $ref = 0;
+    $sub = 0;
+    $ng = 0;
+    $supersedes = false;
+    $boundary = false;
+    $banned = false;
+    $is_header = 1;
+    $body = "";
+    $content_transfer_encoding = null;
+    foreach ($article_array as $response) {
+        $is_xref = false;
+        $bytes = $bytes + mb_strlen($response, '8bit');
+        // The first blank line after the first line ends the headers.
+        if (trim($response) == "" && $lines > 0) {
+            // Close the headers with the generated Xref, if there is one.
+            if ($is_header == 1 && isset($current_article['xref'])) {
+                file_put_contents($articleHandle, $current_article['xref'] . "\n", FILE_APPEND);
+            }
+            $is_header = 0;
+        }
+        if ($is_header == 1) {
+            $response = str_replace("\t", " ", $response);
+            // A line holding ': ' is taken as a new header, which ends any
+            // folded References/Subject/Newsgroups value.
+            if (strpos($response, ': ') !== false) {
+                $ref = 0;
+                $sub = 0;
+                $ng = 0;
+            }
+            // Find article date
+            if (stripos($response, "Date: ") === 0) {
+                $finddate = explode(': ', $response, 2);
+                $artdate = strtotime($finddate[1]);
+            }
+            if (stripos($response, "Injection-Date: ") === 0) {
+                $finddate = explode(': ', $response, 2);
+                $injectiondate = strtotime($finddate[1]);
+            }
+            if (stripos($response, "Supersedes: ") === 0) {
+                $supersedes = explode(': ', $response, 2);
+                $supersedes = $supersedes[1];
+            }
+            // Get overview data
+            if (stripos($response, "Message-ID: ") === 0) {
+                $mid = explode(': ', $response, 2);
+                if (preg_match($msgid_filter, $mid[1])) {
+                    $banned = "msgid_filter";
+                }
+            }
+            if (stripos($response, "From: ") === 0) {
+                $from = explode(': ', $response, 2);
+                if (preg_match($from_filter, $from[1])) {
+                    $banned = "from_filter";
+                }
+            }
+            if (stripos($response, "Path: ") === 0) {
+                $msgpath = explode(': ', $response, 2);
+                if (preg_match($path_filter, $msgpath[1])) {
+                    $banned = "path_filter";
+                }
+            }
+            if (stripos($response, "Subject: ") === 0) {
+                // Cut by length: the match above ignores case.
+                $subject = (string) substr($response, strlen("Subject: "));
+                $sub = 1;
+                if (preg_match($subject_filter, $subject)) {
+                    $banned = "subject_filter";
+                }
+            }
+            // Transfer encoding
+            if (stripos($response, "Content-Transfer-Encoding: ") === 0) {
+                $enco = explode(': ', $response, 2);
+                $content_transfer_encoding = $enco[1];
+            }
+
+            if (stripos($response, "Newsgroups: ") === 0) {
+                // Rewrite the group name in the header to the case of $group.
+                $response = str_ireplace($group, $group, $response);
+                // Identify each group name for xref
+                $newsgroups = (string) substr($response, strlen("Newsgroups: "));
+                $allgroups = preg_split("/\ |\,/", $newsgroups);
+                // Create Xref: header
+                $current_article['xref'] = "Xref: " . $CONFIG['pathhost'];
+                foreach ($allgroups as $agroup) {
+                    $agroup = trim($agroup);
+                    if ((! testGroup($agroup)) || $agroup == '') {
+                        continue;
+                    }
+                    if ($group == $agroup) {
+                        $artnum = $local;
+                    } else {
+                        $artnum = get_next_article_number($agroup);
+                    }
+                    if ($artnum > 0) {
+                        $current_article['xref'] .= ' ' . $agroup . ':' . $artnum;
+                    }
+                }
+                $ng = 1;
+            }
+            if (stripos($response, "Xref: ") === 0) {
+                // With NNTP enabled the incoming Xref is not copied to the file.
+                if (isset($CONFIG['enable_nntp']) && $CONFIG['enable_nntp'] == true) {
+                    $is_xref = true;
+                }
+            }
+            if (stripos($response, "Content-Type: ") === 0) {
+                if (preg_match('/.*charset=.*/', $response, $te)) {
+                    $content_type = explode("Content-Type: text/plain; charset=", $te[0]);
+                }
+                if (preg_match('/.*boundary=.*/', $response)) {
+                    $boundary = explode("boundary=", $response, 2);
+                    $boundary = trim($boundary[1], '\";');
+                }
+            }
+            if (stripos($response, "References: ") === 0) {
+                $references = (string) substr($response, strlen("References: "));
+                $ref = 1;
+            }
+            // Folded lines extend the header that was seen last.
+            if (preg_match('/^\s/', $response) && $ng == 1) {
+                $addgroups = preg_split("/\ |\,/", trim($response));
+                $allgroups = array_merge($allgroups, $addgroups);
+            }
+            if (preg_match('/^\s/', $response) && $ref == 1) {
+                $references = $references . $response;
+            }
+            if (preg_match('/^\s/', $response) && $sub == 1) {
+                $subject = $subject . $response;
+            }
+        } else {
+            $body .= $response . "\n";
+        }
+        if (! $is_xref) {
+            file_put_contents($articleHandle, $response . "\n", FILE_APPEND);
+        }
+        $response = str_replace("\n", "", str_replace("\r", "", $response));
+        $lines++;
+    }
+    // NOTE(review): this appends the final line a second time and the line
+    // count is reduced by one right after; kept as is - confirm the intent.
+    file_put_contents($articleHandle, $response . "\n", FILE_APPEND);
+    $lines = $lines - 1;
+    $bytes = $bytes + ($lines * 2);
+
+    // Prefer Injection-Date to Date header
+    // Some newsreaders (PiaoHong) produce a Date header that php does not like
+    if ($injectiondate !== false) {
+        $artdate = $injectiondate;
+        file_put_contents($debug_log, "\n" . format_log_date() . " " . $config_name . " Used Injection-Date " . $artdate . " for: " . $mid[1], FILE_APPEND);
+    } else {
+        file_put_contents($debug_log, "\n" . format_log_date() . " " . $config_name . " Used Date " . $artdate . " for: " . $mid[1], FILE_APPEND);
+    }
+
+    // Check if date matches exactly another article and handle else sorting doesn't like it
+    if (is_int($artdate)) {
+        while (isset($dates_used[$artdate])) {
+            $artdate = $artdate + 1;
+            $finddate[1] = date("D, j M Y G:i:s (T)", $artdate);
+            file_put_contents($debug_log, "\n" . format_log_date() . " " . $config_name . " Rewrote date to: " . $artdate . " " . $finddate[1] . " for " . $group . ":" . $local, FILE_APPEND);
+        }
+        $dates_used[$artdate] = true;
+    }
+    $article_date = $artdate;
+
+    // Don't spool article if $banned or fails integrity test
+    $integrity = check_article_integrity(file($articleHandle), $artdate);
+    if (($banned !== false) || ($integrity !== false)) {
+        unlink($articleHandle);
+        if ($integrity) {
+            file_put_contents($logfile, "\n" . format_log_date() . " " . $integrity, FILE_APPEND);
+        } elseif ($banned && !empty($spamlog)) {
+            file_put_contents($spamlog, "\n" . format_log_date() . " " . $banned . " :\tSPAM\t" . $mid[1] . "\t" . $newsgroups . "\t" . $from[1], FILE_APPEND);
+        }
+    } else {
+        if ((strpos($CONFIG['nocem_groups'], $group) !== false) && ($CONFIG['enable_nocem'] == true)) {
+            if (!empty($nocem_check) && strpos((string) $subject, $nocem_check) !== false) {
+                $is_from = address_decode($from[1], 'nowhere');
+                $nocem_file = tempnam($spooldir . "/nocem", $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]");
+                copy($articleHandle, $nocem_file);
+                chmod($nocem_file, 0644);
+                if ($save_nocem_messages == true) {
+                    $saved_nocem_file = tempnam($nocem_dir, $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]-");
+                    copy($articleHandle, $saved_nocem_file);
+                }
+            }
+        }
+        if ((strpos($rslight_gpg['nntp_group'], $group) !== false) && ($rslight_gpg['enable'] == '1')) {
+            if (!empty($bbsmail_check) && strpos((string) $subject, $bbsmail_check) !== false) {
+                $bbsmail_file = preg_replace('/@@RSL /', '', $subject);
+                $bbsmail_filename = $spooldir . "/bbsmail/in/bbsmail-" . $bbsmail_file;
+                copy($articleHandle, $bbsmail_filename);
+            }
+        }
+        $this_article = file_get_contents($articleHandle);
+        if ($CONFIG['article_database'] == '1') {
+            unlink($articleHandle);
+            // CREATE SEARCH SNIPPET
+            if ($boundary !== false) {
+                $newbody = '';
+                $found = false;
+                $start = false;
+                foreach (explode("\n", $body) as $line) {
+                    if ($found === false) {
+                        // Skip everything up to and including the first boundary.
+                        if (strpos($line, $boundary) !== false) {
+                            $found = true;
+                        }
+                        continue;
+                    }
+                    if ($start === false) {
+                        // Skip the lines of the part up to its first blank line.
+                        if (trim($line) == '') {
+                            $start = true;
+                        }
+                        continue;
+                    }
+                    $newbody .= $line . "\n";
+                }
+                file_put_contents($debug_log, "\n" . format_log_date() . " Created snippet from multipart article: " . $mid[1], FILE_APPEND);
+            } else {
+                $newbody = $body;
+            }
+
+            $this_snippet = get_search_snippet($newbody, isset($content_type[1]) ? $content_type[1] : null, $content_transfer_encoding);
+        } else {
+            touch($articleHandle, $article_date);
+        }
+        $current_article['mid'] = $mid[1];
+        $current_article['epochdate'] = $article_date;
+        $current_article['stringdate'] = $finddate[1];
+        $current_article['from'] = $from[1];
+        $current_article['subject'] = $subject;
+        $current_article['references'] = $references;
+        $current_article['bytes'] = $bytes;
+        $current_article['lines'] = $lines;
+        $current_article['article'] = $this_article;
+        $current_article['snippet'] = $this_snippet;
+
+        // Check Spam
+        $res = 0;
+        if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true) && !(isset($OVERRIDES['disable_spamassassin_spooling']) && $OVERRIDES['disable_spamassassin_spooling'] === true)) {
+            $spam_result_array = check_spam($subject, $from[1], $newsgroups, $references, $this_article, $mid[1]);
+            $res = $spam_result_array['res'];
+        }
+        if ($res === 1) {
+            // The spool file is already gone when article_database is '1'.
+            if (file_exists($articleHandle)) {
+                unlink($articleHandle);
+            }
+            file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Skipping: " . $CONFIG['remote_server'] . " " . $group . ":" . $mid[1] . " Exceeds Spam Score", FILE_APPEND);
+        } else {
+            foreach ($allgroups as $agroup) {
+                $agroup = trim($agroup);
+                if ((! testGroup($agroup)) || $agroup == '') {
+                    continue;
+                }
+                $current_article['group'] = $agroup;
+                if ($group == $agroup) {
+                    $current_article['local'] = $local;
+                } else {
+                    $current_article['local'] = get_next_article_number($agroup);
+                }
+                file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Preparing to spool " . $group . ":" . $mid[1], FILE_APPEND);
+                $tmp = insert_article_from_array($current_article, false);
+                if ($tmp[0] == "4") {
+                    file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " " . $tmp, FILE_APPEND);
+                }
+            }
+        }
+    }
+    if ($supersedes !== false) {
+        if (isset($OVERRIDES['enable_supersedes_support']) && $OVERRIDES['enable_supersedes_support'] == true) {
+            file_put_contents($debug_log, "\n" . format_log_date() . " Found Supersedes: " . $mid[1] . " for: " . $supersedes, FILE_APPEND);
+            if (!check_remote_for_msgid($supersedes)) {
+                file_put_contents($debug_log, "\n" . format_log_date() . " Will delete: " . $supersedes, FILE_APPEND);
+                delete_message($supersedes);
+            }
+        }
+    }
+}
+
 function import_articles($group)
 {
     global $spooldir, $CONFIG, $workpath, $path, $config_name, $logfile;