Add feature to import articles from mbox files to maintenance.php.
This commit is contained in:
parent
4209a8befb
commit
2dda14f55a
|
@ -36,7 +36,7 @@ if ($argv[1] != '-newsection') {
|
|||
/* Change to non root user */
|
||||
change_identity($uinfo["uid"], $uinfo["gid"]);
|
||||
$processUser = posix_getpwuid(posix_geteuid());
|
||||
|
||||
|
||||
if ($processUser['name'] != $CONFIG['webserver_user']) {
|
||||
echo "You are running as user: " . $processUser['name'] . "\n";
|
||||
echo 'Please run this script as: ' . $CONFIG['webserver_user'] . "\n";
|
||||
|
@ -52,6 +52,12 @@ if ($argv[1] != '-newsection') {
|
|||
exit();
|
||||
}
|
||||
|
||||
if(isset($argv[2])) {
|
||||
$config_name = get_section_by_group($argv[2]);
|
||||
} else {
|
||||
$config_name = null;
|
||||
}
|
||||
|
||||
$logfile = $logdir . '/spoolnews.log';
|
||||
$lockfile = $lockdir . '/' . $config_name . '-spoolnews.lock';
|
||||
|
||||
|
@ -117,6 +123,17 @@ if ($argv[1][0] == '-') {
|
|||
echo "Creating section: " . $argv[2] . "\n";
|
||||
echo create_section($argv[2]);
|
||||
break;
|
||||
case "-import-mbox-to-articles":
|
||||
if (!isset($argv[2]) || !isset($argv[3])) {
|
||||
echo "Please provide a group name followed by full path to mbox file\n";
|
||||
exit;
|
||||
}
|
||||
if(!isset($config_name) {
|
||||
echo "Please add " . $argv[2] . " to groups.txt for a section\n";
|
||||
exit;
|
||||
}
|
||||
mbox_import_articles($argv[2], $argv[3]);
|
||||
break;
|
||||
case "-clean":
|
||||
clean_spool();
|
||||
break;
|
||||
|
@ -130,6 +147,9 @@ if ($argv[1][0] == '-') {
|
|||
echo "-clear-diskcache: Remove all cache files if using Disk Caching\n";
|
||||
echo "-import: Import articles from a .db3 file (-import alt.test-articles)\n";
|
||||
echo " You must first add group name to <config_dir>/<section>/groups.txt manually\n";
|
||||
echo "-import-mbox-to-articles: Import articles from a mbox file\n";
|
||||
echo " -import-mbox-to-articles alt.test /path/to/file/alt.test.mbox\n";
|
||||
echo " You must first add group name to <config_dir>/<section>/groups.txt manually\n";
|
||||
echo "-newsection: Create a new section for groups\n";
|
||||
echo "-refill: Go back x articles and retrieve missing from remote server\n";
|
||||
echo " -refill alt.test 3000 will retrive missing articles for alt.test\n";
|
||||
|
@ -426,6 +446,361 @@ function remove_articles($group)
|
|||
@unlink($spooldir . '/' . $group . '-overboard.dat');
|
||||
}
|
||||
|
||||
function mbox_import_articles($group, $mbox)
|
||||
{
|
||||
global $spooldir, $CONFIG, $workpath, $path, $config_name, $logfile;
|
||||
# Prepare databases
|
||||
|
||||
$database = $spooldir . '/' . $group . '-articles.db3';
|
||||
if (file_exists($database)) {
|
||||
echo $database . ' already exists. Please "-remove ' . $group . '" to import this mbox data' . "\n";
|
||||
echo "Exiting...\n";
|
||||
exit;
|
||||
}
|
||||
$new_article_dbh = article_db_open($database);
|
||||
|
||||
if (!file_exists($mbox)) {
|
||||
echo 'Cannot open ' . $mbox . "\n";
|
||||
echo "Exiting...\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
$article = 0;
|
||||
$mbox_article = array();
|
||||
|
||||
$file = fopen($mbox, 'r');
|
||||
while (($mbox_line = fgets($file)) !== false) {
|
||||
if ($article == 0 && trim($mbox_line == '')) {
|
||||
continue;
|
||||
}
|
||||
if (preg_match("/^From /", $mbox_line)) {
|
||||
if ($article > 0) {
|
||||
mbox_get_one_article($group, $mbox_article);
|
||||
echo "Retrieving " . $article . "\n";
|
||||
$mbox_article = array();
|
||||
}
|
||||
$article++;
|
||||
continue;
|
||||
}
|
||||
$mbox_article[] = rtrim($mbox_line);
|
||||
}
|
||||
fclose($file);
|
||||
mbox_get_one_article($group, $mbox_article);
|
||||
echo "Retrieving " . $article . "\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
// GET INDIVIDUAL ARTICLE
|
||||
function mbox_get_one_article($group, $article_array)
|
||||
{
|
||||
global $CONFIG, $config_name, $rslight_gpg, $spooldir, $logfile, $debug_log;
|
||||
$grouppath = $spooldir . '/articles/' . preg_replace('/\./', '/', $group);
|
||||
if (!is_dir($grouppath)) {
|
||||
mkdir($grouppath, 0777, 'recursive');
|
||||
}
|
||||
// $banned_names = file($user_ban_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
|
||||
$msgid_filter = get_config_value('header_filters.conf', 'Message-ID');
|
||||
$subject_filter = get_config_value('header_filters.conf', 'Subject');
|
||||
$from_filter = get_config_value('header_filters.conf', 'From');
|
||||
$path_filter = get_config_value('header_filters.conf', 'Path');
|
||||
|
||||
// Create array for article, then send to insert_article_from_array()
|
||||
if (isset($current_article)) {
|
||||
unset($current_article);
|
||||
$current_article = array();
|
||||
}
|
||||
|
||||
$local = get_next_article_number($group);
|
||||
$articleHandle = $grouppath . "/" . $local;
|
||||
unlink($articleHandle);
|
||||
unset($references);
|
||||
$lines = 0;
|
||||
$bytes = 0;
|
||||
$ref = 0;
|
||||
$sub = 0;
|
||||
$ng = 0;
|
||||
$supersedes = false;
|
||||
$boundary = false;
|
||||
$banned = false;
|
||||
$integrity = false;
|
||||
$is_header = 1;
|
||||
$body = "";
|
||||
$content_transfer_encoding = null;
|
||||
foreach ($article_array as $response) {
|
||||
$is_xref = false;
|
||||
$bytes = $bytes + mb_strlen($response, '8bit');
|
||||
if (trim($response) == "" && $lines > 0) {
|
||||
if ($is_header == 1) {
|
||||
file_put_contents($articleHandle, $current_article['xref'] . "\n", FILE_APPEND);
|
||||
}
|
||||
$is_header = 0;
|
||||
}
|
||||
if ($is_header == 1) {
|
||||
$response = str_replace("\t", " ", $response);
|
||||
if (strpos($response, ': ') !== false) {
|
||||
$ref = 0;
|
||||
$sub = 0;
|
||||
$ng = 0;
|
||||
}
|
||||
// Find article date
|
||||
if (stripos($response, "Date: ") === 0) {
|
||||
$finddate = explode(': ', $response, 2);
|
||||
$artdate = strtotime($finddate[1]);
|
||||
}
|
||||
if (stripos($response, "Injection-Date: ") === 0) {
|
||||
$finddate = explode(': ', $response, 2);
|
||||
$injectiondate = strtotime($finddate[1]);
|
||||
}
|
||||
if (stripos($response, "Supersedes: ") === 0) {
|
||||
$supersedes = explode(': ', $response, 2);
|
||||
$supersedes = $supersedes[1];
|
||||
}
|
||||
// Get overview data
|
||||
if (stripos($response, "Message-ID: ") === 0) {
|
||||
$mid = explode(': ', $response, 2);
|
||||
if (preg_match($msgid_filter, $mid[1])) {
|
||||
$banned = "msgid_filter";
|
||||
}
|
||||
}
|
||||
if (stripos($response, "From: ") === 0) {
|
||||
$from = explode(': ', $response, 2);
|
||||
if (preg_match($from_filter, $from[1])) {
|
||||
$banned = "from_filter";
|
||||
}
|
||||
}
|
||||
if (stripos($response, "Path: ") === 0) {
|
||||
$msgpath = explode(': ', $response, 2);
|
||||
if (preg_match($path_filter, $msgpath[1])) {
|
||||
$banned = "path_filter";
|
||||
}
|
||||
}
|
||||
if (stripos($response, "Subject: ") === 0) {
|
||||
$this_subject = explode('Subject: ', $response, 2);
|
||||
$subject = $this_subject[1];
|
||||
$sub = 1;
|
||||
if (preg_match($subject_filter, $subject)) {
|
||||
$banned = "subject_filter";
|
||||
}
|
||||
}
|
||||
// Transfer encoding
|
||||
if (stripos($response, "Content-Transfer-Encoding: ") === 0) {
|
||||
$enco = explode(': ', $response, 2);
|
||||
$content_transfer_encoding = $enco[1];
|
||||
}
|
||||
|
||||
if (stripos($response, "Newsgroups: ") === 0) {
|
||||
$response = str_ireplace($group, $group, $response);
|
||||
// Identify each group name for xref
|
||||
$groupnames = explode("Newsgroups: ", $response);
|
||||
$allgroups = preg_split("/\ |\,/", $groupnames[1]);
|
||||
// Create Xref: header
|
||||
$current_article['xref'] = "Xref: " . $CONFIG['pathhost'];
|
||||
foreach ($allgroups as $agroup) {
|
||||
$agroup = trim($agroup);
|
||||
if ((! testGroup($agroup)) || $agroup == '') {
|
||||
continue;
|
||||
}
|
||||
if ($group == $agroup) {
|
||||
$artnum = $local;
|
||||
} else {
|
||||
$artnum = get_next_article_number($agroup);
|
||||
}
|
||||
if ($artnum > 0) {
|
||||
$current_article['xref'] .= ' ' . $agroup . ':' . $artnum;
|
||||
}
|
||||
}
|
||||
$ng = 1;
|
||||
}
|
||||
if (stripos($response, "Xref: ") === 0) {
|
||||
if (isset($CONFIG['enable_nntp']) && $CONFIG['enable_nntp'] == true) {
|
||||
$is_xref = true;
|
||||
}
|
||||
$xref = $response;
|
||||
}
|
||||
if (stripos($response, "Content-Type: ") === 0) {
|
||||
preg_match('/.*charset=.*/', $response, $te);
|
||||
$content_type = explode("Content-Type: text/plain; charset=", $te[0]);
|
||||
if (preg_match('/.*boundary=.*/', $response, $be)) {
|
||||
$boundary = explode("boundary=", $response, 2);
|
||||
$boundary = trim($boundary[1], '\";');
|
||||
}
|
||||
}
|
||||
if (stripos($response, "References: ") === 0) {
|
||||
$this_references = explode('References: ', $response);
|
||||
$references = $this_references[1];
|
||||
$ref = 1;
|
||||
}
|
||||
if (preg_match('/^\s/', $response) && $ng == 1) {
|
||||
$addgroups = preg_split("/\ |\,/", trim($response));
|
||||
$allgroups = array_merge($allgroups, $addgroups);
|
||||
}
|
||||
|
||||
if (preg_match('/^\s/', $response) && $ref == 1) {
|
||||
$references = $references . $response;
|
||||
}
|
||||
if (preg_match('/^\s/', $response) && $sub == 1) {
|
||||
$subject = $subject . $response;
|
||||
}
|
||||
} else {
|
||||
$body .= $response . "\n";
|
||||
}
|
||||
if (! $is_xref) {
|
||||
file_put_contents($articleHandle, $response . "\n", FILE_APPEND);
|
||||
}
|
||||
$response = str_replace("\n", "", str_replace("\r", "", $response));
|
||||
$lines++;
|
||||
}
|
||||
file_put_contents($articleHandle, $response . "\n", FILE_APPEND);
|
||||
$lines = $lines - 1;
|
||||
$bytes = $bytes + ($lines * 2);
|
||||
|
||||
// Prefer Injection-Date to Date header
|
||||
// Some newsreaders (PiaoHong) produce a Date header that php does not like
|
||||
if (isset($injectiondate)) {
|
||||
$artdate = $injectiondate;
|
||||
file_put_contents($debug_log, "\n" . format_log_date() . " " . $config_name . " Used Injection-Date " . $artdate . " for: " . $mid[1], FILE_APPEND);
|
||||
} else {
|
||||
file_put_contents($debug_log, "\n" . format_log_date() . " " . $config_name . " Used Date " . $artdate . " for: " . $mid[1], FILE_APPEND);
|
||||
}
|
||||
|
||||
// Check if date matches exactly another article and handle else sorting doesn't like it
|
||||
while (isset($dates_used[$artdate])) {
|
||||
$artdate = $artdate + 1;
|
||||
$finddate[1] = date("D, j M Y G:i:s (T)", $artdate);
|
||||
file_put_contents($debug_log, "\n" . format_log_date() . " " . $config_name . " Rewrote date to: " . $artdate . " " . $finddate[1] . " for " . $group . ":" . $local, FILE_APPEND);
|
||||
}
|
||||
$article_date = $artdate;
|
||||
$dates_used[$article_date] = true;
|
||||
|
||||
// Don't spool article if $banned or fails integrity test
|
||||
$integrity = check_article_integrity(file($articleHandle), $artdate);
|
||||
if (($banned !== false) || ($integrity !== false)) {
|
||||
unlink($articleHandle);
|
||||
if ($integrity) {
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $integrity, FILE_APPEND);
|
||||
} elseif ($banned) {
|
||||
file_put_contents($spamlog, "\n" . format_log_date() . " " . $banned . " :\tSPAM\t" . $mid[1] . "\t" . $groupnames[1] . "\t" . $from[1], FILE_APPEND);
|
||||
}
|
||||
$article++;
|
||||
} else {
|
||||
if ((strpos($CONFIG['nocem_groups'], $group) !== false) && ($CONFIG['enable_nocem'] == true)) {
|
||||
if (strpos($subject, $nocem_check) !== false) {
|
||||
$is_from = address_decode($from[1], 'nowhere');
|
||||
$nocem_file = tempnam($spooldir . "/nocem", $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]");
|
||||
copy($articleHandle, $nocem_file);
|
||||
chmod($nocem_file, 0644);
|
||||
if ($save_nocem_messages == true) {
|
||||
$saved_nocem_file = tempnam($nocem_dir, $is_from[0]['mailbox'] . "@" . $is_from[0]['host'] . "[" . date("Y.m.d.H.i.s") . "]-");
|
||||
copy($articleHandle, $saved_nocem_file);
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((strpos($rslight_gpg['nntp_group'], $group) !== false) && ($rslight_gpg['enable'] == '1')) {
|
||||
if (strpos($subject, $bbsmail_check) !== false) {
|
||||
$bbsmail_file = preg_replace('/@@RSL /', '', $subject);
|
||||
$bbsmail_filename = $spooldir . "/bbsmail/in/bbsmail-" . $bbsmail_file;
|
||||
copy($articleHandle, $bbsmail_filename);
|
||||
}
|
||||
}
|
||||
$this_article = file_get_contents($articleHandle);
|
||||
if ($CONFIG['article_database'] == '1') {
|
||||
unlink($articleHandle);
|
||||
// CREATE SEARCH SNIPPET
|
||||
if ($boundary !== false) {
|
||||
$body_array = explode("\n", $body);
|
||||
$found = false;
|
||||
$start = false;
|
||||
foreach ($body_array as $line) {
|
||||
if ($found === false) {
|
||||
if (strpos($line, $boundary) !== false) {
|
||||
$found = true;
|
||||
continue;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (trim($line != '') && $start === false) {
|
||||
continue;
|
||||
} else {
|
||||
if ($start === false) {
|
||||
$start = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
$newbody .= $line . "\n";
|
||||
}
|
||||
file_put_contents($debug_log, "\n" . format_log_date() . " Created snippet from multipart article: " . $mid[1], FILE_APPEND);
|
||||
} else {
|
||||
$newbody = $body;
|
||||
}
|
||||
|
||||
$this_snippet = get_search_snippet($newbody, $content_type[1], $content_transfer_encoding);
|
||||
unset($newbody);
|
||||
} else {
|
||||
touch($articleHandle, $article_date);
|
||||
}
|
||||
$current_article['mid'] = $mid[1];
|
||||
$current_article['epochdate'] = $article_date;
|
||||
$current_article['stringdate'] = $finddate[1];
|
||||
$current_article['from'] = $from[1];
|
||||
$current_article['subject'] = $subject;
|
||||
$current_article['references'] = $references;
|
||||
$current_article['bytes'] = $bytes;
|
||||
$current_article['lines'] = $lines;
|
||||
$current_article['article'] = $this_article;
|
||||
$current_article['snippet'] = $this_snippet;
|
||||
|
||||
// Check Spam
|
||||
$res = 0;
|
||||
if (isset($CONFIG['spamassassin']) && ($CONFIG['spamassassin'] == true) && ($OVERRIDES['disable_spamassassin_spooling'] !== true)) {
|
||||
$spam_result_array = check_spam($subject, $from[1], $groupnames[1], $references, $this_article, $mid[1]);
|
||||
$res = $spam_result_array['res'];
|
||||
$spamresult = $spam_result_array['spamresult'];
|
||||
$spamcheckerversion = $spam_result_array['spamcheckerversion'];
|
||||
$spamlevel = $spam_result_array['spamlevel'];
|
||||
}
|
||||
if ($res === 1) {
|
||||
unlink($articleHandle);
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Skipping: " . $CONFIG['remote_server'] . " " . $group . ":" . $mid[1] . " Exceeds Spam Score", FILE_APPEND);
|
||||
// $orig_newsgroups = $newsgroups;
|
||||
// $newsgroups = $CONFIG['spamgroup'];
|
||||
// $group = $newsgroups;
|
||||
$local--;
|
||||
} else {
|
||||
$pass = false;
|
||||
foreach ($allgroups as $agroup) {
|
||||
$agroup = trim($agroup);
|
||||
if ((! testGroup($agroup)) || $agroup == '') {
|
||||
continue;
|
||||
}
|
||||
$current_article['group'] = $agroup;
|
||||
if ($group == $agroup) {
|
||||
$current_article['local'] = $local;
|
||||
} else {
|
||||
$current_article['local'] = get_next_article_number($agroup);
|
||||
}
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " Preparing to spool " . $group . ":" . $mid[1], FILE_APPEND);
|
||||
$tmp = insert_article_from_array($current_article, false);
|
||||
if ($tmp[0] != "4") {
|
||||
$pass = true;
|
||||
} else {
|
||||
file_put_contents($logfile, "\n" . format_log_date() . " " . $config_name . " " . $tmp, FILE_APPEND);
|
||||
}
|
||||
}
|
||||
}
|
||||
$local++;
|
||||
}
|
||||
if ($supersedes !== false) {
|
||||
if (isset($OVERRIDES['enable_supersedes_support']) && $OVERRIDES['enable_supersedes_support'] == true) {
|
||||
file_put_contents($debug_log, "\n" . format_log_date() . " Found Supersedes: " . $mid[1] . " for: " . $supersedes, FILE_APPEND);
|
||||
if (!check_remote_for_msgid($supersedes)) {
|
||||
file_put_contents($debug_log, "\n" . format_log_date() . " Will delete: " . $supersedes, FILE_APPEND);
|
||||
delete_message($supersedes);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function import_articles($group)
|
||||
{
|
||||
global $spooldir, $CONFIG, $workpath, $path, $config_name, $logfile;
|
||||
|
|
Loading…
Reference in New Issue