From c6400cd7916f13e7baf1a804d486f13c38c2508e Mon Sep 17 00:00:00 2001 From: Retro_Guy Date: Mon, 8 Mar 2021 01:43:06 -0700 Subject: [PATCH] Create search data when local insert of post also --- Rocksolid_Light/rocksolid/newsportal.php | 39 +++++++++++++++++++ .../rslight/scripts/rslight-lib.php | 10 +++-- Rocksolid_Light/rslight/scripts/spoolnews.php | 35 ++++++++++------- 3 files changed, 67 insertions(+), 17 deletions(-) diff --git a/Rocksolid_Light/rocksolid/newsportal.php b/Rocksolid_Light/rocksolid/newsportal.php index 92e6deb..a7abe9b 100644 --- a/Rocksolid_Light/rocksolid/newsportal.php +++ b/Rocksolid_Light/rocksolid/newsportal.php @@ -1275,6 +1275,31 @@ function get_date_interval($value) { return $variance; } +function get_search_snippet($body, $content_type) { + $body = quoted_printable_decode($body); + $mysnippet = recode_charset($body, $content_type, "utf8"); + if($bodyend=strrpos($mysnippet, "\n---\n")) { + $mysnippet = substr($mysnippet, 0, $bodyend); + } else { + if($bodyend=strrpos($mysnippet, "\n-- ")) { + $mysnippet = substr($mysnippet, 0, $bodyend); + } else { + if($bodyend=strrpos($mysnippet, "\n.")) { + $mysnippet = substr($mysnippet, 0, $bodyend); + } + } + } + $mysnippet = preg_replace('/\n.{0,5}>(.*)/', '', $mysnippet); + + $snipstart = strpos($mysnippet, ":\n"); + if(substr_count(trim(substr($mysnippet, 0, $snipstart)), "\n") < 2) { + $mysnippet = substr($mysnippet, $snipstart + 1); + } else { + $mysnippet = substr($mysnippet, 0); + } + return $mysnippet; +} + function rslight_db_open($database, $table='overview') { try { $dbh = new PDO('sqlite:'.$database); @@ -1317,9 +1342,23 @@ function article_db_open($database) { date TEXT, name TEXT, subject TEXT, + search_snippet TEXT, article TEXT)"); $stmt = $dbh->query('CREATE INDEX IF NOT EXISTS db_number on articles(number)'); $stmt->execute(); + $dbh->exec("CREATE VIRTUAL TABLE IF NOT EXISTS search_fts USING fts5( + newsgroup, + number, + date, + msgid, + subject, + search_snippet)"); + $dbh->exec("CREATE TRIGGER IF NOT EXISTS after_articles_insert AFTER INSERT ON articles BEGIN + INSERT INTO search_fts(newsgroup, number, date, msgid, subject, search_snippet) VALUES(new.newsgroup, new.number, new.date, new.msgid, new.subject, new.search_snippet); + END;"); + $dbh->exec("CREATE TRIGGER IF NOT EXISTS after_articles_delete AFTER DELETE ON articles BEGIN + DELETE FROM search_fts WHERE msgid = old.msgid; + END;"); return($dbh); } diff --git a/Rocksolid_Light/rslight/scripts/rslight-lib.php b/Rocksolid_Light/rslight/scripts/rslight-lib.php index fdb1ccb..5021e81 100755 --- a/Rocksolid_Light/rslight/scripts/rslight-lib.php +++ b/Rocksolid_Light/rslight/scripts/rslight-lib.php @@ -245,6 +245,7 @@ function process_post($filename) { $no_date=1; $no_org=1; $is_header=1; + $body=""; $ref=0; $response=""; $bytes=0; @@ -387,7 +388,7 @@ function process_post($filename) { fclose($group_overviewfp); } if($duplicate == 0) { - insert_article($section,$onegroup,$postfilename,$subject[1],$from[1],$article_date,$date_rep,$msgid,$references,$bytes,$lines,$xref); + insert_article($section,$onegroup,$postfilename,$subject[1],$from[1],$article_date,$date_rep,$msgid,$references,$bytes,$lines,$xref,$body); $response="240 Article received OK\r\n"; } else { $response="441 Posting failed\r\n"; @@ -1030,7 +1031,7 @@ function encode_subject($line) { } function insert_article($section,$nntp_group,$filename,$subject_i,$from_i,$article_date, -$date_i,$mid_i,$references_i,$bytes_i,$lines_i,$xref_i) { +$date_i,$mid_i,$references_i,$bytes_i,$lines_i,$xref_i,$body) { global $enable_rslight,$spooldir,$CONFIG,$logdir,$logfile; $sn_lockfile = sys_get_temp_dir() . '/'.$section.'-spoolnews.lock'; @@ -1114,11 +1115,12 @@ $date_i,$mid_i,$references_i,$bytes_i,$lines_i,$xref_i) { $dbh = null; } if($CONFIG['article_database'] == '1') { + $this_snippet = get_search_snippet($body, 'UTF8'); $article_dbh = article_db_open($spooldir.'/'.$nntp_group.'-articles.db3'); - $article_sql = 'INSERT INTO articles(newsgroup, number, msgid, date, name, subject, article) VALUES(?,?,?,?,?,?,?)'; + $article_sql = 'INSERT INTO articles(newsgroup, number, msgid, date, name, subject, article, search_snippet) VALUES(?,?,?,?,?,?,?,?)'; $article_stmt = $article_dbh->prepare($article_sql); $this_article = file_get_contents($grouppath."/".$local); - $article_stmt->execute([$nntp_group, $local, $mid_i, $article_date, $from_i, $subject_i, trim($this_article)]); + $article_stmt->execute([$nntp_group, $local, $mid_i, $article_date, $from_i, $subject_i, trim($this_article), $this_snippet]); unlink($grouppath."/".$local); $article_dbh = null; } diff --git a/Rocksolid_Light/rslight/scripts/spoolnews.php b/Rocksolid_Light/rslight/scripts/spoolnews.php index 82b73f1..916bce6 100755 --- a/Rocksolid_Light/rslight/scripts/spoolnews.php +++ b/Rocksolid_Light/rslight/scripts/spoolnews.php @@ -36,7 +36,7 @@ if(!isset($maxarticles_per_run)) { $maxarticles_per_run = 100; } if(!isset($maxfirstrequest)) { - $maxfirstrequest = 1000; + $maxfirstrequest = 100; } if(!isset($CONFIG['enable_nntp']) || $CONFIG['enable_nntp'] != true) { @@ -118,17 +118,6 @@ echo "\nSpoolnews Done\r\n"; function get_articles($ns, $group) { global $enable_rslight, $spooldir, $CONFIG, $maxarticles_per_run, $maxfirstrequest, $workpath, $path, $remote_groupfile, $local_groupfile, $local, $logdir, $config_name, $logfile; - # Prepare databases - $database = $spooldir.'/articles-overview.db3'; - $table = 'overview'; - $dbh = rslight_db_open($database, $table); - $sql = 'INSERT INTO '.$table.'(newsgroup, number, msgid, date, name, subject) VALUES(?,?,?,?,?,?)'; - $stmt = $dbh->prepare($sql); - if($CONFIG['article_database'] == '1') { - $article_dbh = article_db_open($spooldir.'/'.$group.'-articles.db3'); - $article_sql = 'INSERT INTO articles(newsgroup, number, msgid, date, name, subject, article) VALUES(?,?,?,?,?,?,?)'; - $article_stmt = $article_dbh->prepare($article_sql); - } if($ns == false) { file_put_contents($logfile, "\n".format_log_date()." ".$config_name." Lost connection to ".$CONFIG['remote_server'].":".$CONFIG['remote_port'], FILE_APPEND); exit(); @@ -188,6 +177,17 @@ function get_articles($ns, $group) { if($article > ($detail[3] + 1)) { $article = $detail[3]; } + # Prepare databases + $database = $spooldir.'/articles-overview.db3'; + $table = 'overview'; + $dbh = rslight_db_open($database, $table); + $sql = 'INSERT INTO '.$table.'(newsgroup, number, msgid, date, name, subject) VALUES(?,?,?,?,?,?)'; + $stmt = $dbh->prepare($sql); + if($CONFIG['article_database'] == '1') { + $article_dbh = article_db_open($spooldir.'/'.$group.'-articles.db3'); + $article_sql = 'INSERT INTO articles(newsgroup, number, msgid, date, name, subject, article, search_snippet) VALUES(?,?,?,?,?,?,?,?)'; + $article_stmt = $article_dbh->prepare($article_sql); + } # Pull articles and save them in our spool @mkdir($grouppath,0755,'recursive'); $i=0; @@ -239,6 +239,7 @@ function get_articles($ns, $group) { $ref=0; $banned=0; $is_header=1; + $body=""; while(strcmp($response,".") != 0) { $bytes = $bytes + mb_strlen($response, '8bit'); @@ -284,6 +285,10 @@ function get_articles($ns, $group) { $xref=$response; $ref=0; } + if(stripos($response, "Content-Type: ") === 0) { + preg_match('/.*charset=.*/', $response, $te); + $content_type = explode("Content-Type: text/plain; charset=", $te[0]); + } if(stripos($response, "References: ") === 0) { $this_references=explode('References: ', $response); $references = $this_references[1]; @@ -294,6 +299,8 @@ function get_articles($ns, $group) { $references=$references.$response; } } + } else { + $body.=$response."\n"; } fputs($articleHandle, $response."\n"); // Check here for broken $ns connection before continuing @@ -332,7 +339,9 @@ function get_articles($ns, $group) { $stmt->execute([$group, $local, $mid[1], $article_date, $from[1], $subject[1]]); if($CONFIG['article_database'] == '1') { $this_article = file_get_contents($grouppath."/".$local); - $article_stmt->execute([$group, $local, $mid[1], $article_date, $from[1], $subject[1], $this_article]); +// CREATE SEARCH SNIPPET + $this_snippet = get_search_snippet($body, $content_type[1]); + $article_stmt->execute([$group, $local, $mid[1], $article_date, $from[1], $subject[1], $this_article, $this_snippet]); unlink($grouppath."/".$local); } else { if($article_date > time())