Parse redis messages (#51)

* Initial version of Redis parsing fix

* Refactor parsing for more maintainability

* Add benchmarks for new parsing strategy

* Add tests for parsing
This commit is contained in:
Daniel Sockwell 2019-09-27 23:29:11 -04:00 committed by GitHub
parent ceb38c2689
commit 2ac08c5316
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 194 additions and 77 deletions

View File

@ -2,6 +2,7 @@ use criterion::black_box;
use criterion::criterion_group;
use criterion::criterion_main;
use criterion::Criterion;
use flodgatt::redis_to_client_stream::redis_stream::RedisMsg;
use regex::Regex;
use serde_json::Value;
@ -62,6 +63,38 @@ fn print_next_str(mut end: usize, input: &str) -> (usize, String) {
let string = &input[start..end];
(end, string.to_string())
}
fn parse_with_stuct(input: String) -> Vec<(String, Value)> {
let mut output = Vec::new();
let mut incoming_raw_msg = input;
while incoming_raw_msg.len() > 0 {
let mut msg = RedisMsg::from_raw(incoming_raw_msg.clone());
let command = msg.get_next_item();
match command.as_str() {
"message" => {
let timeline = msg.get_next_item()["timeline:".len()..].to_string();
let message: Value = serde_json::from_str(&msg.get_next_item()).unwrap();
output.push((timeline, message));
}
"subscribe" | "unsubscribe" => {
// This returns a confirmation. We don't need to do anything with it,
// but we do need to advance the cursor past it
msg.get_next_item(); // name of channel (un)subscribed
msg.cursor += ":".len();
msg.process_number(); // The number of active subscriptions
msg.cursor += "\r\n".len();
}
cmd => panic!(
"Invariant violation: bad Redis input. Got {} as a command",
cmd
),
}
incoming_raw_msg = msg.raw[msg.cursor..].to_string();
}
output
}
fn criterion_benchmark(c: &mut Criterion) {
let input = "*3\r\n$7\r\nmessage\r\n$10\r\ntimeline:1\r\n$3790\r\n{\"event\":\"update\",\"payload\":{\"id\":\"102775370117886890\",\"created_at\":\"2019-09-11T18:42:19.000Z\",\"in_reply_to_id\":null,\"in_reply_to_account_id\":null,\"sensitive\":false,\"spoiler_text\":\"\",\"visibility\":\"unlisted\",\"language\":\"en\",\"uri\":\"https://mastodon.host/users/federationbot/statuses/102775346916917099\",\"url\":\"https://mastodon.host/@federationbot/102775346916917099\",\"replies_count\":0,\"reblogs_count\":0,\"favourites_count\":0,\"favourited\":false,\"reblogged\":false,\"muted\":false,\"content\":\"<p>Trending tags:<br><a href=\\\"https://mastodon.host/tags/neverforget\\\" class=\\\"mention hashtag\\\" rel=\\\"nofollow noopener\\\" target=\\\"_blank\\\">#<span>neverforget</span></a><br><a href=\\\"https://mastodon.host/tags/4styles\\\" class=\\\"mention hashtag\\\" rel=\\\"nofollow noopener\\\" target=\\\"_blank\\\">#<span>4styles</span></a><br><a href=\\\"https://mastodon.host/tags/newpipe\\\" class=\\\"mention hashtag\\\" rel=\\\"nofollow noopener\\\" target=\\\"_blank\\\">#<span>newpipe</span></a><br><a href=\\\"https://mastodon.host/tags/uber\\\" class=\\\"mention hashtag\\\" rel=\\\"nofollow noopener\\\" target=\\\"_blank\\\">#<span>uber</span></a><br><a href=\\\"https://mastodon.host/tags/mercredifiction\\\" class=\\\"mention hashtag\\\" rel=\\\"nofollow noopener\\\" target=\\\"_blank\\\">#<span>mercredifiction</span></a></p>\",\"reblog\":null,\"account\":{\"id\":\"78\",\"username\":\"federationbot\",\"acct\":\"federationbot@mastodon.host\",\"display_name\":\"Federation Bot\",\"locked\":false,\"bot\":false,\"created_at\":\"2019-09-10T15:04:25.559Z\",\"note\":\"<p>Hello, I am mastodon.host official semi bot.</p><p>Follow me if you want to have some updates on the view of the fediverse from here ( I only post unlisted ). </p><p>I also randomly boost one of my followers toot every hour !</p><p>If you don\'t feel confortable with me following you, tell me: unfollow and I\'ll do it :)</p><p>If you want me to follow you, just tell me follow ! </p><p>If you want automatic follow for new users on your instance and you are an instance admin, contact me !</p><p>Other commands are private :)</p>\",\"url\":\"https://mastodon.host/@federationbot\",\"avatar\":\"https://instance.codesections.com/system/accounts/avatars/000/000/078/original/d9e2be5398629cf8.jpeg?1568127863\",\"avatar_static\":\"https://instance.codesections.com/system/accounts/avatars/000/000/078/original/d9e2be5398629cf8.jpeg?1568127863\",\"header\":\"https://instance.codesections.com/headers/original/missing.png\",\"header_static\":\"https://instance.codesections.com/headers/original/missing.png\",\"followers_count\":16636,\"following_count\":179532,\"statuses_count\":50554,\"emojis\":[],\"fields\":[{\"name\":\"More stats\",\"value\":\"<a href=\\\"https://mastodon.host/stats.html\\\" rel=\\\"nofollow noopener\\\" target=\\\"_blank\\\"><span class=\\\"invisible\\\">https://</span><span class=\\\"\\\">mastodon.host/stats.html</span><span class=\\\"invisible\\\"></span></a>\",\"verified_at\":null},{\"name\":\"More infos\",\"value\":\"<a href=\\\"https://mastodon.host/about/more\\\" rel=\\\"nofollow noopener\\\" target=\\\"_blank\\\"><span class=\\\"invisible\\\">https://</span><span class=\\\"\\\">mastodon.host/about/more</span><span class=\\\"invisible\\\"></span></a>\",\"verified_at\":null},{\"name\":\"Owner/Friend\",\"value\":\"<span class=\\\"h-card\\\"><a href=\\\"https://mastodon.host/@gled\\\" class=\\\"u-url mention\\\" rel=\\\"nofollow noopener\\\" target=\\\"_blank\\\">@<span>gled</span></a></span>\",\"verified_at\":null}]},\"media_attachments\":[],\"mentions\":[],\"tags\":[{\"name\":\"4styles\",\"url\":\"https://instance.codesections.com/tags/4styles\"},{\"name\":\"neverforget\",\"url\":\"https://instance.codesections.com/tags/neverforget\"},{\"name\":\"mercredifiction\",\"url\":\"https://instance.codesections.com/tags/mercredifiction\"},{\"name\":\"uber\",\"url\":\"https://instance.codesections.com/tags/uber\"},{\"name\":\"newpipe\",\"url\":\"https://instance.codesections.com/tags/newpipe\"}],\"emojis\":[],\"card\":null,\"poll\":null},\"queued_at\":1568227693541}\r\n".to_string();
@ -72,6 +105,9 @@ fn criterion_benchmark(c: &mut Criterion) {
group.bench_function("hand parse", |b| {
b.iter(|| hand_parse(black_box(input.clone())))
});
group.bench_function("stuct parse", |b| {
b.iter(|| parse_with_stuct(black_box(input.clone())))
});
}
criterion_group!(benches, criterion_benchmark);

View File

@ -2,6 +2,7 @@
pub mod client_agent;
pub mod receiver;
pub mod redis_cmd;
pub mod redis_stream;
use crate::config;
pub use client_agent::ClientAgent;

View File

@ -1,27 +1,26 @@
//! Receives data from Redis, sorts it by `ClientAgent`, and stores it until
//! polled by the correct `ClientAgent`. Also manages sububscriptions and
//! unsubscriptions to/from Redis.
use super::redis_cmd;
use super::{redis_cmd, redis_stream};
use crate::{config, pubsub_cmd};
use futures::{Async, Poll};
use log::info;
use regex::Regex;
use serde_json::Value;
use std::{collections, io::Read, io::Write, net, time};
use tokio::io::{AsyncRead, Error};
use std::{collections, io::Write, net, time};
use tokio::io::Error;
use uuid::Uuid;
/// The item that streams from Redis and is polled by the `ClientAgent`
#[derive(Debug)]
pub struct Receiver {
pubsub_connection: net::TcpStream,
pub pubsub_connection: net::TcpStream,
secondary_redis_connection: net::TcpStream,
redis_polled_at: time::Instant,
timeline: String,
manager_id: Uuid,
msg_queues: collections::HashMap<Uuid, MsgQueue>,
pub msg_queues: collections::HashMap<Uuid, MsgQueue>,
clients_per_timeline: collections::HashMap<String, i32>,
incoming_raw_msg: String,
pub incoming_raw_msg: String,
}
impl Receiver {
@ -157,7 +156,7 @@ impl futures::stream::Stream for Receiver {
if self.redis_polled_at.elapsed()
> time::Duration::from_millis(*config::REDIS_POLL_INTERVAL)
{
AsyncReadableStream::poll_redis(self);
redis_stream::AsyncReadableStream::poll_redis(self);
self.redis_polled_at = time::Instant::now();
}
@ -188,10 +187,10 @@ impl Drop for Receiver {
}
#[derive(Debug, Clone)]
struct MsgQueue {
messages: collections::VecDeque<Value>,
pub struct MsgQueue {
pub messages: collections::VecDeque<Value>,
last_polled_at: time::Instant,
redis_channel: String,
pub redis_channel: String,
}
impl MsgQueue {
@ -204,69 +203,3 @@ impl MsgQueue {
}
}
}
struct AsyncReadableStream<'a>(&'a mut net::TcpStream);
impl<'a> AsyncReadableStream<'a> {
fn new(stream: &'a mut net::TcpStream) -> Self {
AsyncReadableStream(stream)
}
/// Polls Redis for any new messages and adds them to the `MsgQueue` for
/// the appropriate `ClientAgent`.
fn poll_redis(receiver: &mut Receiver) {
let mut buffer = vec![0u8; 3000];
let mut async_stream = AsyncReadableStream::new(&mut receiver.pubsub_connection);
if let Async::Ready(num_bytes_read) = async_stream.poll_read(&mut buffer).unwrap() {
let raw_redis_response = &String::from_utf8_lossy(&buffer[..num_bytes_read]);
dbg!(&raw_redis_response);
receiver.incoming_raw_msg.push_str(raw_redis_response);
// Text comes in from redis as a raw stream, which could be more than one message
// and is not guaranteed to end on a message boundary. We need to break it down
// into messages. First, start by only acting if we end on a valid message boundary
if receiver.incoming_raw_msg.ends_with("}\r\n") {
// Every valid message is tagged with the string `message`. This means 3 things:
// 1) We can discard everything before the first `message` (with `skip(1)`)
// 2) We can split into separate messages by splitting on `message`
// 3) We can use a regex that discards everything after the *first* valid
// message (since the next message will have a new `message` tag)
let messages = receiver.incoming_raw_msg.as_str().split("message").skip(1);
let regex =
Regex::new(r"timeline:(?P<timeline>.*?)\r\n\$\d+\r\n(?P<value>.*?)\r\n")
.expect("Hard-codded");
for message in messages {
let timeline = regex.captures(message).expect("Hard-coded timeline regex")
["timeline"]
.to_string();
let redis_msg: Value = serde_json::from_str(
&regex.captures(message).expect("Hard-coded value regex")["value"],
)
.expect("Valid json");
for msg_queue in receiver.msg_queues.values_mut() {
if msg_queue.redis_channel == timeline {
msg_queue.messages.push_back(redis_msg.clone());
}
}
}
// We've processed this raw msg and can safely discard it
receiver.incoming_raw_msg.clear();
}
}
}
}
impl<'a> Read for AsyncReadableStream<'a> {
fn read(&mut self, buffer: &mut [u8]) -> Result<usize, std::io::Error> {
self.0.read(buffer)
}
}
impl<'a> AsyncRead for AsyncReadableStream<'a> {
fn poll_read(&mut self, buf: &mut [u8]) -> Poll<usize, std::io::Error> {
match self.read(buf) {
Ok(t) => Ok(Async::Ready(t)),
Err(_) => Ok(Async::NotReady),
}
}
}

View File

@ -0,0 +1,147 @@
use super::receiver::Receiver;
use futures::{Async, Poll};
use serde_json::Value;
use std::io::Read;
use std::net;
use tokio::io::AsyncRead;
pub struct AsyncReadableStream<'a>(&'a mut net::TcpStream);
impl<'a> AsyncReadableStream<'a> {
pub fn new(stream: &'a mut net::TcpStream) -> Self {
AsyncReadableStream(stream)
}
/// Polls Redis for any new messages and adds them to the `MsgQueue` for
/// the appropriate `ClientAgent`.
pub fn poll_redis(receiver: &mut Receiver) {
let mut buffer = vec![0u8; 3000];
let mut async_stream = AsyncReadableStream::new(&mut receiver.pubsub_connection);
if let Async::Ready(num_bytes_read) = async_stream.poll_read(&mut buffer).unwrap() {
let raw_redis_response = &String::from_utf8_lossy(&buffer[..num_bytes_read]);
receiver.incoming_raw_msg.push_str(raw_redis_response);
// Text comes in from redis as a raw stream, which could be more than one message
// and is not guaranteed to end on a message boundary. We need to break it down
// into messages. Incoming messages *are* guaranteed to be RESP arrays,
// https://redis.io/topics/protocol
// Only act if we have a full message (end on a msg boundary)
if !receiver.incoming_raw_msg.ends_with("}\r\n") {
return;
};
while receiver.incoming_raw_msg.len() > 0 {
let mut msg = RedisMsg::from_raw(receiver.incoming_raw_msg.clone());
let command = msg.get_next_item();
match command.as_str() {
"message" => {
let timeline = msg.get_next_item()["timeline:".len()..].to_string();
let message: Value = serde_json::from_str(&msg.get_next_item()).unwrap();
for msg_queue in receiver.msg_queues.values_mut() {
if msg_queue.redis_channel == timeline {
msg_queue.messages.push_back(message.clone());
}
}
}
"subscribe" | "unsubscribe" => {
// This returns a confirmation. We don't need to do anything with it,
// but we do need to advance the cursor past it
msg.get_next_item(); // name of channel (un)subscribed
msg.cursor += ":".len();
msg.process_number(); // The number of active subscriptions
msg.cursor += "\r\n".len();
}
cmd => panic!(
"Invariant violation: bad Redis input. Got {} as a command",
cmd
),
}
receiver.incoming_raw_msg = msg.raw[msg.cursor..].to_string();
}
}
}
}
impl<'a> Read for AsyncReadableStream<'a> {
fn read(&mut self, buffer: &mut [u8]) -> Result<usize, std::io::Error> {
self.0.read(buffer)
}
}
impl<'a> AsyncRead for AsyncReadableStream<'a> {
fn poll_read(&mut self, buf: &mut [u8]) -> Poll<usize, std::io::Error> {
match self.read(buf) {
Ok(t) => Ok(Async::Ready(t)),
Err(_) => Ok(Async::NotReady),
}
}
}
#[derive(Default)]
pub struct RedisMsg {
pub raw: String,
pub cursor: usize,
}
impl RedisMsg {
pub fn from_raw(raw: String) -> Self {
Self {
raw,
cursor: "*3\r\n".len(), //length of intro header
..Self::default()
}
}
/// Move the cursor from the beginning of a number through its end and return the number
pub fn process_number(&mut self) -> usize {
let mut selection_end = self.cursor + 1;
let mut chars = self.raw.chars();
chars.nth(self.cursor);
while chars.next().expect("still in str").is_digit(10) {
selection_end += 1;
}
let selected_number = self.raw[self.cursor..selection_end]
.parse::<usize>()
.expect("checked with `.is_digit(10)`");
self.cursor = selection_end;
selected_number
}
/// In a pubsub reply from Redis, an item can be either the name of the subscribed channel
/// or the msg payload. Either way, it follows the same format:
/// `$[LENGTH_OF_ITEM_BODY]\r\n[ITEM_BODY]\r\n`
pub fn get_next_item(&mut self) -> String {
self.cursor += "$".len();
let item_len = self.process_number();
self.cursor += "\r\n".len();
let item_start_position = self.cursor;
self.cursor += item_len;
let item = self.raw[item_start_position..self.cursor].to_string();
self.cursor += "\r\n".len();
item
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn simple_redis_parse() {
let input = "*3\r\n$9\r\nSUBSCRIBE\r\n$10\r\ntimeline:1\r\n:1\r\n";
let mut msg = RedisMsg::from_raw(input.to_string());
let cmd = msg.get_next_item();
assert_eq!(&cmd, "SUBSCRIBE");
let timeline = msg.get_next_item();
assert_eq!(&timeline, "timeline:1");
msg.cursor += ":1\r\n".len();
assert_eq!(msg.cursor, input.len());
}
#[test]
fn realistic_redis_parse() {
let input = "*3\r\n$7\r\nmessage\r\n$10\r\ntimeline:4\r\n$1386\r\n{\"event\":\"update\",\"payload\":{\"id\":\"102866835379605039\",\"created_at\":\"2019-09-27T22:29:02.590Z\",\"in_reply_to_id\":null,\"in_reply_to_account_id\":null,\"sensitive\":false,\"spoiler_text\":\"\",\"visibility\":\"public\",\"language\":\"en\",\"uri\":\"http://localhost:3000/users/admin/statuses/102866835379605039\",\"url\":\"http://localhost:3000/@admin/102866835379605039\",\"replies_count\":0,\"reblogs_count\":0,\"favourites_count\":0,\"favourited\":false,\"reblogged\":false,\"muted\":false,\"content\":\"<p><span class=\\\"h-card\\\"><a href=\\\"http://localhost:3000/@susan\\\" class=\\\"u-url mention\\\">@<span>susan</span></a></span> hi</p>\",\"reblog\":null,\"application\":{\"name\":\"Web\",\"website\":null},\"account\":{\"id\":\"1\",\"username\":\"admin\",\"acct\":\"admin\",\"display_name\":\"\",\"locked\":false,\"bot\":false,\"created_at\":\"2019-07-04T00:21:05.890Z\",\"note\":\"<p></p>\",\"url\":\"http://localhost:3000/@admin\",\"avatar\":\"http://localhost:3000/avatars/original/missing.png\",\"avatar_static\":\"http://localhost:3000/avatars/original/missing.png\",\"header\":\"http://localhost:3000/headers/original/missing.png\",\"header_static\":\"http://localhost:3000/headers/original/missing.png\",\"followers_count\":3,\"following_count\":3,\"statuses_count\":192,\"emojis\":[],\"fields\":[]},\"media_attachments\":[],\"mentions\":[{\"id\":\"4\",\"username\":\"susan\",\"url\":\"http://localhost:3000/@susan\",\"acct\":\"susan\"}],\"tags\":[],\"emojis\":[],\"card\":null,\"poll\":null},\"queued_at\":1569623342825}\r\n";
let mut msg = RedisMsg::from_raw(input.to_string());
let cmd = msg.get_next_item();
assert_eq!(&cmd, "message");
let timeline = msg.get_next_item();
assert_eq!(&timeline, "timeline:4");
let message_str = msg.get_next_item();
assert_eq!(message_str, input[41..input.len() - 2]);
assert_eq!(msg.cursor, input.len());
}
}