2020-03-25 22:50:32 +01:00
|
|
|
//! Methods for parsing input in the small subset of the Redis Serialization Protocol we
|
|
|
|
//! support.
|
|
|
|
//!
|
|
|
|
//! Every message Flodgatt receives from Redis is a Redis Array; the elements in the array
|
|
|
|
//! will be either Bulk Strings or Integers (as Redis defines those terms). See the
|
|
|
|
//! [Redis protocol documentation](https://redis.io/topics/protocol) for details. A raw
|
|
|
|
//! message might look something like this (simplified, with line breaks added between
|
|
|
|
//! fields):
|
|
|
|
//!
|
|
|
|
//! ```text
|
|
|
|
//! *3\r\n
|
2020-03-31 00:54:00 +02:00
|
|
|
//! $7\r\n
|
|
|
|
//! message\r\n
|
|
|
|
//! $10\r\n
|
|
|
|
//! timeline:4\r\n
|
2020-03-25 22:50:32 +01:00
|
|
|
//! $1386\r\n{\"event\":\"update\",\"payload\"...\"queued_at\":1569623342825}\r\n
|
|
|
|
//! ```
|
|
|
|
//!
|
|
|
|
//! Read that as: an array with three elements: the first element is a bulk string with
//! seven characters, the second is a bulk string with ten characters, and the third is a
//! bulk string with 1,386 characters.
|
2020-03-31 00:54:00 +02:00
|
|
|
use self::RedisParseOutput::*;
|
2020-04-21 22:39:31 +02:00
|
|
|
pub use err::RedisParseErr;
|
|
|
|
use std::convert::{TryFrom, TryInto};
|
|
|
|
use std::str;
|
2020-04-01 21:35:24 +02:00
|
|
|
|
2020-04-21 22:39:31 +02:00
|
|
|
mod err;
|
2020-03-27 17:00:48 +01:00
|
|
|
|
2020-03-31 00:54:00 +02:00
|
|
|
/// The parsed result of one complete Redis reply: either a pubsub message or
/// some other (non-message) reply, such as a subscription acknowledgment.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum RedisParseOutput<'a> {
    /// A pubsub message, split into its timeline, event payload, and any
    /// input left over after the message.
    Msg(RedisMsg<'a>),
    /// A non-message reply; carries the input remaining after the reply.
    NonMsg(&'a str),
}
|
|
|
|
|
|
|
|
/// A parsed Redis pubsub message; all fields borrow from the original input.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct RedisMsg<'a> {
    // The raw timeline text the message was published to, e.g. "timeline:4".
    pub(crate) timeline_txt: &'a str,
    // The raw event payload text (a JSON document in the example above).
    pub(crate) event_txt: &'a str,
    // Input remaining after this message; may contain further replies.
    pub(crate) leftover_input: &'a str,
}
|
|
|
|
|
|
|
|
impl<'a> TryFrom<&'a str> for RedisParseOutput<'a> {
|
|
|
|
type Error = RedisParseErr;
|
|
|
|
fn try_from(utf8: &'a str) -> Result<RedisParseOutput<'a>, Self::Error> {
|
|
|
|
let (structured_txt, leftover_utf8) = utf8_to_redis_data(utf8)?;
|
|
|
|
let structured_txt = RedisStructuredText {
|
|
|
|
structured_txt,
|
|
|
|
leftover_input: leftover_utf8,
|
|
|
|
};
|
|
|
|
Ok(structured_txt.try_into()?)
|
|
|
|
}
|
|
|
|
}
|
2020-03-25 22:50:32 +01:00
|
|
|
|
2020-03-31 00:54:00 +02:00
|
|
|
/// Intermediate parse result: structured Redis data plus whatever input
/// remained after it was parsed.
#[derive(Debug, Clone, PartialEq)]
struct RedisStructuredText<'a> {
    // The fully parsed Redis value.
    structured_txt: RedisData<'a>,
    // Unparsed input following the value; may hold further replies.
    leftover_input: &'a str,
}
|
|
|
|
/// One Redis protocol value; string contents borrow from the original input.
#[derive(Debug, Clone, PartialEq)]
enum RedisData<'a> {
    /// A Redis Array of nested values. NOTE: `parse_redis_array` stores the
    /// elements in *reverse* order so consumers can `pop()` them
    /// front-to-back.
    RedisArray(Vec<RedisData<'a>>),
    /// A Redis Bulk String (`$<len>\r\n<body>\r\n`).
    BulkString(&'a str),
    /// A Redis Integer (`:<digits>\r\n`).
    Integer(usize),
    /// Placeholder used while an array is being filled in; never present in a
    /// finished parse. (Spelling kept as-is — it is part of the existing API.)
    Uninitilized,
}
|
|
|
|
|
2020-03-31 00:54:00 +02:00
|
|
|
use RedisData::*;
|
2020-03-27 17:00:48 +01:00
|
|
|
use RedisParseErr::*;
|
2020-03-31 00:54:00 +02:00
|
|
|
/// Result alias for this module's parsing functions. (The lifetime parameter
/// formerly declared here was unused in the alias body and elided at every
/// use site, so it is dropped.)
type RedisParser<Item> = Result<Item, RedisParseErr>;
|
|
|
|
fn utf8_to_redis_data<'a>(s: &'a str) -> Result<(RedisData, &'a str), RedisParseErr> {
|
|
|
|
if s.len() < 4 {
|
|
|
|
Err(Incomplete)?
|
|
|
|
};
|
|
|
|
let (first_char, s) = s.split_at(1);
|
|
|
|
match first_char {
|
|
|
|
":" => parse_redis_int(s),
|
|
|
|
"$" => parse_redis_bulk_string(s),
|
|
|
|
"*" => parse_redis_array(s),
|
2020-04-15 02:37:49 +02:00
|
|
|
e => Err(InvalidLineStart(e.to_string())),
|
2020-03-31 00:54:00 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-01 21:35:24 +02:00
|
|
|
fn after_newline_at(s: &str, start: usize) -> RedisParser<&str> {
|
2020-03-31 00:54:00 +02:00
|
|
|
let s = s.get(start..).ok_or(Incomplete)?;
|
Improve handling of large Redis input (#143)
* Implement faster buffered input
This commit implements a modified ring buffer for input from Redis.
Specifically, Flodgatt now limits the amount of data it fetches from
Redis in one syscall to 8 KiB (two pages on most systems). Flodgatt
will process all complete messages it receives from Redis and then
re-use the same buffer for the next time it retrieves data. If
Flodgatt received a partial message, it will copy the partial message
to the beginning of the buffer before its next read.
This change has little effect on Flodgatt under light load (because it
was rare for Redis to have more than 8 KiB of messages available at
any one time). However, my hope is that this will significantly
reduce memory use on the largest instances.
* Improve handling of backpresure
This commit alters how Flodgatt behaves if it receives enough messages
for a single client to fill that clients channel. (Because the clients
regularly send their messages, should only occur if a single client
receives a large number of messages nearly simultaneously; this is
rare, but could occur, especially on large instances).
Previously, Flodgatt would drop messages in the rare case when the
client's channel was full. Now, Flodgatt will pause the current Redis
poll and yield control back to the client streams, allowing the
clients to empty their channels; Flodgatt will then resume polling
Redis/sending the messages it previously received. With the approach,
Flodgatt will never drop messages.
However, the risk to this approach is that, by never dropping
messages, Flodgatt does not have any way to reduce the amount of work
it needs to do when under heavy load – it delays the work slightly,
but doesn't reduce it. What this means is that it would be
*theoretically* possible for Flodgatt to fall increasingly behind, if
it is continuously receiving more messages than it can process. Due
to how quickly Flodgatt can process messages, though, I suspect this
would only come up if an admin were running Flodgatt in a
*significantly* resource constrained environment, but I wanted to
mention it for the sake of completeness.
This commit also adds a new /status/backpressure endpoint that
displays the current length of the Redis input buffer (which should
typically be low or 0). Like the other /status endpoints, this
endpoint is only enabled when Flodgatt is compiled with the
`stub_status` feature.
2020-04-27 22:03:05 +02:00
|
|
|
if s.len() < 2 {
|
|
|
|
Err(Incomplete)?;
|
|
|
|
}
|
2020-03-31 00:54:00 +02:00
|
|
|
if !s.starts_with("\r\n") {
|
Improve handling of large Redis input (#143)
* Implement faster buffered input
This commit implements a modified ring buffer for input from Redis.
Specifically, Flodgatt now limits the amount of data it fetches from
Redis in one syscall to 8 KiB (two pages on most systems). Flodgatt
will process all complete messages it receives from Redis and then
re-use the same buffer for the next time it retrieves data. If
Flodgatt received a partial message, it will copy the partial message
to the beginning of the buffer before its next read.
This change has little effect on Flodgatt under light load (because it
was rare for Redis to have more than 8 KiB of messages available at
any one time). However, my hope is that this will significantly
reduce memory use on the largest instances.
* Improve handling of backpresure
This commit alters how Flodgatt behaves if it receives enough messages
for a single client to fill that clients channel. (Because the clients
regularly send their messages, should only occur if a single client
receives a large number of messages nearly simultaneously; this is
rare, but could occur, especially on large instances).
Previously, Flodgatt would drop messages in the rare case when the
client's channel was full. Now, Flodgatt will pause the current Redis
poll and yield control back to the client streams, allowing the
clients to empty their channels; Flodgatt will then resume polling
Redis/sending the messages it previously received. With the approach,
Flodgatt will never drop messages.
However, the risk to this approach is that, by never dropping
messages, Flodgatt does not have any way to reduce the amount of work
it needs to do when under heavy load – it delays the work slightly,
but doesn't reduce it. What this means is that it would be
*theoretically* possible for Flodgatt to fall increasingly behind, if
it is continuously receiving more messages than it can process. Due
to how quickly Flodgatt can process messages, though, I suspect this
would only come up if an admin were running Flodgatt in a
*significantly* resource constrained environment, but I wanted to
mention it for the sake of completeness.
This commit also adds a new /status/backpressure endpoint that
displays the current length of the Redis input buffer (which should
typically be low or 0). Like the other /status endpoints, this
endpoint is only enabled when Flodgatt is compiled with the
`stub_status` feature.
2020-04-27 22:03:05 +02:00
|
|
|
Err(InvalidLineEnd)?;
|
2019-10-09 20:46:56 +02:00
|
|
|
}
|
2020-03-31 00:54:00 +02:00
|
|
|
Ok(s.get("\r\n".len()..).ok_or(Incomplete)?)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn parse_number_at<'a>(s: &'a str) -> RedisParser<(usize, &'a str)> {
|
2020-04-01 21:35:24 +02:00
|
|
|
let len = s.chars().position(|c| !c.is_numeric()).ok_or(Incomplete)?;
|
2020-03-31 00:54:00 +02:00
|
|
|
Ok((s[..len].parse()?, after_newline_at(s, len)?))
|
2020-03-25 22:50:32 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Parse a Redis bulk string and return the content of that string and the unparsed remainder.
|
|
|
|
///
|
|
|
|
/// All bulk strings have the format `$[LENGTH_OF_ITEM_BODY]\r\n[ITEM_BODY]\r\n`
|
2020-03-31 00:54:00 +02:00
|
|
|
fn parse_redis_bulk_string<'a>(s: &'a str) -> RedisParser<(RedisData, &'a str)> {
|
|
|
|
let (len, rest) = parse_number_at(s)?;
|
|
|
|
let content = rest.get(..len).ok_or(Incomplete)?;
|
|
|
|
Ok((BulkString(content), after_newline_at(&rest, len)?))
|
2020-03-25 22:50:32 +01:00
|
|
|
}
|
|
|
|
|
2020-03-31 00:54:00 +02:00
|
|
|
fn parse_redis_int<'a>(s: &'a str) -> RedisParser<(RedisData, &'a str)> {
|
|
|
|
let (number, rest) = parse_number_at(s)?;
|
|
|
|
Ok((Integer(number), rest))
|
2020-03-25 22:50:32 +01:00
|
|
|
}
|
|
|
|
|
2020-03-31 00:54:00 +02:00
|
|
|
fn parse_redis_array<'a>(s: &'a str) -> RedisParser<(RedisData, &'a str)> {
|
|
|
|
let (number_of_elements, mut rest) = parse_number_at(s)?;
|
|
|
|
|
|
|
|
let mut inner = Vec::with_capacity(number_of_elements);
|
|
|
|
inner.resize(number_of_elements, RedisData::Uninitilized);
|
|
|
|
|
|
|
|
for i in (0..number_of_elements).rev() {
|
|
|
|
let (next_el, new_rest) = utf8_to_redis_data(rest)?;
|
|
|
|
rest = new_rest;
|
|
|
|
inner[i] = next_el;
|
|
|
|
}
|
|
|
|
Ok((RedisData::RedisArray(inner), rest))
|
2020-03-25 22:50:32 +01:00
|
|
|
}
|
2019-10-09 20:46:56 +02:00
|
|
|
|
2020-03-31 00:54:00 +02:00
|
|
|
impl<'a> TryFrom<RedisData<'a>> for &'a str {
|
|
|
|
type Error = RedisParseErr;
|
|
|
|
|
|
|
|
fn try_from(val: RedisData<'a>) -> Result<Self, Self::Error> {
|
|
|
|
match val {
|
|
|
|
RedisData::BulkString(inner) => Ok(inner),
|
|
|
|
_ => Err(IncorrectRedisType),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> TryFrom<RedisStructuredText<'a>> for RedisParseOutput<'a> {
|
|
|
|
type Error = RedisParseErr;
|
|
|
|
|
|
|
|
fn try_from(input: RedisStructuredText<'a>) -> Result<RedisParseOutput<'a>, Self::Error> {
|
|
|
|
if let RedisData::RedisArray(mut redis_strings) = input.structured_txt {
|
|
|
|
let command = redis_strings.pop().ok_or(MissingField)?.try_into()?;
|
|
|
|
match command {
|
|
|
|
// subscription statuses look like:
|
|
|
|
// $14\r\ntimeline:local\r\n
|
|
|
|
// :47\r\n
|
|
|
|
"subscribe" | "unsubscribe" => Ok(NonMsg(input.leftover_input)),
|
|
|
|
// Messages look like;
|
|
|
|
// $10\r\ntimeline:4\r\n
|
|
|
|
// $1386\r\n{\"event\":\"update\",\"payload\"...\"queued_at\":1569623342825}\r\n
|
|
|
|
"message" => Ok(Msg(RedisMsg {
|
|
|
|
timeline_txt: redis_strings.pop().ok_or(MissingField)?.try_into()?,
|
|
|
|
event_txt: redis_strings.pop().ok_or(MissingField)?.try_into()?,
|
|
|
|
leftover_input: input.leftover_input,
|
|
|
|
})),
|
|
|
|
_cmd => Err(Incomplete),
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Err(IncorrectRedisType)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
2020-03-31 15:05:51 +02:00
|
|
|
mod test;
|