2019-07-08 13:31:42 +02:00
|
|
|
//! Receives data from Redis, sorts it by `ClientAgent`, and stores it until
|
|
|
|
//! polled by the correct `ClientAgent`.  Also manages subscriptions and
|
|
|
|
//! unsubscriptions to/from Redis.
|
2019-10-09 20:46:56 +02:00
|
|
|
mod message_queues;
|
2020-03-27 17:00:48 +01:00
|
|
|
|
|
|
|
pub use message_queues::{MessageQueues, MsgQueue};
|
|
|
|
|
2019-10-09 20:46:56 +02:00
|
|
|
use crate::{
|
2020-03-27 17:00:48 +01:00
|
|
|
config,
|
|
|
|
err::RedisParseErr,
|
2020-03-25 22:50:32 +01:00
|
|
|
messages::Event,
|
2020-03-27 17:00:48 +01:00
|
|
|
parse_client_request::{Stream, Timeline},
|
2019-10-09 20:46:56 +02:00
|
|
|
pubsub_cmd,
|
2020-03-27 17:00:48 +01:00
|
|
|
redis_to_client_stream::redis::redis_msg::RedisMsg,
|
|
|
|
redis_to_client_stream::redis::{redis_cmd, RedisConn},
|
2019-10-09 02:35:26 +02:00
|
|
|
};
|
2019-04-30 15:44:51 +02:00
|
|
|
use futures::{Async, Poll};
|
2020-03-19 01:37:10 +01:00
|
|
|
use lru::LruCache;
|
2020-03-27 17:00:48 +01:00
|
|
|
use tokio::io::AsyncRead;
|
|
|
|
|
|
|
|
use std::{
|
|
|
|
collections::HashMap,
|
|
|
|
io::Read,
|
|
|
|
net, str,
|
|
|
|
time::{Duration, Instant},
|
|
|
|
};
|
2019-09-28 05:29:11 +02:00
|
|
|
use tokio::io::Error;
|
2019-05-10 07:47:29 +02:00
|
|
|
use uuid::Uuid;
|
2019-05-09 05:02:01 +02:00
|
|
|
|
2019-07-08 21:21:02 +02:00
|
|
|
/// The item that streams from Redis and is polled by the `ClientAgent`
|
2019-04-30 15:44:51 +02:00
|
|
|
#[derive(Debug)]
|
|
|
|
pub struct Receiver {
|
2020-03-27 17:00:48 +01:00
|
|
|
pub pubsub_connection: net::TcpStream,
|
2019-07-06 02:08:50 +02:00
|
|
|
secondary_redis_connection: net::TcpStream,
|
2020-03-27 17:00:48 +01:00
|
|
|
redis_poll_interval: Duration,
|
2020-03-25 22:50:32 +01:00
|
|
|
redis_polled_at: Instant,
|
2020-03-19 01:37:10 +01:00
|
|
|
timeline: Timeline,
|
2019-05-09 05:02:01 +02:00
|
|
|
manager_id: Uuid,
|
2019-10-09 20:46:56 +02:00
|
|
|
pub msg_queues: MessageQueues,
|
2020-03-19 01:37:10 +01:00
|
|
|
clients_per_timeline: HashMap<Timeline, i32>,
|
|
|
|
cache: Cache,
|
2020-03-27 17:00:48 +01:00
|
|
|
redis_input: Vec<u8>,
|
|
|
|
redis_namespace: Option<String>,
|
2020-03-19 01:37:10 +01:00
|
|
|
}
|
2020-03-27 17:00:48 +01:00
|
|
|
|
2020-03-19 01:37:10 +01:00
|
|
|
#[derive(Debug)]
|
2020-03-25 22:50:32 +01:00
|
|
|
pub struct Cache {
|
2020-03-27 17:00:48 +01:00
|
|
|
// TODO: eventually, it might make sense to have Mastodon publish to timelines with
|
|
|
|
// the tag number instead of the tag name. This would save us from dealing
|
|
|
|
// with a cache here and would be consistent with how lists/users are handled.
|
2020-03-19 01:37:10 +01:00
|
|
|
id_to_hashtag: LruCache<i64, String>,
|
2020-03-25 22:50:32 +01:00
|
|
|
pub hashtag_to_id: LruCache<String, i64>,
|
2020-03-19 01:37:10 +01:00
|
|
|
}
|
2020-03-27 17:00:48 +01:00
|
|
|
|
2019-04-30 15:44:51 +02:00
|
|
|
impl Receiver {
|
2019-07-06 02:08:50 +02:00
|
|
|
/// Create a new `Receiver`, with its own Redis connections (but, as yet, no
|
|
|
|
/// active subscriptions).
|
2020-03-27 17:00:48 +01:00
|
|
|
pub fn new(redis_cfg: config::RedisConfig) -> Self {
|
|
|
|
let redis_namespace = redis_cfg.namespace.clone();
|
|
|
|
|
2019-10-03 06:34:41 +02:00
|
|
|
let RedisConn {
|
|
|
|
primary: pubsub_connection,
|
|
|
|
secondary: secondary_redis_connection,
|
2019-10-04 00:02:23 +02:00
|
|
|
polling_interval: redis_poll_interval,
|
|
|
|
} = RedisConn::new(redis_cfg);
|
2019-10-03 06:34:41 +02:00
|
|
|
|
2019-04-30 15:44:51 +02:00
|
|
|
Self {
|
2020-03-27 17:00:48 +01:00
|
|
|
pubsub_connection,
|
2019-05-10 07:47:29 +02:00
|
|
|
secondary_redis_connection,
|
2019-10-04 00:02:23 +02:00
|
|
|
redis_poll_interval,
|
2020-03-25 22:50:32 +01:00
|
|
|
redis_polled_at: Instant::now(),
|
2020-03-19 01:37:10 +01:00
|
|
|
timeline: Timeline::empty(),
|
2019-07-06 02:08:50 +02:00
|
|
|
manager_id: Uuid::default(),
|
2020-03-19 01:37:10 +01:00
|
|
|
msg_queues: MessageQueues(HashMap::new()),
|
|
|
|
clients_per_timeline: HashMap::new(),
|
2020-03-27 17:00:48 +01:00
|
|
|
cache: Cache {
|
|
|
|
id_to_hashtag: LruCache::new(1000),
|
|
|
|
hashtag_to_id: LruCache::new(1000),
|
|
|
|
}, // should these be run-time options?
|
|
|
|
redis_input: Vec::new(),
|
|
|
|
redis_namespace,
|
2019-04-30 15:44:51 +02:00
|
|
|
}
|
|
|
|
}
|
2019-05-10 07:47:29 +02:00
|
|
|
|
2019-07-06 02:08:50 +02:00
|
|
|
/// Assigns the `Receiver` a new timeline to monitor and runs other
|
|
|
|
/// first-time setup.
|
|
|
|
///
|
2019-07-08 13:31:42 +02:00
|
|
|
/// Note: this method calls `subscribe_or_unsubscribe_as_needed`,
|
2019-07-06 02:08:50 +02:00
|
|
|
/// so Redis PubSub subscriptions are only updated when a new timeline
|
|
|
|
/// comes under management for the first time.
|
2020-03-27 17:00:48 +01:00
|
|
|
pub fn manage_new_timeline(&mut self, id: Uuid, tl: Timeline, hashtag: Option<String>) {
|
|
|
|
self.timeline = tl;
|
|
|
|
if let (Some(hashtag), Timeline(Stream::Hashtag(id), _, _)) = (hashtag, tl) {
|
|
|
|
self.cache.id_to_hashtag.put(id, hashtag.clone());
|
|
|
|
self.cache.hashtag_to_id.put(hashtag, id);
|
|
|
|
};
|
|
|
|
|
|
|
|
self.msg_queues.insert(id, MsgQueue::new(tl));
|
|
|
|
self.subscribe_or_unsubscribe_as_needed(tl);
|
2019-04-30 15:44:51 +02:00
|
|
|
}
|
2019-05-10 07:47:29 +02:00
|
|
|
|
2019-07-08 21:21:02 +02:00
|
|
|
/// Set the `Receiver`'s manager_id and target_timeline fields to the appropriate
|
2019-07-06 02:08:50 +02:00
|
|
|
/// value to be polled by the current `StreamManager`.
|
2020-03-19 01:37:10 +01:00
|
|
|
pub fn configure_for_polling(&mut self, manager_id: Uuid, timeline: Timeline) {
|
2019-07-06 02:08:50 +02:00
|
|
|
self.manager_id = manager_id;
|
2020-03-19 01:37:10 +01:00
|
|
|
self.timeline = timeline;
|
|
|
|
}
|
|
|
|
|
2019-07-06 02:08:50 +02:00
|
|
|
/// Drop any PubSub subscriptions that don't have active clients and check
|
|
|
|
/// that there's a subscription to the current one. If there isn't, then
|
|
|
|
/// subscribe to it.
|
2020-03-19 01:37:10 +01:00
|
|
|
fn subscribe_or_unsubscribe_as_needed(&mut self, timeline: Timeline) {
|
2020-03-25 22:50:32 +01:00
|
|
|
let start_time = Instant::now();
|
2020-03-19 01:37:10 +01:00
|
|
|
let timelines_to_modify = self.msg_queues.calculate_timelines_to_add_or_drop(timeline);
|
2019-05-09 05:02:01 +02:00
|
|
|
|
2019-05-10 07:47:29 +02:00
|
|
|
// Record the lower number of clients subscribed to that channel
|
2019-07-08 13:31:42 +02:00
|
|
|
for change in timelines_to_modify {
|
2020-03-19 01:37:10 +01:00
|
|
|
let timeline = change.timeline;
|
2020-03-27 17:00:48 +01:00
|
|
|
let hashtag = match timeline {
|
|
|
|
Timeline(Stream::Hashtag(id), _, _) => self.cache.id_to_hashtag.get(&id),
|
|
|
|
_non_hashtag_timeline => None,
|
|
|
|
};
|
2020-03-19 01:37:10 +01:00
|
|
|
|
2019-05-09 05:02:01 +02:00
|
|
|
let count_of_subscribed_clients = self
|
2019-05-10 07:47:29 +02:00
|
|
|
.clients_per_timeline
|
2020-03-19 01:37:10 +01:00
|
|
|
.entry(timeline)
|
2019-10-02 06:03:18 +02:00
|
|
|
.and_modify(|n| *n += change.in_subscriber_number)
|
|
|
|
.or_insert_with(|| 1);
|
2020-03-19 01:37:10 +01:00
|
|
|
|
2019-05-10 07:47:29 +02:00
|
|
|
// If no clients, unsubscribe from the channel
|
2019-05-09 05:02:01 +02:00
|
|
|
if *count_of_subscribed_clients <= 0 {
|
2020-03-25 22:50:32 +01:00
|
|
|
pubsub_cmd!("unsubscribe", self, timeline.to_redis_raw_timeline(hashtag));
|
2019-10-02 06:03:18 +02:00
|
|
|
} else if *count_of_subscribed_clients == 1 && change.in_subscriber_number == 1 {
|
2020-03-25 22:50:32 +01:00
|
|
|
pubsub_cmd!("subscribe", self, timeline.to_redis_raw_timeline(hashtag));
|
2019-07-08 13:31:42 +02:00
|
|
|
}
|
|
|
|
}
|
2019-09-28 23:57:37 +02:00
|
|
|
if start_time.elapsed().as_millis() > 1 {
|
|
|
|
log::warn!("Sending cmd to Redis took: {:?}", start_time.elapsed());
|
|
|
|
};
|
2019-07-06 02:08:50 +02:00
|
|
|
}
|
|
|
|
}
|
2019-07-08 13:31:42 +02:00
|
|
|
|
|
|
|
/// The stream that the ClientAgent polls to learn about new messages.
|
2019-07-06 02:08:50 +02:00
|
|
|
impl futures::stream::Stream for Receiver {
|
2020-03-25 22:50:32 +01:00
|
|
|
type Item = Event;
|
2019-04-30 15:44:51 +02:00
|
|
|
type Error = Error;
|
|
|
|
|
2019-07-08 13:31:42 +02:00
|
|
|
/// Returns the oldest message in the `ClientAgent`'s queue (if any).
|
|
|
|
///
|
|
|
|
/// Note: This method does **not** poll Redis every time, because polling
|
2020-03-25 22:50:32 +01:00
|
|
|
/// Redis is significantly more time consuming that simply returning the
|
2019-07-08 13:31:42 +02:00
|
|
|
/// message already in a queue. Thus, we only poll Redis if it has not
|
|
|
|
/// been polled lately.
|
2020-03-25 22:50:32 +01:00
|
|
|
fn poll(&mut self) -> Poll<Option<Self::Item>, Self::Error> {
|
2019-10-09 20:46:56 +02:00
|
|
|
let (timeline, id) = (self.timeline.clone(), self.manager_id);
|
2020-03-25 22:50:32 +01:00
|
|
|
|
2020-03-27 17:00:48 +01:00
|
|
|
if self.redis_polled_at.elapsed() > self.redis_poll_interval {
|
|
|
|
let mut buffer = vec![0u8; 6000];
|
|
|
|
if let Ok(Async::Ready(bytes_read)) = self.poll_read(&mut buffer) {
|
|
|
|
let binary_input = buffer[..bytes_read].to_vec();
|
|
|
|
let (input, extra_bytes) = match str::from_utf8(&binary_input) {
|
|
|
|
Ok(input) => (input, "".as_bytes()),
|
|
|
|
Err(e) => {
|
|
|
|
let (valid, after_valid) = binary_input.split_at(e.valid_up_to());
|
|
|
|
let input = str::from_utf8(valid).expect("Guaranteed by `.valid_up_to`");
|
|
|
|
(input, after_valid)
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
let (cache, namespace) = (&mut self.cache.hashtag_to_id, &self.redis_namespace);
|
|
|
|
|
|
|
|
let remaining_input =
|
|
|
|
process_messages(input, cache, namespace, &mut self.msg_queues);
|
|
|
|
|
|
|
|
self.redis_input.extend_from_slice(remaining_input);
|
|
|
|
self.redis_input.extend_from_slice(extra_bytes);
|
|
|
|
}
|
2019-07-08 13:31:42 +02:00
|
|
|
}
|
2019-04-30 15:44:51 +02:00
|
|
|
|
2019-05-10 07:47:29 +02:00
|
|
|
// Record current time as last polled time
|
2019-10-09 20:46:56 +02:00
|
|
|
self.msg_queues.update_time_for_target_queue(id);
|
2019-04-30 15:44:51 +02:00
|
|
|
|
2019-05-10 07:47:29 +02:00
|
|
|
// If the `msg_queue` being polled has any new messages, return the first (oldest) one
|
2019-10-09 20:46:56 +02:00
|
|
|
match self.msg_queues.oldest_msg_in_target_queue(id, timeline) {
|
2019-09-28 23:57:37 +02:00
|
|
|
Some(value) => Ok(Async::Ready(Some(value))),
|
2019-05-10 07:47:29 +02:00
|
|
|
_ => Ok(Async::NotReady),
|
2019-04-30 15:44:51 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-03-27 17:00:48 +01:00
|
|
|
|
|
|
|
impl Read for Receiver {
|
|
|
|
fn read(&mut self, buffer: &mut [u8]) -> Result<usize, std::io::Error> {
|
|
|
|
self.pubsub_connection.read(buffer)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl AsyncRead for Receiver {
|
|
|
|
fn poll_read(&mut self, buf: &mut [u8]) -> Poll<usize, std::io::Error> {
|
|
|
|
match self.read(buf) {
|
|
|
|
Ok(t) => Ok(Async::Ready(t)),
|
|
|
|
Err(_) => Ok(Async::NotReady),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[must_use]
|
|
|
|
pub fn process_messages<'a>(
|
|
|
|
input: &'a str,
|
|
|
|
mut cache: &mut LruCache<String, i64>,
|
|
|
|
namespace: &Option<String>,
|
|
|
|
msg_queues: &mut MessageQueues,
|
|
|
|
) -> &'a [u8] {
|
|
|
|
let mut remaining_input = input;
|
|
|
|
use RedisMsg::*;
|
|
|
|
loop {
|
|
|
|
match RedisMsg::from_raw(&mut remaining_input, &mut cache, namespace) {
|
|
|
|
Ok((EventMsg(timeline, event), rest)) => {
|
|
|
|
for msg_queue in msg_queues.values_mut() {
|
|
|
|
if msg_queue.timeline == timeline {
|
|
|
|
msg_queue.messages.push_back(event.clone());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
remaining_input = rest;
|
|
|
|
}
|
|
|
|
Ok((SubscriptionMsg, rest)) | Ok((MsgForDifferentNamespace, rest)) => {
|
|
|
|
remaining_input = rest;
|
|
|
|
}
|
|
|
|
Err(RedisParseErr::Incomplete) => break,
|
|
|
|
Err(RedisParseErr::Unrecoverable) => {
|
|
|
|
panic!("Failed parsing Redis msg: {}", &remaining_input)
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
remaining_input.as_bytes()
|
|
|
|
}
|