2021-02-01 11:06:29 +01:00
|
|
|
//
|
2021-04-01 08:39:15 +02:00
|
|
|
// MastodonStatusContent.swift
|
2021-02-01 11:06:29 +01:00
|
|
|
// Mastodon
|
|
|
|
//
|
|
|
|
// Created by MainasuK Cirno on 2021/2/1.
|
|
|
|
//
|
|
|
|
|
|
|
|
import Foundation
|
2021-06-17 10:31:34 +02:00
|
|
|
import Combine
|
2021-02-01 11:06:29 +01:00
|
|
|
import Kanna
|
|
|
|
import ActiveLabel
|
|
|
|
|
2021-04-01 08:39:15 +02:00
|
|
|
/// Namespace for turning the HTML content of a status into plain text plus a
/// list of `ActiveEntity` values (links, hashtags, mentions, custom emoji).
enum MastodonStatusContent {

    typealias EmojiShortcode = String
    typealias EmojiDict = [EmojiShortcode: URL]

    /// Serial queue on which `parseResult(content:emojiDict:)` performs its parsing.
    static let workingQueue = DispatchQueue(label: "org.joinmastodon.app.ActiveLabel.working-queue", qos: .userInteractive)

    /// Parses `content` on `workingQueue` and publishes the outcome.
    ///
    /// - Parameters:
    ///   - content: The status content handed to `parse(content:emojiDict:)`.
    ///   - emojiDict: Custom emoji shortcodes and their image URLs.
    /// - Returns: A publisher that never fails. It emits the parse result, or
    ///   `nil` when `parse(content:emojiDict:)` throws (the error is dropped by `try?`).
    static func parseResult(content: String, emojiDict: MastodonStatusContent.EmojiDict) -> AnyPublisher<MastodonStatusContent.ParseResult?, Never> {
        return Future { promise in
            self.workingQueue.async {
                let parseResult = try? MastodonStatusContent.parse(content: content, emojiDict: emojiDict)
                promise(.success(parseResult))
            }
        }
        .eraseToAnyPublisher()
    }

    /// Synchronously parses `content` into a `ParseResult`.
    ///
    /// Steps:
    /// 1. Every `:shortcode:` occurrence is replaced with a
    ///    `<span class="emoji" href="…">` node so emoji surface as entities.
    /// 2. The resulting document is parsed into a `Node` tree.
    /// 3. Every node with a non-nil `type` becomes an `ActiveEntity` whose range
    ///    is expressed against the tree's plain text.
    /// 4. URL and emoji entities are shortened in the `trimmed` text via `trimEntity`.
    ///
    /// - Parameters:
    ///   - content: The status content (HTML).
    ///   - emojiDict: Custom emoji shortcodes and their image URLs.
    /// - Returns: The document, its plain text, the trimmed text and the entities.
    /// - Throws: Whatever `Node.parse(document:)` throws.
    static func parse(content: String, emojiDict: EmojiDict) throws -> MastodonStatusContent.ParseResult {
        let document: String = {
            var content = content
            // NOTE(review): `shortcode` and `url` are interpolated into markup
            // unescaped, and the replacement also applies to matches inside
            // attribute values — confirm the inputs can never contain markup.
            for (shortcode, url) in emojiDict {
                let emojiNode = "<span class=\"emoji\" href=\"\(url.absoluteString)\">\(shortcode)</span>"
                let pattern = ":\(shortcode):"
                content = content.replacingOccurrences(of: pattern, with: emojiNode)
            }
            return content.trimmingCharacters(in: .whitespacesAndNewlines)
        }()
        let rootNode = try Node.parse(document: document)
        let text = String(rootNode.text)

        var activeEntities: [ActiveEntity] = []
        let entities = MastodonStatusContent.Node.entities(in: rootNode)
        for entity in entities {
            let range = NSRange(entity.text.startIndex..<entity.text.endIndex, in: text)

            switch entity.type {
            case .url:
                guard let href = entity.href else { continue }
                let text = String(entity.text)
                activeEntities.append(ActiveEntity(range: range, type: .url(text, trimmed: entity.hrefEllipsis ?? text, url: href, userInfo: nil)))
            case .hashtag:
                var userInfo: [AnyHashable: Any] = [:]
                if let href = entity.href {
                    userInfo["href"] = href
                }
                let hashtag = String(entity.text).deletingPrefix("#")
                activeEntities.append(ActiveEntity(range: range, type: .hashtag(hashtag, userInfo: userInfo)))
            case .mention:
                var userInfo: [AnyHashable: Any] = [:]
                if let href = entity.href {
                    userInfo["href"] = href
                }
                let mention = String(entity.text).deletingPrefix("@")
                activeEntities.append(ActiveEntity(range: range, type: .mention(mention, userInfo: userInfo)))
            case .emoji:
                guard let href = entity.href else { continue }
                var userInfo: [AnyHashable: Any] = [:]
                userInfo["href"] = href
                let emoji = String(entity.text)
                activeEntities.append(ActiveEntity(range: range, type: .emoji(emoji, url: href, userInfo: userInfo)))
            case .none:
                continue
            }
        }

        var trimmed = text
        for activeEntity in activeEntities {
            MastodonStatusContent.trimEntity(status: &trimmed, activeEntity: activeEntity, activeEntities: activeEntities)
        }

        return ParseResult(
            document: document,
            original: text,
            trimmed: trimmed,
            activeEntities: activeEntities
        )
    }

    /// Replaces the text of a URL or emoji entity inside `status` with its
    /// shortened form and shifts the ranges of the entities that follow it.
    ///
    /// URL entities are replaced by their `trimmed` text, emoji entities by a
    /// single space; every other entity type is left untouched.
    ///
    /// - Parameters:
    ///   - status: The text being shortened; mutated in place.
    ///   - activeEntity: The entity to shorten. Its `range.length` is updated in
    ///     place (the entity is mutated through a non-`inout` parameter).
    ///   - activeEntities: All entities of the status. Those positioned after
    ///     `activeEntity` in this array get their `range.location` shifted.
    static func trimEntity(status: inout String, activeEntity: ActiveEntity, activeEntities: [ActiveEntity]) {
        let text: String
        let trimmed: String
        switch activeEntity.type {
        case .url(let _text, let _trimmed, _, _):
            text = _text
            trimmed = _trimmed
        case .emoji(let _text, _, _):
            text = _text
            trimmed = " "
        default:
            return
        }

        guard let index = activeEntities.firstIndex(where: { $0.range == activeEntity.range }) else { return }
        guard let range = Range(activeEntity.range, in: status) else { return }
        status.replaceSubrange(range, with: trimmed)

        // `NSRange` is measured in UTF-16 code units, so the shift has to be
        // measured the same way. Counting Characters (`String.count`) drifts as
        // soon as the replaced text contains non-BMP scalars or multi-scalar
        // grapheme clusters.
        let offset = trimmed.utf16.count - text.utf16.count
        activeEntity.range.length += offset

        let moveActiveEntities = Array(activeEntities[index...].dropFirst())
        for moveActiveEntity in moveActiveEntities {
            moveActiveEntity.range.location += offset
        }
    }

}
|
|
|
|
|
|
|
|
extension String {
    // ref: https://www.hackingwithswift.com/example-code/strings/how-to-remove-a-prefix-from-a-string

    /// Returns a copy of the string without its leading `prefix`.
    ///
    /// - Parameter prefix: The text to strip from the start of the string.
    /// - Returns: The string minus one leading occurrence of `prefix`, or the
    ///   string unchanged when it does not start with `prefix`.
    func deletingPrefix(_ prefix: String) -> String {
        if hasPrefix(prefix) {
            let remainder = dropFirst(prefix.count)
            return String(remainder)
        }
        return self
    }
}
|
|
|
|
|
2021-04-01 08:39:15 +02:00
|
|
|
extension MastodonStatusContent {
    /// The outcome of parsing one status.
    struct ParseResult: Hashable {
        /// The document that was handed to the node parser.
        let document: String
        /// Plain text of the parsed document.
        let original: String
        /// Plain text after URL and emoji entities were shortened.
        let trimmed: String
        /// Entities found in the document.
        let activeEntities: [ActiveEntity]

        /// Two results are equal when their three strings match and they hold
        /// the same number of entities; the entities themselves are not compared.
        static func == (lhs: MastodonStatusContent.ParseResult, rhs: MastodonStatusContent.ParseResult) -> Bool {
            guard lhs.document == rhs.document else { return false }
            guard lhs.original == rhs.original else { return false }
            guard lhs.trimmed == rhs.trimmed else { return false }
            return lhs.activeEntities.count == rhs.activeEntities.count     // FIXME:
        }

        /// Hashes the same components that `==` compares, in the same order.
        func hash(into hasher: inout Hasher) {
            for component in [document, original, trimmed] {
                hasher.combine(component)
            }
            hasher.combine(activeEntities.count)     // FIXME:
        }
    }
}
|
|
|
|
|
2021-05-07 12:25:57 +02:00
|
|
|
|
2021-04-01 08:39:15 +02:00
|
|
|
extension MastodonStatusContent {

    /// One element of the parsed document together with the slice of plain
    /// text it covers and the attributes needed to classify it as an entity.
    class Node {

        /// Depth value assigned while parsing; the root node uses 0.
        // NOTE(review): the parse functions add 1 both when computing a child's
        // level and again when recursing, so the value grows by two per
        // generation — confirm whether that is intended.
        let level: Int
        /// Entity classification derived from the tag name and class names, or
        /// `nil` when the element is not an entity.
        let type: Type?

        // substring text
        let text: Substring

        // range in parent String
        var range: Range<String.Index> {
            return text.startIndex..<text.endIndex
        }

        let tagName: String?
        let classNames: Set<String>
        let href: String?
        let hrefEllipsis: String?

        let children: [Node]

        /// Creates a node and derives its `classNames` and `type`.
        ///
        /// - Parameters:
        ///   - level: Depth value stored on the node.
        ///   - text: Slice of plain text covered by the element.
        ///   - tagName: Tag name of the element, if any.
        ///   - className: Raw `class` attribute; split on single spaces.
        ///   - href: Value of the element's `href` attribute, if any.
        ///   - hrefEllipsis: Shortened link text, if any.
        ///   - children: Already-parsed child nodes.
        init(
            level: Int,
            text: Substring,
            tagName: String?,
            className: String?,
            href: String?,
            hrefEllipsis: String?,
            children: [Node]
        ) {
            let _classNames: Set<String> = {
                guard let className = className else { return Set() }
                return Set(className.components(separatedBy: " "))
            }()
            let _type: Type? = {
                // An anchor without the "mention" class is a plain link.
                if tagName == "a" && !_classNames.contains("mention") {
                    return .url
                }

                // The "mention" class is shared by mentions ("u-url") and hashtags ("hashtag").
                if _classNames.contains("mention") {
                    if _classNames.contains("u-url") {
                        return .mention
                    } else if _classNames.contains("hashtag") {
                        return .hashtag
                    }
                }

                if _classNames.contains("emoji") {
                    return .emoji
                }

                return nil
            }()
            self.level = level
            self.type = _type
            self.text = text
            self.tagName = tagName
            self.classNames = _classNames
            self.href = href
            self.hrefEllipsis = hrefEllipsis
            self.children = children
        }

        /// Parses an HTML document into a node tree rooted at its `<body>`.
        ///
        /// - Parameter document: The HTML to parse.
        /// - Returns: The root node; it has empty text and no children when the
        ///   document has no body.
        /// - Throws: Any error raised by the HTML parser.
        static func parse(document: String) throws -> MastodonStatusContent.Node {
            let html = try HTML(html: document, encoding: .utf8)

            // add `\r\n` explicit due to Kanna text missing it after convert to text
            // ref: https://github.com/tid-kijyun/Kanna/issues/150
            let brNodes = html.css("br").makeIterator()
            while let brNode = brNodes.next() {
                // A fresh fragment is built for every `<br>`. Parser errors
                // propagate to the caller instead of trapping.
                guard let lineBreak = try HTML(html: "<span>\r\n</span>", encoding: .utf8).body else { continue }
                brNode.addNextSibling(lineBreak)
            }

            let body = html.body
            let text = body?.text ?? ""
            let level = 0
            let children: [MastodonStatusContent.Node] = body.map { body in
                Node.parse(element: body, parentText: text[...], parentLevel: level + 1)
            } ?? []
            let node = Node(
                level: level,
                text: text[...],
                tagName: body?.tagName,
                className: body?.className,
                href: nil,
                hrefEllipsis: nil,
                children: children
            )

            return node
        }

        /// Builds nodes for the element found by `:first-child` inside
        /// `element` and for each of that element's following siblings.
        ///
        /// Each element's text is located inside `parentText` by scanning
        /// forward, so the resulting `text` slices share `parentText`'s indices.
        ///
        /// - Parameters:
        ///   - element: The element whose children are parsed.
        ///   - parentText: Plain text of `element`.
        ///   - parentLevel: Depth value of `element`.
        /// - Returns: The parsed nodes, in document order. Elements whose text
        ///   cannot be located in `parentText` are skipped.
        static func parse(element: XMLElement, parentText: Substring, parentLevel: Int) -> [Node] {
            let parent = element
            let scanner = Scanner(string: String(parentText))
            scanner.charactersToBeSkipped = .none

            var element = parent.at_css(":first-child")
            var children: [Node] = []

            while let _element = element {
                let _text = _element.text ?? ""

                // scan element text
                let rewindIndex = scanner.currentIndex
                _ = scanner.scanUpToString(_text)
                let startIndexOffset = scanner.currentIndex.utf16Offset(in: scanner.string)
                guard scanner.scanString(_text) != nil else {
                    assertionFailure()
                    // `assertionFailure` is a no-op in release builds, so the
                    // loop must advance here or it would spin on this element
                    // forever. Rewind so the failed scan does not consume the
                    // text the following siblings still need.
                    scanner.currentIndex = rewindIndex
                    element = _element.nextSibling
                    continue
                }
                let endIndexOffset = scanner.currentIndex.utf16Offset(in: scanner.string)

                // locate substring
                let startIndex = parentText.utf16.index(parentText.utf16.startIndex, offsetBy: startIndexOffset)
                let endIndex = parentText.utf16.index(parentText.utf16.startIndex, offsetBy: endIndexOffset)
                let text = Substring(parentText.utf16[startIndex..<endIndex])

                let href = _element["href"]
                let hrefEllipsis = href.flatMap { _ in _element.at_css(".ellipsis")?.text }

                let level = parentLevel + 1
                let node = Node(
                    level: level,
                    text: text,
                    tagName: _element.tagName,
                    className: _element.className,
                    href: href,
                    hrefEllipsis: hrefEllipsis,
                    children: Node.parse(element: _element, parentText: text, parentLevel: level + 1)
                )
                children.append(node)
                element = _element.nextSibling
            }

            return children
        }

        /// Collects `node` and all of its descendants that satisfy `predicate`,
        /// visiting each node before its children.
        ///
        /// - Parameters:
        ///   - node: Root of the subtree to search.
        ///   - predicate: Returns `true` for nodes that should be collected.
        /// - Returns: The matching nodes in pre-order.
        static func collect(
            node: Node,
            where predicate: (Node) -> Bool
        ) -> [Node] {
            var nodes: [Node] = []

            if predicate(node) {
                nodes.append(node)
            }

            for child in node.children {
                nodes.append(contentsOf: Node.collect(node: child, where: predicate))
            }
            return nodes
        }

    }

}
|
|
|
|
|
2021-04-01 08:39:15 +02:00
|
|
|
extension MastodonStatusContent.Node {
    /// The kinds of entity a node can represent.
    enum `Type` {
        case url
        case mention
        case hashtag
        case emoji
    }

    /// All nodes in the subtree rooted at `node` that have any entity type.
    static func entities(in node: MastodonStatusContent.Node) -> [MastodonStatusContent.Node] {
        return MastodonStatusContent.Node.collect(node: node, where: { candidate in
            candidate.type != nil
        })
    }

    /// All hashtag nodes in the subtree rooted at `node`.
    static func hashtags(in node: MastodonStatusContent.Node) -> [MastodonStatusContent.Node] {
        return MastodonStatusContent.Node.collect(node: node, where: { candidate in
            candidate.type == .hashtag
        })
    }

    /// All mention nodes in the subtree rooted at `node`.
    static func mentions(in node: MastodonStatusContent.Node) -> [MastodonStatusContent.Node] {
        return MastodonStatusContent.Node.collect(node: node, where: { candidate in
            candidate.type == .mention
        })
    }

    /// All URL nodes in the subtree rooted at `node`.
    static func urls(in node: MastodonStatusContent.Node) -> [MastodonStatusContent.Node] {
        return MastodonStatusContent.Node.collect(node: node, where: { candidate in
            candidate.type == .url
        })
    }

}
|
|
|
|
|
2021-04-01 08:39:15 +02:00
|
|
|
extension MastodonStatusContent.Node: CustomDebugStringConvertible {
    /// A multi-line dump of the node and its descendants.
    ///
    /// The first line has the form `<tag>@[classes](href - ellipsis): text`;
    /// the class list and the link part are omitted when empty. Each child's
    /// description follows on its own line, prefixed with `level` spaces.
    var debugDescription: String {
        // Link part: shown as soon as either value is present.
        let linkPart: String
        if href == nil && hrefEllipsis == nil {
            linkPart = ""
        } else {
            linkPart = "(\(href ?? "nil") - \(hrefEllipsis ?? "nil"))"
        }

        // Class part: sorted so the output is stable.
        let classPart: String
        if classNames.isEmpty {
            classPart = ""
        } else {
            let sortedNames = classNames.sorted()
            classPart = "@[\(sortedNames.joined(separator: ", "))]"
        }

        let headline = String(
            format: "<%@>%@%@: %@",
            tagName ?? "",
            classPart,
            linkPart,
            String(text)
        )

        if children.isEmpty {
            return headline
        }

        let padding = String(repeating: " ", count: level)
        var childLines: [String] = []
        for child in children {
            childLines.append(padding + child.debugDescription)
        }

        return headline + "\n" + childLines.joined(separator: "\n")
    }
}
|