//
// TootContent.swift
// Mastodon
//
// Created by MainasuK Cirno on 2021/2/1.
//
import Foundation
import Kanna
import ActiveLabel
enum TootContent {
static func parse(toot: String) throws -> TootContent.ParseResult {
let toot = toot.replacingOccurrences(of: "
", with: "\n")
let rootNode = try Node.parse(document: toot)
let text = String(rootNode.text)
var activeEntities: [ActiveEntity] = []
let entities = TootContent.Node.entities(in: rootNode)
for entity in entities {
let range = NSRange(entity.text.startIndex.. Bool {
for activeEntity in activeEntities {
let count = text.utf16.count
let endIndex = activeEntity.range.location + activeEntity.range.length
guard endIndex <= count else {
assertionFailure("Please file issue")
return false
}
}
return true
}
}
extension String {
    /// Returns a copy of the string with `prefix` stripped from its start.
    ///
    /// - Parameter prefix: The leading text to remove.
    /// - Returns: The remainder after `prefix`, or the string itself when it
    ///   does not begin with `prefix`.
    // ref: https://www.hackingwithswift.com/example-code/strings/how-to-remove-a-prefix-from-a-string
    func deletingPrefix(_ prefix: String) -> String {
        if hasPrefix(prefix) {
            // Drop as many characters as the matched prefix contains.
            let remainder = dropFirst(prefix.count)
            return String(remainder)
        }
        return self
    }
}
extension TootContent {
/// Value returned by `TootContent.parse(toot:)`.
///
/// NOTE(review): the code that fills in these fields is not visible from this
/// declaration; the per-field notes below are inferred from the names and
/// should be confirmed against `parse(toot:)`.
struct ParseResult {
// presumably the HTML document that was parsed — TODO confirm
let document: String
// presumably the full plain text before any link shortening — TODO confirm
let original: String
// presumably the plain text with links replaced by their shortened form — TODO confirm
let trimmed: String
// presumably the entities (links, mentions, hashtags) found in the text — TODO confirm
let activeEntities: [ActiveEntity]
}
}
extension TootContent {
class Node {
let level: Int
let type: Type?
// substring text
let text: Substring
// range in parent String
var range: Range {
return text.startIndex..
let href: String?
let hrefEllipsis: String?
let children: [Node]
/// Creates a node and derives its entity `type` from the tag name and the
/// CSS class list.
///
/// Classification rules:
/// - an `a` tag without the `mention` class is a `.url`;
/// - the `mention` class combined with `u-url` is a `.mention`;
/// - the `mention` class combined with `hashtag` (and no `u-url`) is a `.hashtag`;
/// - anything else has no type.
///
/// - Parameters:
///   - level: Nesting level recorded for the node (`parse(document:)` uses 0 for the root).
///   - text: The node's text, as a slice of its parent's text.
///   - tagName: Tag name of the underlying element, if any.
///   - className: Raw `class` attribute value; it is split on spaces into `classNames`.
///   - href: Stored as-is. NOTE(review): presumably the link target — confirm against the caller.
///   - hrefEllipsis: Stored as-is. NOTE(review): presumably a shortened display form of the link — confirm against the caller.
///   - children: Child nodes of this node.
init(
    level: Int,
    text: Substring,
    tagName: String?,
    className: String?,
    href: String?,
    hrefEllipsis: String?,
    children: [Node]
) {
    // `split` omits empty pieces, so an empty attribute or repeated spaces
    // never insert "" into the set (which would make it look non-empty).
    let parsedClassNames: Set<String> = Set(
        (className ?? "").split(separator: " ").map(String.init)
    )

    let resolvedType: Type?
    if parsedClassNames.contains("mention") {
        // Mastodon marks both mentions and hashtags with the `mention` class;
        // the companion class tells them apart, with `u-url` taking priority.
        if parsedClassNames.contains("u-url") {
            resolvedType = .mention
        } else if parsedClassNames.contains("hashtag") {
            resolvedType = .hashtag
        } else {
            resolvedType = nil
        }
    } else if tagName == "a" {
        resolvedType = .url
    } else {
        resolvedType = nil
    }

    self.level = level
    self.type = resolvedType
    self.text = text
    self.tagName = tagName
    self.classNames = parsedClassNames
    self.href = href
    self.hrefEllipsis = hrefEllipsis
    self.children = children
}
/// Parses an HTML string into a tree of nodes rooted at the document body.
///
/// The root node sits at level 0 and carries the body's full text (an empty
/// string when the document has no body). Its children come from
/// `parse(element:parentText:parentLevel:)`.
///
/// - Parameter document: The HTML source to parse.
/// - Returns: The root node of the parsed tree.
/// - Throws: Any error raised by `HTML(html:encoding:)`.
static func parse(document: String) throws -> TootContent.Node {
    let rootLevel = 0
    let body = try HTML(html: document, encoding: .utf8).body
    let bodyText = body?.text ?? ""
    let fullText = bodyText[...]

    // Without a body there is nothing to descend into.
    var childNodes: [TootContent.Node] = []
    if let body = body {
        childNodes = Node.parse(
            element: body,
            parentText: fullText,
            parentLevel: rootLevel + 1
        )
    }

    return Node(
        level: rootLevel,
        text: fullText,
        tagName: body?.tagName,
        className: body?.className,
        href: nil,
        hrefEllipsis: nil,
        children: childNodes
    )
}
static func parse(element: XMLElement, parentText: Substring, parentLevel: Int) -> [Node] {
let parent = element
let scanner = Scanner(string: String(parentText))
scanner.charactersToBeSkipped = .none
var element = parent.at_css(":first-child")
var children: [Node] = []
while let _element = element {
let _text = _element.text ?? ""
// scan element text
_ = scanner.scanUpToString(_text)
let startIndexOffset = scanner.currentIndex.utf16Offset(in: scanner.string)
guard scanner.scanString(_text) != nil else {
assertionFailure()
continue
}
let endIndexOffset = scanner.currentIndex.utf16Offset(in: scanner.string)
// locate substring
let startIndex = parentText.utf16.index(parentText.utf16.startIndex, offsetBy: startIndexOffset)
let endIndex = parentText.utf16.index(parentText.utf16.startIndex, offsetBy: endIndexOffset)
let text = Substring(parentText.utf16[startIndex.. Bool
) -> [Node] {
var nodes: [Node] = []
if predicate(node) {
nodes.append(node)
}
for child in node.children {
nodes.append(contentsOf: Node.collect(node: child, where: predicate))
}
return nodes
}
}
}
extension TootContent.Node {
    /// Kind of entity a node represents.
    enum `Type` {
        case url, mention, hashtag
    }

    /// Returns every node in the tree rooted at `node` (the root included)
    /// that has a non-nil `type`.
    static func entities(in node: TootContent.Node) -> [TootContent.Node] {
        return collect(node: node) { $0.type != nil }
    }

    /// Returns every node in the tree rooted at `node` whose type is `.hashtag`.
    static func hashtags(in node: TootContent.Node) -> [TootContent.Node] {
        return collect(node: node) { $0.type == .hashtag }
    }

    /// Returns every node in the tree rooted at `node` whose type is `.mention`.
    static func mentions(in node: TootContent.Node) -> [TootContent.Node] {
        return collect(node: node) { $0.type == .mention }
    }

    /// Returns every node in the tree rooted at `node` whose type is `.url`.
    static func urls(in node: TootContent.Node) -> [TootContent.Node] {
        return collect(node: node) { $0.type == .url }
    }
}
extension TootContent.Node: CustomDebugStringConvertible {
    /// Multi-line dump of this node followed by its children.
    ///
    /// The first line has the form `<tag>@[classes](href - hrefEllipsis): text`;
    /// the class and link parts are omitted when empty. Each child's
    /// description follows on its own line, prefixed with `level` spaces.
    var debugDescription: String {
        // Link details appear only when at least one of the two values is set.
        let linkSummary: String
        if href == nil && hrefEllipsis == nil {
            linkSummary = ""
        } else {
            linkSummary = "(\(href ?? "nil") - \(hrefEllipsis ?? "nil"))"
        }

        // Sort the class names so the output is stable across runs.
        let classSummary: String
        if classNames.isEmpty {
            classSummary = ""
        } else {
            classSummary = "@[\(classNames.sorted().joined(separator: ", "))]"
        }

        let header = "<\(tagName ?? "")>\(classSummary)\(linkSummary): \(String(text))"
        if children.isEmpty {
            return header
        }

        let padding = String(repeating: " ", count: level)
        var lines = [header]
        for child in children {
            lines.append(padding + child.debugDescription)
        }
        return lines.joined(separator: "\n")
    }
}