Preserve the formatting of top-level ("parentless") text nodes, i.e. text nodes whose parent is the document body

This commit is contained in:
Stefan Ceriu
2025-09-10 12:18:48 +03:00
committed by Stefan Ceriu
parent 7d59589892
commit 8e257b40c5
3 changed files with 57 additions and 40 deletions

View File

@@ -72,7 +72,7 @@ struct AttributedStringBuilderV2: AttributedStringBuilderProtocol {
}
var listIndex = 1
let mutableAttributedString = attributedString(from: body, preserveFormatting: false, listTag: nil, listIndex: &listIndex, indentLevel: 0)
let mutableAttributedString = attributedString(element: body, documentBody: body, preserveFormatting: false, listTag: nil, listIndex: &listIndex, indentLevel: 0)
detectPhishingAttempts(mutableAttributedString)
addLinksAndMentions(mutableAttributedString)
addMatrixEntityPermalinkAttributesTo(mutableAttributedString)
@@ -86,7 +86,9 @@ struct AttributedStringBuilderV2: AttributedStringBuilderProtocol {
// MARK: - Private
// swiftlint:disable:next function_body_length
func attributedString(from element: Element, preserveFormatting: Bool,
func attributedString(element: Element,
documentBody: Element,
preserveFormatting: Bool,
listTag: String?,
listIndex: inout Int,
indentLevel: Int) -> NSMutableAttributedString {
@@ -94,6 +96,12 @@ struct AttributedStringBuilderV2: AttributedStringBuilderProtocol {
for node in element.getChildNodes() {
if let textNode = node as? TextNode {
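// If this text node is a direct child of the document body (i.e. not wrapped in any tag),
// append its whole text as-is and skip the text processing applied to other text nodes below.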
if node.parent() == documentBody {
result.append(NSAttributedString(string: textNode.getWholeText()))
continue
}
var text = preserveFormatting ? textNode.getWholeText() : textNode.text()
// There seem to be sibling TextNodes following every </br> tag that
@@ -121,50 +129,50 @@ struct AttributedStringBuilderV2: AttributedStringBuilderProtocol {
case "h1", "h2", "h3", "h4", "h5", "h6":
let level = max(3, Int(String(tag.dropFirst())) ?? 1)
let size: CGFloat = fontPointSize + CGFloat(6 - level) * 2
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content.append(NSAttributedString(string: "\n"))
content.setFontPreservingSymbolicTraits(UIFont.boldSystemFont(ofSize: size))
case "p", "div":
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content.append(NSAttributedString(string: "\n"))
case "br":
content = NSMutableAttributedString(string: "\n")
case "b", "strong":
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content.setFontPreservingSymbolicTraits(UIFont.boldSystemFont(ofSize: fontPointSize))
case "i", "em":
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content.setFontPreservingSymbolicTraits(UIFont.italicSystemFont(ofSize: fontPointSize))
case "u":
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content.addAttribute(.underlineStyle, value: NSUnderlineStyle.single.rawValue, range: NSRange(location: 0, length: content.length))
case "s", "del":
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content.addAttribute(.strikethroughStyle, value: NSUnderlineStyle.single.rawValue, range: NSRange(location: 0, length: content.length))
case "sup":
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content.addAttribute(.baselineOffset, value: 6, range: NSRange(location: 0, length: content.length))
content.setFontPreservingSymbolicTraits(UIFont.systemFont(ofSize: fontPointSize * 0.7))
case "sub":
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content.addAttribute(.baselineOffset, value: -4, range: NSRange(location: 0, length: content.length))
content.setFontPreservingSymbolicTraits(UIFont.systemFont(ofSize: fontPointSize * 0.7))
case "blockquote":
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content.addAttribute(.MatrixBlockquote, value: true, range: NSRange(location: 0, length: content.length))
case "code", "pre":
let preserveFormatting = preserveFormatting || tag == "pre"
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content.setFontPreservingSymbolicTraits(UIFont.monospacedSystemFont(ofSize: fontPointSize, weight: .regular))
content.addAttribute(.CodeBlock, value: true, range: NSRange(location: 0, length: content.length))
content.addAttribute(.backgroundColor, value: UIColor.compound._bgCodeBlock as Any, range: NSRange(location: 0, length: content.length))
@@ -182,19 +190,19 @@ struct AttributedStringBuilderV2: AttributedStringBuilderProtocol {
content = NSMutableAttributedString(string: "\n")
case "a":
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
if let href = try? childElement.attr("href"), let url = URL(string: href) {
content.addAttribute(.link, value: url, range: NSRange(location: 0, length: content.length))
}
case "span":
if childElement.dataset()[Self.attributeMSC4286] != nil {
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
}
case "ul", "ol":
var listIndex = 1
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: tag, listIndex: &listIndex, indentLevel: indentLevel + 1)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: tag, listIndex: &listIndex, indentLevel: indentLevel + 1)
if listTag == nil { // If not within another list
content.insert(NSAttributedString(string: "\n"), at: 0)
}
@@ -208,7 +216,7 @@ struct AttributedStringBuilderV2: AttributedStringBuilderProtocol {
bullet = ""
}
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel + 1)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel + 1)
content.insert(NSAttributedString(string: bullet), at: 0)
content.append(NSAttributedString(string: "\n"))
@@ -218,7 +226,7 @@ struct AttributedStringBuilderV2: AttributedStringBuilderProtocol {
content.addAttribute(.paragraphStyle, value: paragraphStyle, range: NSRange(location: 0, length: content.length))
default:
content = attributedString(from: childElement, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
content = attributedString(element: childElement, documentBody: documentBody, preserveFormatting: preserveFormatting, listTag: listTag, listIndex: &childIndex, indentLevel: indentLevel)
}
result.append(content)

View File

@@ -37,10 +37,10 @@ enum HTMLFixtures: String, CaseIterable {
"""
case .paragraphs:
"""
<p>This is a paragraph.</p><p>And this is another one.</p>
<div>And this is a division.</div>
New lines are ignored.\n\nLike so.</br>
But this line comes after a line break.</br>
<p>This is a paragraph.</p><p>And this is another one.</p>\
<div>And this is a division.</div>\
New lines outside of tags are not ignored.\n\nLike so.</br>\
This line comes after a line break.</br>
"""
case .matrixIdentifiers:
"""
@@ -54,10 +54,9 @@ enum HTMLFixtures: String, CaseIterable {
"""
case .textFormatting:
"""
<b>Text</b> <i>formatting</i> <u>should</u> <s>work</s> properly.</br>
<strong>Text</strong> <em>formatting</em> does <del>work!</del>.</br>
<b>Text</b> <i>formatting</i> <u>should</u> <s>work</s> properly.
<strong>Text</strong> <em>formatting</em> does <del>work!</del>.
<b>And <i>mixed</i></b> <em><s>formatting</s></em> <del><strong>works</strong></del> <u><b>too!!1!</b></u>.
<br>
<sup>Thumbs</sup> if you liked it, <sub>sub</sub> if you loved it!
"""
case .groupedBlockQuotes:
@@ -68,10 +67,10 @@ enum HTMLFixtures: String, CaseIterable {
"""
case .separatedBlockQuotes:
"""
Text before blockquote
<blockquote>Some blockquote</blockquote>
Text after first blockquote
<blockquote>Some other blockquote</blockquote>
Text before blockquote\
<blockquote>Some blockquote</blockquote>\
Text after first blockquote\
<blockquote>Some other blockquote</blockquote>\
Text after second blockquote
"""
case .codeBlocks:

View File

@@ -29,14 +29,16 @@ class AttributedStringBuilderV1Tests: XCTestCase {
return
}
XCTAssertEqual(String(attributedString.characters), "H1 Header\nH2 Header\nH3 Header\nH4 Header\nH5 Header\nH6 Header")
if AttributedStringBuilder.useNextGenHTMLParser {
XCTAssertEqual(String(attributedString.characters), "H1 Header\n\nH2 Header\n\nH3 Header\n\nH4 Header\n\nH5 Header\n\nH6 Header\n")
XCTAssertEqual(attributedString.runs.count, 11) // newlines hold no attributes
let pointSizes = attributedString.runs.compactMap(\.uiKit.font?.pointSize)
XCTAssertEqual(pointSizes, [23, 23, 23, 21, 19, 17])
} else {
XCTAssertEqual(String(attributedString.characters), "H1 Header\nH2 Header\nH3 Header\nH4 Header\nH5 Header\nH6 Header")
XCTAssert(attributedString.runs.count == 6)
let firstRun = attributedString.runs[attributedString.runs.startIndex]
@@ -72,7 +74,7 @@ class AttributedStringBuilderV1Tests: XCTestCase {
}
if AttributedStringBuilder.useNextGenHTMLParser {
XCTAssertEqual(regex.numberOfMatches(in: string, options: [], range: .init(location: 0, length: string.count)), 13)
XCTAssertEqual(regex.numberOfMatches(in: string, options: [], range: .init(location: 0, length: string.count)), 18)
} else {
XCTAssertEqual(regex.numberOfMatches(in: string, options: [], range: .init(location: 0, length: string.count)), 10)
}
@@ -158,14 +160,6 @@ class AttributedStringBuilderV1Tests: XCTestCase {
return
}
XCTAssertEqual(String(h1AttributedString.characters), "Matrix.org")
XCTAssertEqual(String(h2AttributedString.characters), "Matrix.org")
XCTAssertEqual(String(h3AttributedString.characters), "Matrix.org")
XCTAssertEqual(h1AttributedString.runs.count, 1)
XCTAssertEqual(h2AttributedString.runs.count, 1)
XCTAssertEqual(h3AttributedString.runs.count, 1)
guard let h1Font = h1AttributedString.runs.first?.uiKit.font,
let h2Font = h2AttributedString.runs.first?.uiKit.font,
let h3Font = h3AttributedString.runs.first?.uiKit.font else {
@@ -174,12 +168,28 @@ class AttributedStringBuilderV1Tests: XCTestCase {
}
if AttributedStringBuilder.useNextGenHTMLParser {
XCTAssertEqual(String(h1AttributedString.characters), "Matrix.org\n")
XCTAssertEqual(String(h2AttributedString.characters), "Matrix.org\n")
XCTAssertEqual(String(h3AttributedString.characters), "Matrix.org\n")
XCTAssertEqual(h1AttributedString.runs.count, 2)
XCTAssertEqual(h2AttributedString.runs.count, 2)
XCTAssertEqual(h3AttributedString.runs.count, 2)
XCTAssertEqual(h1Font, h2Font)
XCTAssertEqual(h2Font, h3Font)
XCTAssert(h1Font.pointSize > UIFont.preferredFont(forTextStyle: .body).pointSize)
XCTAssert(h1Font.pointSize <= 23)
} else {
XCTAssertEqual(String(h1AttributedString.characters), "Matrix.org")
XCTAssertEqual(String(h2AttributedString.characters), "Matrix.org")
XCTAssertEqual(String(h3AttributedString.characters), "Matrix.org")
XCTAssertEqual(h1AttributedString.runs.count, 1)
XCTAssertEqual(h2AttributedString.runs.count, 1)
XCTAssertEqual(h3AttributedString.runs.count, 1)
XCTAssertEqual(h1Font, h2Font)
XCTAssertEqual(h2Font, h3Font)