Skip to content

Commit

Permalink
(#322) Telegram: do not split parts of surrogate pairs during flattening
Browse files Browse the repository at this point in the history
  • Loading branch information
ForNeVeR committed Jan 15, 2025
1 parent 1de0fec commit a9b8af8
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 7 deletions.
26 changes: 20 additions & 6 deletions Emulsion.Telegram/Funogram.fs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2024 Emulsion contributors <https://github.com/codingteam/emulsion>
// SPDX-FileCopyrightText: 2025 Emulsion contributors <https://github.com/codingteam/emulsion>
//
// SPDX-License-Identifier: MIT

Expand Down Expand Up @@ -105,16 +105,30 @@ module MessageConverter =
pos <- linkEndOffset
result.Append(text.Substring(pos, text.Length - pos)).ToString()

let private applyLimits limits text =
let private applyLimits limits (text: string) =
/// Shortens `original.text` when it is longer than `limits.messageLengthLimit`.
///
/// Returns `original` untouched when no limit is configured or the text already fits. Otherwise returns a
/// prefix of the text that leaves room for `limits.dataRedactedMessage` within the limit, with
/// `wasLimited = true`. The redaction marker itself is not appended here (presumably the caller does that
/// based on `wasLimited` — confirm against the rest of `applyLimits`).
let applyMessageLengthLimit (original: {| text: string; wasLimited: bool |}) =
    match limits.messageLengthLimit with
    | None -> original
    | Some limit when original.text.Length <= limit -> original
    | Some limit ->
        assert (limit >= limits.dataRedactedMessage.Length)

        // Number of UTF-16 code units we may keep once the space for the redaction marker is reserved.
        // Clamped to zero so that a limit shorter than the marker yields an empty prefix instead of a
        // negative length.
        let budget =
            Math.Clamp(limit - limits.dataRedactedMessage.Length, 0, original.text.Length)

        // We should never split surrogate pairs present in the message. So, if the prefix would end with the
        // high part of such a pair, cut one more code unit to drop that part as well.
        //
        // The check must look at the very same string that is being cut (original.text), not at any other
        // version of the message.
        //
        // Technically, this will also strip an orphan high surrogate (one that is not followed by a low
        // surrogate) if the message originally contained such an invalid Unicode sequence. But we don't
        // care.
        let newTextLength =
            if budget > 0 && Char.IsHighSurrogate(original.text[budget - 1]) then
                budget - 1
            else
                budget

        {| text = original.text.Substring(0, newTextLength); wasLimited = true |}

let applyLineLimit (original: {| text: string; wasLimited: bool |}) =
Expand Down
17 changes: 16 additions & 1 deletion Emulsion.Tests/Telegram/FunogramTests.fs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2024 Emulsion contributors <https://github.com/codingteam/emulsion>
// SPDX-FileCopyrightText: 2025 Emulsion contributors <https://github.com/codingteam/emulsion>
//
// SPDX-License-Identifier: MIT

Expand Down Expand Up @@ -736,6 +736,21 @@ module FlattenMessageTests =
flattenMessage replyMessage
)

[<Fact>]
let ``Flattening should not split surrogate pairs``() =
    // Each octopus emoji occupies two UTF-16 code units (a surrogate pair). The expected text below shows
    // that, under a limit of 6, the quoted part keeps exactly one whole emoji followed by the "[…]" marker
    // rather than one and a half emoji.
    let limit = 6
    let settings = {
        MessageConverter.DefaultQuoteSettings with
            limits.messageLengthLimit = Some limit
    }

    let quotedMessage = authoredTelegramMessage "@originalUser" "🐙🐙🐙🐙"
    let reply = authoredTelegramReplyMessage "@replyingUser" "Reply text" quotedMessage.main

    let expected = Authored { author = "@replyingUser"; text = ">> <@originalUser> 🐙[…]\n\nReply text" }
    let actual = MessageConverter.flatten settings reply
    Assert.Equal(expected, actual)

[<Fact>]
let flattenReplyEventMessage() =
let originalMessage = eventTelegramMessage "@originalUser has entered the chat"
Expand Down

0 comments on commit a9b8af8

Please sign in to comment.