copy_and_paste: Refactor code block turndown code to reduce duplication.

Previously, there were two separate turndown rules for code blocks: one for
general code blocks, and another for code blocks in Zulip messages.

Now the filter rule has been generalised to handle both cases together.
As a side effect, the bug where partially copied Zulip code blocks
lost formatting on pasting has been fixed.
This commit is contained in:
N-Shar-ma
2024-02-02 05:52:13 +05:30
committed by Tim Abbott
parent c8e77b6cac
commit dbd2c80108
2 changed files with 21 additions and 58 deletions

View File

@@ -385,55 +385,6 @@ export function paste_handler_converter(paste_html) {
return prefix + content + (node.nextSibling && !/\n$/.test(content) ? "\n" : "");
},
});
// Dedicated rule for code blocks copied out of Zulip messages. Upstream's
// `fencedCodeBlock` rule only matches a `pre` element whose 1st child is a
// `code` element, whereas a Zulip code block has an empty span as the 1st
// child of its `pre`, followed by the `code` element. This rule is therefore a
// variation of upstream's `fencedCodeBlock` rule that matches on the `code`
// element instead.
turndownService.addRule("zulipCodeBlock", {
    // Match a `code` element sitting inside a `pre` whose own parent carries
    // the Zulip specific `zulip-code-block` class, and only when fenced code
    // blocks are the configured style.
    filter(node, options) {
        if (options.codeBlockStyle !== "fenced" || node.nodeName !== "CODE") {
            return false;
        }
        const preElement = node.parentElement;
        if (preElement?.nodeName !== "PRE") {
            return false;
        }
        return preElement.parentElement?.classList.contains("zulip-code-block");
    },
    // Same output shape as upstream's `fencedCodeBlock` replacement, except
    // that the language (if any) is read from the `data-code-language`
    // attribute of the closest `.codehilite` ancestor and appended to the
    // opening fence.
    replacement(content, node, options) {
        const language = node.closest(".codehilite")?.dataset?.codeLanguage || "";
        const fenceChar = options.fence.charAt(0);

        // The fence has to be strictly longer than any run of 3 or more fence
        // characters that starts a line of the code, so that such a line is
        // not read as the closing fence.
        let fenceSize = 3;
        for (const [fenceRun] of content.matchAll(new RegExp(`^${fenceChar}{3,}`, "gm"))) {
            fenceSize = Math.max(fenceSize, fenceRun.length + 1);
        }
        const fence = fenceChar.repeat(fenceSize);

        // Drop a single trailing newline so the closing fence directly
        // follows the last line of code.
        const body = content.replace(/\n$/, "");
        return `\n\n${fence}${language}\n${body}\n${fence}\n\n`;
    },
});
turndownService.addRule("zulipImagePreview", {
filter(node) {
// select image previews in Zulip messages
@@ -481,20 +432,30 @@ export function paste_handler_converter(paste_html) {
},
});
// We override the original upstream implementation of this rule to turn any
// single line code blocks into inline markdown code. Everything else is the same.
// We override the original upstream implementation of this rule to make
// several tweaks:
// - We turn any single line code blocks into inline markdown code.
// - We generalise the filter condition to allow a `pre` element with a
// `code` element as its only non-empty child, which applies to Zulip code
// blocks too.
// - For Zulip code blocks, we extract the language of the code block (if
// any) correctly.
// Everything else works the same.
turndownService.addRule("fencedCodeBlock", {
filter(node, options) {
return (
options.codeBlockStyle === "fenced" &&
node.nodeName === "PRE" &&
node.firstChild &&
node.firstChild.nodeName === "CODE"
[...node.childNodes].filter((child) => child.textContent.trim() !== "").length ===
1 &&
[...node.childNodes].find((child) => child.textContent.trim() !== "").nodeName ===
"CODE"
);
},
replacement(_content, node, options) {
const code = node.firstChild.textContent;
const codeElement = [...node.childNodes].find((child) => child.nodeName === "CODE");
const code = codeElement.textContent;
// We convert single line code inside a code block to inline markdown code,
// and the code for this is taken from upstream's `code` rule.
@@ -515,8 +476,10 @@ export function paste_handler_converter(paste_html) {
return delimiter + extraSpace + code + extraSpace + delimiter;
}
const className = node.firstChild.getAttribute("class") || "";
const language = (className.match(/language-(\S+)/) || [null, ""])[1];
const className = codeElement.getAttribute("class") || "";
const language = node.parentElement?.classList.contains("zulip-code-block")
? node.closest(".codehilite")?.dataset?.codeLanguage || ""
: (className.match(/language-(\S+)/) || [null, ""])[1];
const fenceChar = options.fence.charAt(0);
let fenceSize = 3;

View File

@@ -29,10 +29,10 @@ run_test("paste_handler_converter", () => {
assert.equal(copy_and_paste.paste_handler_converter(input), "The `JSDOM` constructor");
// A python code block
input = `<meta http-equiv="content-type" content="text/html; charset=utf-8"><p style="margin: 3px 0px; color: rgb(221, 222, 238); font-family: &quot;Source Sans 3&quot;, sans-serif; font-size: 14px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(33, 45, 59); text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial;">zulip code block in python</p><div class="codehilite zulip-code-block" data-code-language="Python" style="background-color: rgb(33, 45, 59); display: block !important; border: none !important; background-image: none !important; background-position: initial !important; background-size: initial !important; background-repeat: initial !important; background-attachment: initial !important; background-origin: initial !important; background-clip: initial !important; color: rgb(221, 222, 238); font-family: &quot;Source Sans 3&quot;, sans-serif; font-size: 14px; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial;"><pre style="padding: 5px 7px 3px; font-family: &quot;Source Code Pro&quot;, monospace; font-size: 0.825em; color: rgb(163, 206, 255); border-radius: 4px; display: block; margin: 5px 0px; line-height: 1.4; word-break: break-all; overflow-wrap: normal; white-space: pre; background-color: rgb(29, 38, 48); border: 1px solid rgba(0, 0, 0, 0.15); direction: ltr; overflow-x: auto;"><span></span><code style="font-family: &quot;Source Code Pro&quot;, 
monospace; font-size: inherit; unicode-bidi: embed; direction: ltr; color: rgb(163, 206, 255); white-space: inherit; padding: 0px; background-color: rgb(29, 38, 48); border: 0px rgba(0, 0, 0, 0.5); border-radius: 3px; overflow-x: scroll;"><span class="nb" style="color: rgb(239, 239, 143);">print</span><span class="p" style="color: rgb(65, 113, 113);">(</span><span class="s2" style="color: rgb(204, 147, 147);">"hello world"</span><span class="p" style="color: rgb(65, 113, 113);">)</span></code></pre></div></meta>`;
input = `<meta http-equiv="content-type" content="text/html; charset=utf-8"><p>zulip code block in python</p><div class="codehilite zulip-code-block" data-code-language="Python"><pre><span></span><code><span class="nb">print</span><span class="p">(</span><span class="s2">"hello"</span><span class="p">)</span>\n<span class="nb">print</span><span class="p">(</span><span class="s2">"world"</span><span class="p">)</span></code></pre></div></meta>`;
assert.equal(
copy_and_paste.paste_handler_converter(input),
'zulip code block in python\n\n```Python\nprint("hello world")\n```',
'zulip code block in python\n\n```Python\nprint("hello")\nprint("world")\n```',
);
// Single line in a code block