compose_paste: Paste as image for LibreOffice Calc content.

Fixes: https://chat.zulip.org/#narrow/channel/9-issues/topic/bug.3A.20pasting.20cells.20from.20libreoffice.20calc.20is.20only.20image/near/2176558.

Co-authored-by: Puneeth Chaganti <punchagan@muse-amuse.in>
Signed-off-by: apoorvapendse <apoorvavpendse@gmail.com>
This commit is contained in:
apoorvapendse
2025-05-30 18:57:38 +05:30
committed by Tim Abbott
parent 6ce089990e
commit 407b6f64c4
2 changed files with 53 additions and 0 deletions

View File

@@ -162,6 +162,38 @@ function is_from_excel(html_fragment: HTMLBodyElement): boolean {
return true;
}
/**
 * Heuristically decide whether a pasted HTML document was produced by
 * copying cells from LibreOffice Calc.
 *
 * The paste is treated as Calc content when both of these hold:
 *   1. The enclosing <html> element contains a
 *      `<meta name="generator" content="LibreOffice ...">` tag.
 *   2. The <body> has exactly one child element, and it is a <table>.
 *
 * Known limitation: LibreOffice Writer emits the same generator
 * metadata, so a paste from Writer that consists of nothing but a
 * single table is indistinguishable here and is reported as Calc
 * content (a false positive).
 *
 * @param body_tag The <body> element of the parsed paste HTML.
 * @returns true if the paste looks like LibreOffice Calc cells.
 */
function is_from_libreoffice_calc(body_tag: HTMLBodyElement): boolean {
    // The generator metadata lives in <head>, so we need the enclosing
    // <html> element to search for it.
    const html_tag = body_tag.parentElement;
    if (!html_tag || html_tag.nodeName !== "HTML") {
        return false;
    }

    const has_libreoffice_metadata = [...html_tag.querySelectorAll("meta")].some(
        (meta) => meta.name === "generator" && meta.content.startsWith("LibreOffice"),
    );
    if (!has_libreoffice_metadata) {
        return false;
    }

    // Narrow the possible false positives, such as pasting text from
    // LibreOffice Writer, which carries the same metadata. Multiple
    // children may appear when pasting mixed content from something
    // like LibreOffice Writer, but Calc always pastes a single <table>
    // element, so require exactly that.
    //
    // Note: no null check on body_tag is needed here; it is typed as
    // non-nullable and has already been dereferenced above.
    return body_tag.children.length === 1 && body_tag.firstElementChild?.tagName === "TABLE";
}
export function is_single_image(paste_html: string): boolean {
const html_fragment = new DOMParser()
.parseFromString(paste_html, "text/html")
@@ -169,6 +201,7 @@ export function is_single_image(paste_html: string): boolean {
assert(html_fragment !== null);
return (
is_from_excel(html_fragment) ||
is_from_libreoffice_calc(html_fragment) ||
(html_fragment.childNodes.length === 1 &&
html_fragment.firstElementChild !== null &&
html_fragment.firstElementChild.nodeName === "IMG")

View File

@@ -288,6 +288,26 @@ run_test("paste_handler_converter", () => {
// Pasting from Excel using ^⇧V should paste formatted text.
assert.equal(compose_paste.paste_handler_converter(input), " \n\n$ 20.00\n\n$ 7.00");
// Pasting from LibreOffice Calc should paste an image.
input = `<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"><title></title><meta name="generator" content="LibreOffice 25.2.3.2 (Windows)"><style type="text/css"> body,div,table,thead,tbody,tfoot,tr,th,td,p { font-family:"Arial"; font-size:x-small } a.comment-indicator:hover + comment { background:#ffd; position:absolute; display:block; border:1px solid black; padding:0.5em; } a.comment-indicator { background:red; display:inline-block; border:1px solid black; width:0.5em; height:0.5em; } comment { display:none; } </style></head><body><table cellspacing="0" border="0"><colgroup span="4" width="107"></colgroup><tbody><tr><td height="24" align="left" data-sheets-value="{ &quot;1&quot;: 2, &quot;2&quot;: &quot;Kathleen&quot;}">Kathleen</td><td align="left" data-sheets-value="{ &quot;1&quot;: 2, &quot;2&quot;: &quot;Hanner&quot;}">Hanner</td><td align="left" data-sheets-value="{ &quot;1&quot;: 2, &quot;2&quot;: &quot;Female&quot;}">Female</td><td align="left" data-sheets-value="{ &quot;1&quot;: 2, &quot;2&quot;: &quot;United States&quot;}">United States</td></tr><tr><td height="24" align="left" data-sheets-value="{ &quot;1&quot;: 2, &quot;2&quot;: &quot;Nereida&quot;}">Nereida</td><td align="left" data-sheets-value="{ &quot;1&quot;: 2, &quot;2&quot;: &quot;Magwood&quot;}">Magwood</td><td align="left" data-sheets-value="{ &quot;1&quot;: 2, &quot;2&quot;: &quot;Female&quot;}">Female</td><td align="left" data-sheets-value="{ &quot;1&quot;: 2, &quot;2&quot;: &quot;United States&quot;}">United States</td></tr></tbody></table></body></html>`;
assert.ok(compose_paste.is_single_image(input));
// This contains three child elements inside the body tag, pasted
// from LibreOffice Writer, which is correctly classified as not an image.
input = `<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"/><title></title><meta name="generator" content="LibreOffice 25.2.3.2 (Windows)"/><style type="text/css">@page { size: 8.5in 11in; margin: 0.79in }td p { orphans: 0; widows: 0; background: transparent }p { line-height: 115%; margin-bottom: 0.1in; background: transparent }</style></head><body lang="en-US" link="#000080" vlink="#800000" dir="ltr"><p style="line-height: 100%; margin-bottom: 0in">ello world</p><table width="100%" cellpadding="0" cellspacing="0"><col width="51*"/><col width="51*"/><col width="51*"/><col width="51*"/><col width="51*"/><tr valign="top"><td width="20%" style="border: none; padding: 0in"><p>X</p></td><td width="20%" style="border: none; padding: 0in"><p>as</p></td><td width="20%" style="border: none; padding: 0in"><p>Jak</p></td><td width="20%" style="border: none; padding: 0in"><p>J</p></td><td width="20%" style="border: none; padding: 0in"><p>Nm</p></td></tr><tr valign="top"><td width="20%" style="border: none; padding: 0in"><p>,mn</p></td><td width="20%" style="border: none; padding: 0in"><p>,nnf</p></td><td width="20%" style="border: none; padding: 0in"><p>Adlk</p></td><td width="20%" style="border: none; padding: 0in"><p>Asn</p></td><td width="20%" style="border: none; padding: 0in"><p>,amns</p></td></tr><tr valign="top"><td width="20%" style="border: none; padding: 0in"><p>Nm</p></td><td width="20%" style="border: none; padding: 0in"><p>Oi</p></td><td width="20%" style="border: none; padding: 0in"><p>Poi</p></td><td width="20%" style="border: none; padding: 0in"><p>B</p></td><td width="20%" style="border: none; padding: 0in"><p>Ijo</p></td></tr><tr valign="top"><td width="20%" style="border: none; padding: 0in"><p>,mn,</p></td><td width="20%" style="border: none; padding: 0in"><p>;ih</p></td><td width="20%" style="border: none; padding: 0in"><p>Oug</p></td><td width="20%" style="border: none; padding: 0in"><p>Iu</p></td><td width="20%" 
style="border: none; padding: 0in"><p>G</p></td></tr><tr valign="top"><td width="20%" style="border: none; padding: 0in"><p>Ug</p></td><td width="20%" style="border: none; padding: 0in"><p>Bkjb</p></td><td width="20%" style="border: none; padding: 0in"><p>Kjbk</p></td><td width="20%" style="border: none; padding: 0in"><p>;jbj</p></td><td width="20%" style="border: none; padding: 0in"><p>;jb;</p></td></tr><tr valign="top"><td width="20%" style="border: none; padding: 0in"><p>Bkjb</p></td><td width="20%" style="border: none; padding: 0in"><p>Ugug</p></td><td width="20%" style="border: none; padding: 0in"><p>I9</p></td><td width="20%" style="border: none; padding: 0in"><p>68</p></td><td width="20%" style="border: none; padding: 0in"><p>0</p></td></tr><tr valign="top"><td width="20%" style="border: none; padding: 0in"><p>90kjb</p></td><td width="20%" style="border: none; padding: 0in"><p>,bnbiu</p></td><td width="20%" style="border: none; padding: 0in"><p>Ofif</p></td><td width="20%" style="border: none; padding: 0in"><p>P8gp</p></td><td width="20%" style="border: none; padding: 0in"><p>pugp</p></td></tr></table><p style="line-height: 100%; margin-bottom: 0in"><br/></p></body></html>`;
assert.ok(!compose_paste.is_single_image(input));
// A <body> with a single child element that is not a <table>,
// pasted from LibreOffice Writer, should get pasted normally.
input = `<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"/><title></title><meta name="generator" content="LibreOffice 25.2.3.2 (Windows)"/><style type="text/css">@page { size: 8.5in 11in; margin: 0.79in }p { line-height: 115%; margin-bottom: 0.1in; background: transparent }</style></head><body lang="en-US" link="#000080" vlink="#800000" dir="ltr"><p style="line-height: 100%; margin-bottom: 0in">Hello world this is some random text.</p></body></html>`;
assert.ok(!compose_paste.is_single_image(input));
// A single table pasted from LibreOffice Writer is incorrectly
// detected as a LibreOffice Calc table.
// See https://github.com/zulip/zulip/pull/34752/#discussion_r2113598064
input = `<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"/><title></title><meta name="generator" content="LibreOffice 25.2.3.2 (Windows)"/><style type="text/css">@page { size: 8.5in 11in; margin: 0.79in }p { line-height: 115%; margin-bottom: 0.1in; background: transparent }</style></head><body lang="en-US" link="#000080" vlink="#800000" dir="ltr"><table width="258" cellpadding="2" cellspacing="0"><col width="83"/><col width="81"/><col width="81"/><tr valign="bottom"><td width="83" height="16" style="border: none; padding: 0in"><p align="left"><font face="Arial, serif">Melgar</font></p></td><td width="81" style="border: none; padding: 0in"><p align="left"><font face="Arial, serif">Female</font></p></td><td width="81" style="border: none; padding: 0in"><p align="left"><font face="Arial, serif">UnitedStates</font></p></td></tr><tr valign="bottom"><td width="83" height="16" style="border: none; padding: 0in"><p align="left"><font face="Arial, serif">Weiland</font></p></td><td width="81" style="border: none; padding: 0in"><p align="left"><font face="Arial, serif">Female</font></p></td><td width="81" style="border: none; padding: 0in"><p align="left"><font face="Arial, serif">UnitedStates</font></p></td></tr><tr valign="bottom"><td width="83" height="16" style="border: none; padding: 0in"><p align="left"><font face="Arial, serif">Winward</font></p></td><td width="81" style="border: none; padding: 0in"><p align="left"><font face="Arial, serif">Female</font></p></td><td width="81" style="border: none; padding: 0in"><p align="left"><font face="Arial, serif">GreatBritain</font></p></td></tr></table></body></html>`;
assert.ok(compose_paste.is_single_image(input));
// Pasting from the mac terminal
input =
'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta http-equiv="Content-Style-Type" content="text/css"><title></title><meta name="Generator" content="Cocoa HTML Writer"><meta name="CocoaVersion" content="2575.4"><style type="text/css">p.p1 {margin: 0.0px 0.0px 0.0px 0.0px; font: 11.0px Menlo; color: #000000}span.s1 {font-variant-ligatures: no-common-ligatures}</style></head><body><p class="p1"><span class="s1">insertions</span></p></body></html>';