mirror of
https://github.com/zulip/zulip.git
synced 2025-11-05 06:23:38 +00:00
Before this change, we would use **some** options relating to parsing messages, but not all of them. The reason for this was completely unintentional. It's mostly a moot point, since the server sends back pretty generic messages when you do something like invoke the "/dark" command, and the message would parse the same way whether or not the parser was looking for things like user mentions or stream links. In order to make this code predictable, I had to decide whether we do a completely vanilla parse or a full message parse. My decision now is mostly tactical. It's a trivial one-line change to just use all the options for message parsing, whereas it requires a major overhaul to allow a vanilla parse. I also predict that we will eventually want to parse these server responses as if they were messages. I doubt the zcommand responses would ever take advantage of it, but I could imagine things like nag messages wanting to use user mentions. Even if my predictions are wrong, my decisions here are pretty easy to reverse once we learn more. For the particular case of zcommands, it is puzzling to me why the server doesn't just send back HTML, but I don't want to open that can of worms yet, as that would technically be an API change. For now I am happy with the one-line fix.
575 lines
21 KiB
JavaScript
575 lines
21 KiB
JavaScript
import {isValid} from "date-fns";
|
|
import katex from "katex"; // eslint-disable-line import/no-unresolved
|
|
import _ from "lodash";
|
|
|
|
import * as fenced_code from "../shared/js/fenced_code";
|
|
import marked from "../third/marked/lib/marked";
|
|
|
|
import * as blueslip from "./blueslip";
|
|
import * as emoji from "./emoji";
|
|
import * as linkifiers from "./linkifiers";
|
|
|
|
// This contains zulip's frontend Markdown implementation; see
|
|
// docs/subsystems/markdown.md for docs on our Markdown syntax. The other
|
|
// main piece in rendering Markdown client-side is
|
|
// static/third/marked/lib/marked.js, which we have significantly
|
|
// modified from the original implementation.
|
|
|
|
// Docs: https://zulip.readthedocs.io/en/latest/subsystems/markdown.html
|
|
|
|
// This should be initialized with a struct
|
|
// similar to markdown_config.get_helpers().
|
|
// See the call to markdown.initialize() in ui_init
|
|
// for example usage.
|
|
let helpers;
|
|
|
|
// If we see preview-related syntax in our content, we will need the
|
|
// backend to render it.
|
|
const preview_regexes = [
|
|
// Inline image previews, check for contiguous chars ending in image suffix
|
|
// To keep the below regexes simple, split them out for the end-of-message case
|
|
|
|
/\S*(?:\.bmp|\.gif|\.jpg|\.jpeg|\.png|\.webp)\)?\s+/m,
|
|
/\S*(?:\.bmp|\.gif|\.jpg|\.jpeg|\.png|\.webp)\)?$/m,
|
|
|
|
// Twitter and youtube links are given previews
|
|
|
|
/\S*(?:twitter|youtube)\.com\/\S*/,
|
|
];
|
|
|
|
function contains_preview_link(content) {
|
|
return preview_regexes.some((re) => re.test(content));
|
|
}
|
|
|
|
export function translate_emoticons_to_names(text) {
|
|
// Translates emoticons in a string to their colon syntax.
|
|
let translated = text;
|
|
let replacement_text;
|
|
const terminal_symbols = ",.;?!()[] \"'\n\t"; // From composebox_typeahead
|
|
const symbols_except_space = terminal_symbols.replace(" ", "");
|
|
|
|
const emoticon_replacer = function (match, g1, offset, str) {
|
|
const prev_char = str[offset - 1];
|
|
const next_char = str[offset + match.length];
|
|
|
|
const symbol_at_start = terminal_symbols.includes(prev_char);
|
|
const symbol_at_end = terminal_symbols.includes(next_char);
|
|
const non_space_at_start = symbols_except_space.includes(prev_char);
|
|
const non_space_at_end = symbols_except_space.includes(next_char);
|
|
const valid_start = symbol_at_start || offset === 0;
|
|
const valid_end = symbol_at_end || offset === str.length - match.length;
|
|
|
|
if (non_space_at_start && non_space_at_end) {
|
|
// Hello!:)?
|
|
return match;
|
|
}
|
|
if (valid_start && valid_end) {
|
|
return replacement_text;
|
|
}
|
|
return match;
|
|
};
|
|
|
|
for (const translation of emoji.get_emoticon_translations()) {
|
|
// We can't pass replacement_text directly into
|
|
// emoticon_replacer, because emoticon_replacer is
|
|
// a callback for `replace()`. Instead we just mutate
|
|
// the `replacement_text` that the function closes on.
|
|
replacement_text = translation.replacement_text;
|
|
translated = translated.replace(translation.regex, emoticon_replacer);
|
|
}
|
|
|
|
return translated;
|
|
}
|
|
|
|
function contains_problematic_linkifier(content) {
|
|
// If a linkifier doesn't start with some specified characters
|
|
// then don't render it locally. It is workaround for the fact that
|
|
// javascript regex doesn't support lookbehind.
|
|
for (const re of linkifiers.get_linkifier_map().keys()) {
|
|
const pattern = /[^\s"'(,:<]/.source + re.source + /(?!\w)/.source;
|
|
const regex = new RegExp(pattern);
|
|
if (regex.test(content)) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
export function contains_backend_only_syntax(content) {
|
|
// Try to guess whether or not a message contains syntax that only the
|
|
// backend Markdown processor can correctly handle.
|
|
// If it doesn't, we can immediately render it client-side for local echo.
|
|
return contains_preview_link(content) || contains_problematic_linkifier(content);
|
|
}
|
|
|
|
export function parse({raw_content, helper_config}) {
|
|
// Given the raw markdown content of a message (raw_content)
|
|
// we return the HTML content (content) and flags.
|
|
// Our caller passes a helper_config object that has several
|
|
// helper functions for getting info about users, streams, etc.
|
|
|
|
helpers = helper_config;
|
|
|
|
let mentioned = false;
|
|
let mentioned_group = false;
|
|
let mentioned_wildcard = false;
|
|
|
|
const options = {
|
|
userMentionHandler(mention, silently) {
|
|
if (mention === "all" || mention === "everyone" || mention === "stream") {
|
|
let classes;
|
|
let display_text;
|
|
if (silently) {
|
|
classes = "user-mention silent";
|
|
display_text = mention;
|
|
} else {
|
|
// Wildcard mention
|
|
mentioned_wildcard = true;
|
|
display_text = "@" + mention;
|
|
classes = "user-mention";
|
|
}
|
|
|
|
return `<span class="${classes}" data-user-id="*">${_.escape(display_text)}</span>`;
|
|
}
|
|
|
|
let full_name;
|
|
let user_id;
|
|
|
|
const id_regex = /^(.+)?\|(\d+)$/; // For @**user|id** and @**|id** syntax
|
|
const match = id_regex.exec(mention);
|
|
|
|
if (match) {
|
|
/*
|
|
If we have two users named Alice, we want
|
|
users to provide mentions like this:
|
|
|
|
alice|42
|
|
alice|99
|
|
|
|
The autocomplete feature will help users
|
|
send correct mentions for duplicate names,
|
|
but we also have to consider the possibility
|
|
that the user will hand-type something
|
|
incorrectly, in which case we'll fall
|
|
through to the other code (which may be a
|
|
misfeature).
|
|
*/
|
|
full_name = match[1];
|
|
user_id = Number.parseInt(match[2], 10);
|
|
|
|
if (full_name === undefined) {
|
|
// For @**|id** syntax
|
|
if (!helpers.is_valid_user_id(user_id)) {
|
|
// silently ignore invalid user id.
|
|
user_id = undefined;
|
|
} else {
|
|
full_name = helpers.get_actual_name_from_user_id(user_id);
|
|
}
|
|
} else {
|
|
// For @**user|id** syntax
|
|
if (!helpers.is_valid_full_name_and_user_id(full_name, user_id)) {
|
|
user_id = undefined;
|
|
full_name = undefined;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (user_id === undefined) {
|
|
// Handle normal syntax
|
|
full_name = mention;
|
|
user_id = helpers.get_user_id_from_name(full_name);
|
|
}
|
|
|
|
if (user_id === undefined) {
|
|
// This is nothing to be concerned about--the users
|
|
// are allowed to hand-type mentions and they may
|
|
// have had a typo in the name.
|
|
return undefined;
|
|
}
|
|
|
|
// HAPPY PATH! Note that we not only need to return the
|
|
// appropriate HTML snippet here; we also want to update
|
|
// flags on the message itself that get used by the message
|
|
// view code and possibly our filtering code.
|
|
|
|
// If I mention "@aLiCe sMITH", I still want "Alice Smith" to
|
|
// show in the pill.
|
|
let display_text = helpers.get_actual_name_from_user_id(user_id);
|
|
let classes;
|
|
if (silently) {
|
|
classes = "user-mention silent";
|
|
} else {
|
|
if (helpers.my_user_id() === user_id) {
|
|
// Personal mention of current user.
|
|
mentioned = true;
|
|
}
|
|
classes = "user-mention";
|
|
display_text = "@" + display_text;
|
|
}
|
|
|
|
return `<span class="${classes}" data-user-id="${_.escape(user_id)}">${_.escape(
|
|
display_text,
|
|
)}</span>`;
|
|
},
|
|
groupMentionHandler(name, silently) {
|
|
const group = helpers.get_user_group_from_name(name);
|
|
if (group !== undefined) {
|
|
let display_text;
|
|
let classes;
|
|
if (silently) {
|
|
display_text = group.name;
|
|
classes = "user-group-mention silent";
|
|
} else {
|
|
display_text = "@" + group.name;
|
|
classes = "user-group-mention";
|
|
if (helpers.is_member_of_user_group(group.id, helpers.my_user_id())) {
|
|
// Mentioned the current user's group.
|
|
mentioned_group = true;
|
|
}
|
|
}
|
|
|
|
return `<span class="${classes}" data-user-group-id="${_.escape(
|
|
group.id,
|
|
)}">${_.escape(display_text)}</span>`;
|
|
}
|
|
return undefined;
|
|
},
|
|
silencedMentionHandler(quote) {
|
|
// Silence quoted mentions.
|
|
quote = quote.replace(
|
|
/(<span class="user-mention)(" data-user-id="(\d+|\*)">)@/g,
|
|
"$1 silent$2",
|
|
);
|
|
|
|
// Silence quoted user group mentions.
|
|
quote = quote.replace(
|
|
/(<span class="user-group-mention)(" data-user-group-id="\d+">)@/g,
|
|
"$1 silent$2",
|
|
);
|
|
|
|
// In most cases, if you are being mentioned in the message you're quoting, you wouldn't
|
|
// mention yourself outside of the blockquote (and, above it). If that you do that, the
|
|
// following mentioned status is false; the backend rendering is authoritative and the
|
|
// only side effect is the lack red flash on immediately sending the message.
|
|
//
|
|
// A better parser would be able to just ignore mentions
|
|
// inside; we just set all flags to False and let the
|
|
// server rendering correct the message flags, to avoid a
|
|
// flash of mention styling.
|
|
mentioned = false;
|
|
mentioned_group = false;
|
|
mentioned_wildcard = false;
|
|
return quote;
|
|
},
|
|
};
|
|
|
|
// Our Python-Markdown processor appends two \n\n to input
|
|
const content = marked(raw_content + "\n\n", options).trim();
|
|
|
|
// Simulate message flags for our locally rendered
|
|
// message. Messages the user themselves sent via the browser are
|
|
// always marked as read.
|
|
const flags = ["read"];
|
|
if (mentioned || mentioned_group) {
|
|
flags.push("mentioned");
|
|
}
|
|
if (mentioned_wildcard) {
|
|
flags.push("wildcard_mentioned");
|
|
}
|
|
|
|
return {content, flags};
|
|
}
|
|
|
|
export function add_topic_links(message) {
|
|
if (message.type !== "stream") {
|
|
message.topic_links = [];
|
|
return;
|
|
}
|
|
const topic = message.topic;
|
|
const links = [];
|
|
|
|
for (const [pattern, url] of linkifiers.get_linkifier_map().entries()) {
|
|
let match;
|
|
while ((match = pattern.exec(topic)) !== null) {
|
|
let link_url = url;
|
|
const matched_groups = match.slice(1);
|
|
let i = 0;
|
|
while (i < matched_groups.length) {
|
|
const matched_group = matched_groups[i];
|
|
const current_group = i + 1;
|
|
const back_ref = "\\" + current_group;
|
|
link_url = link_url.replace(back_ref, matched_group);
|
|
i += 1;
|
|
}
|
|
// We store the starting index as well, to sort the order of occurrence of the links
|
|
// in the topic, similar to the logic implemented in zerver/lib/markdown/__init__.py
|
|
links.push({url: link_url, text: match[0], index: topic.indexOf(match[0])});
|
|
}
|
|
}
|
|
|
|
// Also make raw URLs navigable
|
|
const url_re = /\b(https?:\/\/[^\s<]+[^\s"'),.:;<\]])/g; // Slightly modified from third/marked.js
|
|
const matches = topic.match(url_re);
|
|
if (matches) {
|
|
for (const match of matches) {
|
|
links.push({url: match, text: match, index: topic.indexOf(match)});
|
|
}
|
|
}
|
|
links.sort((a, b) => a.index - b.index);
|
|
for (const match of links) {
|
|
delete match.index;
|
|
}
|
|
message.topic_links = links;
|
|
}
|
|
|
|
export function is_status_message(raw_content) {
|
|
return raw_content.startsWith("/me ");
|
|
}
|
|
|
|
function make_emoji_span(codepoint, title, alt_text) {
|
|
return `<span aria-label="${_.escape(title)}" class="emoji emoji-${_.escape(
|
|
codepoint,
|
|
)}" role="img" title="${_.escape(title)}">${_.escape(alt_text)}</span>`;
|
|
}
|
|
|
|
function handleUnicodeEmoji(unicode_emoji) {
|
|
const codepoint = unicode_emoji.codePointAt(0).toString(16);
|
|
const emoji_name = emoji.get_emoji_name(codepoint);
|
|
|
|
if (emoji_name) {
|
|
const alt_text = ":" + emoji_name + ":";
|
|
const title = emoji_name.replace(/_/g, " ");
|
|
return make_emoji_span(codepoint, title, alt_text);
|
|
}
|
|
|
|
return unicode_emoji;
|
|
}
|
|
|
|
function handleEmoji(emoji_name) {
|
|
const alt_text = ":" + emoji_name + ":";
|
|
const title = emoji_name.replace(/_/g, " ");
|
|
|
|
// Zulip supports both standard/Unicode emoji, served by a
|
|
// spritesheet and custom realm-specific emoji (served by URL).
|
|
// We first check if this is a realm emoji, and if so, render it.
|
|
//
|
|
// Otherwise we'll look at Unicode emoji to render with an emoji
|
|
// span using the spritesheet; and if it isn't one of those
|
|
// either, we pass through the plain text syntax unmodified.
|
|
const emoji_url = emoji.get_realm_emoji_url(emoji_name);
|
|
|
|
if (emoji_url) {
|
|
return `<img alt="${_.escape(alt_text)}" class="emoji" src="${_.escape(
|
|
emoji_url,
|
|
)}" title="${_.escape(title)}">`;
|
|
}
|
|
|
|
const codepoint = emoji.get_emoji_codepoint(emoji_name);
|
|
if (codepoint) {
|
|
return make_emoji_span(codepoint, title, alt_text);
|
|
}
|
|
|
|
return alt_text;
|
|
}
|
|
|
|
function handleLinkifier(pattern, matches) {
|
|
let url = linkifiers.get_linkifier_map().get(pattern);
|
|
|
|
let current_group = 1;
|
|
|
|
for (const match of matches) {
|
|
const back_ref = "\\" + current_group;
|
|
url = url.replace(back_ref, match);
|
|
current_group += 1;
|
|
}
|
|
|
|
return url;
|
|
}
|
|
|
|
function handleTimestamp(time) {
|
|
let timeobject;
|
|
if (Number.isNaN(Number(time))) {
|
|
timeobject = new Date(time); // not a Unix timestamp
|
|
} else {
|
|
// JavaScript dates are in milliseconds, Unix timestamps are in seconds
|
|
timeobject = new Date(time * 1000);
|
|
}
|
|
|
|
const escaped_time = _.escape(time);
|
|
if (!isValid(timeobject)) {
|
|
// Unsupported time format: rerender accordingly.
|
|
|
|
// We do not show an error on these formats in local echo because
|
|
// there is a chance that the server would interpret it successfully
|
|
// and if it does, the jumping from the error message to a rendered
|
|
// timestamp doesn't look good.
|
|
return `<span>${escaped_time}</span>`;
|
|
}
|
|
|
|
// Use html5 <time> tag for valid timestamps.
|
|
// render time without milliseconds.
|
|
const escaped_isotime = _.escape(timeobject.toISOString().split(".")[0] + "Z");
|
|
return `<time datetime="${escaped_isotime}">${escaped_time}</time>`;
|
|
}
|
|
|
|
function handleStream(stream_name) {
|
|
const stream = helpers.get_stream_by_name(stream_name);
|
|
if (stream === undefined) {
|
|
return undefined;
|
|
}
|
|
const href = helpers.stream_hash(stream.stream_id);
|
|
return `<a class="stream" data-stream-id="${_.escape(stream.stream_id)}" href="/${_.escape(
|
|
href,
|
|
)}">#${_.escape(stream.name)}</a>`;
|
|
}
|
|
|
|
function handleStreamTopic(stream_name, topic) {
|
|
const stream = helpers.get_stream_by_name(stream_name);
|
|
if (stream === undefined || !topic) {
|
|
return undefined;
|
|
}
|
|
const href = helpers.stream_topic_hash(stream.stream_id, topic);
|
|
const text = `#${stream.name} > ${topic}`;
|
|
return `<a class="stream-topic" data-stream-id="${_.escape(
|
|
stream.stream_id,
|
|
)}" href="/${_.escape(href)}">${_.escape(text)}</a>`;
|
|
}
|
|
|
|
function handleTex(tex, fullmatch) {
|
|
try {
|
|
return katex.renderToString(tex);
|
|
} catch (error) {
|
|
if (error.message.startsWith("KaTeX parse error")) {
|
|
// TeX syntax error
|
|
return `<span class="tex-error">${_.escape(fullmatch)}</span>`;
|
|
}
|
|
blueslip.error(error);
|
|
return undefined;
|
|
}
|
|
}
|
|
|
|
export function set_linkifier_regexes(regexes) {
|
|
// This needs to be called any time we modify our linkifier regexes,
|
|
// until we find a less clumsy way to handle this.
|
|
marked.InlineLexer.rules.zulip.linkifiers = regexes;
|
|
}
|
|
|
|
export function setup() {
|
|
// Once we focus on supporting other platforms such as mobile,
|
|
// we will export this function.
|
|
|
|
function disable_markdown_regex(rules, name) {
|
|
rules[name] = {
|
|
exec() {
|
|
return false;
|
|
},
|
|
};
|
|
}
|
|
|
|
// Configure the marked Markdown parser for our usage
|
|
const r = new marked.Renderer();
|
|
|
|
// No <code> around our code blocks instead a codehilite <div> and disable
|
|
// class-specific highlighting.
|
|
r.code = (code) => fenced_code.wrap_code(code) + "\n\n";
|
|
|
|
// Prohibit empty links for some reason.
|
|
const old_link = r.link;
|
|
r.link = (href, title, text) => old_link.call(r, href, title, text.trim() ? text : href);
|
|
|
|
// Put a newline after a <br> in the generated HTML to match Markdown
|
|
r.br = function () {
|
|
return "<br>\n";
|
|
};
|
|
|
|
function preprocess_code_blocks(src) {
|
|
return fenced_code.process_fenced_code(src);
|
|
}
|
|
|
|
function preprocess_translate_emoticons(src) {
|
|
if (!helpers.should_translate_emoticons()) {
|
|
return src;
|
|
}
|
|
|
|
// In this scenario, the message has to be from the user, so the only
|
|
// requirement should be that they have the setting on.
|
|
return translate_emoticons_to_names(src);
|
|
}
|
|
|
|
// Disable headings
|
|
// We only keep the # Heading format.
|
|
disable_markdown_regex(marked.Lexer.rules.tables, "lheading");
|
|
|
|
// Disable __strong__ (keeping **strong**)
|
|
marked.InlineLexer.rules.zulip.strong = /^\*\*([\S\s]+?)\*\*(?!\*)/;
|
|
|
|
// Make sure <del> syntax matches the backend processor
|
|
marked.InlineLexer.rules.zulip.del = /^(?!<~)~~([^~]+)~~(?!~)/;
|
|
|
|
// Disable _emphasis_ (keeping *emphasis*)
|
|
// Text inside ** must start and end with a word character
|
|
// to prevent mis-parsing things like "char **x = (char **)y"
|
|
marked.InlineLexer.rules.zulip.em = /^\*(?!\s+)((?:\*\*|[\S\s])+?)(\S)\*(?!\*)/;
|
|
|
|
// Disable autolink as (a) it is not used in our backend and (b) it interferes with @mentions
|
|
disable_markdown_regex(marked.InlineLexer.rules.zulip, "autolink");
|
|
|
|
// Tell our fenced code preprocessor how to insert arbitrary
|
|
// HTML into the output. This generated HTML is safe to not escape
|
|
fenced_code.set_stash_func((html) => marked.stashHtml(html, true));
|
|
|
|
marked.setOptions({
|
|
gfm: true,
|
|
tables: true,
|
|
breaks: true,
|
|
pedantic: false,
|
|
sanitize: true,
|
|
smartLists: true,
|
|
smartypants: false,
|
|
zulip: true,
|
|
emojiHandler: handleEmoji,
|
|
linkifierHandler: handleLinkifier,
|
|
unicodeEmojiHandler: handleUnicodeEmoji,
|
|
streamHandler: handleStream,
|
|
streamTopicHandler: handleStreamTopic,
|
|
texHandler: handleTex,
|
|
timestampHandler: handleTimestamp,
|
|
renderer: r,
|
|
preprocessors: [preprocess_code_blocks, preprocess_translate_emoticons],
|
|
});
|
|
}
|
|
|
|
// NOTE: Everything below this line is likely to be webapp-specific
|
|
// and won't be used by future platforms such as mobile.
|
|
// We may eventually move this code to a new file, but we want
|
|
// to wait till the dust settles a bit on some other changes first.
|
|
|
|
let webapp_helpers;
|
|
|
|
export function initialize(helper_config) {
|
|
// This is generally only intended to be called by the webapp. Most
|
|
// other platforms should call setup().
|
|
webapp_helpers = helper_config;
|
|
helpers = helper_config;
|
|
setup();
|
|
}
|
|
|
|
export function apply_markdown(message) {
|
|
// This is generally only intended to be called by the webapp. Most
|
|
// other platforms should call parse().
|
|
const raw_content = message.raw_content;
|
|
const {content, flags} = parse({raw_content, helper_config: webapp_helpers});
|
|
message.content = content;
|
|
message.flags = flags;
|
|
message.is_me_message = is_status_message(raw_content);
|
|
}
|
|
|
|
export function parse_non_message(raw_content) {
|
|
// Occasionally we get markdown from the server that is not technically
|
|
// a message, but we want to convert it to HTML. Note that we parse
|
|
// raw_content exactly as if it were a Zulip message, so we will
|
|
// handle things like mentions, stream links, and linkifiers.
|
|
return parse({raw_content, helper_config: webapp_helpers}).content;
|
|
}
|