Files
zulip/zerver/webhooks/jotform/view.py
Varun-Kolanu 93913dd6e7 integrations: Support colon and comma characters in jotform payloads.
The payload might contain colons and commas within keys and
values entered by the user. Simply splitting the fields using
", " is therefore unreliable and can lead to incorrect parsing.
Hence, this commit updates the logic for splitting fields in
the pretty data of the Jotform payload.

Fixes #32461.
2025-07-08 11:00:06 -07:00

170 lines
6.2 KiB
Python

# Webhooks for external integrations.
import json
from typing import Any
from urllib.parse import unquote
from django.http import HttpRequest, HttpResponse
from django.utils.translation import gettext as _
from zerver.decorator import webhook_view
from zerver.lib.exceptions import JsonableError
from zerver.lib.response import json_success
from zerver.lib.typed_endpoint import typed_endpoint_without_parameters
from zerver.lib.webhooks.common import check_send_webhook_message
from zerver.models import UserProfile
def format_uploaded_file_links(file_question_value: str, file_url_mappings: dict[str, str]) -> str:
    """Render the file names of a file-upload answer as Markdown links.

    `file_question_value` is a whitespace-separated run of file names, where
    a single file name may itself contain spaces. `file_url_mappings` maps
    each known file name to its URL.

    Words are accumulated one at a time; as soon as the accumulated text
    (joined with single spaces) is a key of `file_url_mappings`, it is
    emitted as `[name](url)` and accumulation starts over. Words that never
    complete a known file name are dropped.

    Returns the Markdown links joined with ", " (an empty string when no
    file name is recognized).
    """
    url_markdown_list: list[str] = []
    current_file_name_words: list[str] = []
    for word in file_question_value.split():
        current_file_name_words.append(word)
        # The shortest run of words that forms a known file name wins.
        potential_file_name = " ".join(current_file_name_words)
        if potential_file_name in file_url_mappings:
            file_url = file_url_mappings[potential_file_name]
            url_markdown_list.append(f"[{potential_file_name}]({file_url})")
            current_file_name_words = []
    return ", ".join(url_markdown_list)
def is_non_file_upload_question_with_response(field: str, raw_request: dict[str, Any]) -> bool:
    """Return whether `field` is an answered, non-file-upload question.

    File upload questions are in raw_request["temp_upload"].
    So, the other questions are the keys in raw_request starting
    with "q". We exclude any questions with a zero length value,
    because they are not in the "pretty" data of the payload.

    A field qualifies when its key starts with "q", its value is truthy,
    and the value is a string, a list, or a dict that has no empty-string
    entries and is not made up solely of "colIds" and "rowIds".
    """
    field_value = raw_request.get(field)
    return bool(
        field.startswith("q")
        and field_value
        and (
            isinstance(field_value, str)
            or (
                isinstance(field_value, dict)
                # Non-empty Appointment, Date fields
                and "" not in field_value.values()
                # Non-empty Input Table field
                and not (
                    len(field_value) == 2 and "colIds" in field_value and "rowIds" in field_value
                )
            )
            # Multiple choice questions
            or isinstance(field_value, list)
        )
    )
def format_value(value: str | list[str] | dict[str, str]) -> str:
if isinstance(value, str):
return value
elif isinstance(value, list):
return " ".join(value)
return " ".join(value.values())
def _find_value_separator(pretty: str, value: str, start: int) -> int:
    """Locate the colon that separates a key from `value` at or after `start`.

    Prefers an occurrence of ":" + value that is immediately followed by
    the ", " field separator or by the end of `pretty`, because a key may
    itself contain ":" + value. Falls back to the first occurrence when no
    occurrence sits on such a boundary. Returns -1 when ":" + value does
    not occur at all.
    """
    needle = ":" + value
    first_match = pretty.find(needle, start)
    candidate = first_match
    while candidate != -1:
        value_end = candidate + len(needle)
        if value_end == len(pretty) or pretty.startswith(", ", value_end):
            return candidate
        candidate = pretty.find(needle, candidate + 1)
    return first_match


def get_pretty_fields(pretty: str, values: list[str]) -> list[tuple[str, str]]:
    """Split the "pretty" payload string into ordered (key, value) pairs.

    The format of "pretty" data in the payload is "key1:value1, key2:value2, ..."
    The parameter "values" contains, in order, all substrings that appear
    as values in the pretty data. Each pair is extracted by locating
    ":" + value after the start of the current pair; the text before that
    colon is the key, and the next pair starts after the ", " that follows
    the value.

    This approach is necessary because keys and values may contain colons
    or commas, making it unreliable to simply split the string using ', '.
    Searching for the colon together with the value (rather than the bare
    value) keeps a key intact when it contains the value's text or begins
    with the value followed by a colon.

    A value that cannot be found is skipped instead of being sliced out
    with a negative index; its text, if present in another form, ends up
    in the key of the next pair that is found.

    Known limitation: a key that itself contains ":" + value + ", " is
    still ambiguous and is split at the first such occurrence.

    Returns a list of (key, value) pairs to preserve order and allow duplicate keys.
    """
    pretty_fields: list[tuple[str, str]] = []
    pair_starting_index = 0
    for value in values:
        separator_index = _find_value_separator(pretty, value, pair_starting_index)
        if separator_index == -1:
            continue
        value_starting_index = separator_index + 1
        value_ending_index = value_starting_index + len(value)
        key = pretty[pair_starting_index:separator_index]
        val = pretty[value_starting_index:value_ending_index]
        pretty_fields.append((key, val))
        # 2 characters for comma and space, which separate the fields
        pair_starting_index = value_ending_index + 2
    return pretty_fields
@webhook_view("Jotform")
@typed_endpoint_without_parameters
def api_jotform_webhook(
    request: HttpRequest,
    user_profile: UserProfile,
) -> HttpResponse:
    """Turn a Jotform submission into a Markdown bullet list and send it.

    Reads "formTitle" (used as the topic), "rawRequest" (a JSON object
    holding the raw answers) and "pretty" (the "key:value, ..." summary)
    from the POST data. The answer values gathered from "rawRequest" are
    used to split "pretty" into labelled fields, and file-upload answers
    are rendered as Markdown links.

    Raises JsonableError when the form title is missing or no fields could
    be extracted from the payload.
    """
    payload = request.POST
    topic_name = payload.get("formTitle")
    raw_request = json.loads(payload.get("rawRequest", "{}"))

    non_file_upload_values_in_pretty_data = [
        format_value(value)
        for field, value in raw_request.items()
        if is_non_file_upload_question_with_response(field, raw_request)
    ]

    url_mappings: dict[str, str] = {}
    file_values_in_pretty_data: list[str] = []
    temp_upload = raw_request.get("temp_upload")
    if temp_upload:
        # Each upload key is reduced to the part after its first "_", which
        # is then used to look up that question's list of file URLs.
        file_questions_keys = [key.split("_", 1)[-1] for key in temp_upload.keys()]
        # The value for a file question is built from the URL-decoded last
        # path segment of each of its URLs, joined with spaces.
        file_values_in_pretty_data = [
            " ".join(unquote(url.split("/")[-1]) for url in raw_request.get(key, []))
            for key in file_questions_keys
        ]
        url_mappings = {
            unquote(url.split("/")[-1]): url
            for key in file_questions_keys
            for url in raw_request.get(key, [])
        }

    # Non file values come earlier than file values in the "pretty" data of the payload.
    pretty_values = non_file_upload_values_in_pretty_data + file_values_in_pretty_data
    pretty_fields = get_pretty_fields(payload.get("pretty", ""), pretty_values)

    if not topic_name or not pretty_fields:
        raise JsonableError(_("Unable to handle Jotform payload"))

    first_file_field_index = len(non_file_upload_values_in_pretty_data)
    form_response_parts: list[str] = []
    for index, (label, value) in enumerate(pretty_fields):
        separator = " " if label.endswith("?") else ": "
        # File upload fields are last in the "pretty" payload data.
        if index >= first_file_field_index:
            value = format_uploaded_file_links(value, url_mappings)
            # We add a new line so that image files are rendered
            # correctly in the message.
            form_response_parts.append("\n")
        form_response_parts.append(f"* **{label}**{separator}{value}\n")

    message = "".join(form_response_parts).strip()
    check_send_webhook_message(request, user_profile, topic_name, message)
    return json_success(request)