Files
zulip/zerver/data_import/hipchat_attachment.py
Anders Kaseorg 365fe0b3d5 python: Sort imports with isort.
Fixes #2665.

Regenerated by tabbott with `lint --fix` after a rebase and change in
parameters.

Note from tabbott: In a few cases, this converts technical debt in the
form of unsorted imports into different technical debt in the form of
our largest files having very long, ugly import sequences at the
start.  I expect this change will increase pressure for us to split
those files, which isn't a bad thing.

Signed-off-by: Anders Kaseorg <anders@zulip.com>
2020-06-11 16:45:32 -07:00

137 lines
4.5 KiB
Python

import logging
import os
import shutil
from typing import Any, Dict, List, Optional
from zerver.data_import.import_util import build_attachment, create_converted_data_files
class AttachmentHandler:
    """Track HipChat file attachments seen during conversion and export them.

    ``handle_message_data`` is called per message attachment and records
    the file in ``self.info_dict``; ``write_info`` later copies the recorded
    files under ``<output_dir>/uploads`` and writes the accompanying
    metadata files.
    """

    def __init__(self) -> None:
        # Keyed by target_path (realm-relative upload path); each value
        # is the info dict built in handle_message_data.
        self.info_dict: Dict[str, Dict[str, Any]] = {}

    def handle_message_data(self,
                            realm_id: int,
                            message_id: int,
                            sender_id: int,
                            attachment: Dict[str, Any],
                            files_dir: str) -> Optional[str]:
        """Record one message attachment and return its Markdown link.

        Args:
            realm_id: Realm id; used as the first component of the
                upload target path.
            message_id: Id of the message referencing the attachment.
            sender_id: Id of the user who sent the message.
            attachment: Attachment record from the export.  Must contain
                'name'; 'path' is optional.  Any 'size' entry is ignored.
            files_dir: Directory that the attachment's 'path' is
                relative to.

        Returns:
            A Markdown link of the form ``[name](/user_uploads/<target>)``,
            or None when the attachment is empty, has no 'path', or its
            file is not present under ``files_dir``.
        """
        if not attachment:
            return None

        name = attachment['name']

        if 'path' not in attachment:
            logging.info('Skipping HipChat attachment with missing path data: %s', name)
            return None

        path = attachment['path']
        local_fn = os.path.join(files_dir, path)

        if not os.path.exists(local_fn):
            # HipChat has an option to not include these in its
            # exports, since file uploads can be very large.
            logging.info('Skipping attachment with no file data: %s', local_fn)
            return None

        target_path = os.path.join(
            str(realm_id),
            'HipChatImportAttachment',
            path,
        )

        if target_path in self.info_dict:
            # Same file referenced by another message: just remember the
            # extra message id and reuse the link generated the first time.
            logging.info("file used multiple times: %s", path)
            info = self.info_dict[target_path]
            info['message_ids'].add(message_id)
            return info['content']

        # HipChat provides size info, but it's not completely
        # trustworthy, so we just ask the OS for file details.  (This is
        # also why attachment['size'] is never read: requiring the key
        # would only make records without it fail with a KeyError.)
        size = os.path.getsize(local_fn)
        mtime = os.path.getmtime(local_fn)

        content = f'[{name}](/user_uploads/{target_path})'

        info = dict(
            message_ids={message_id},
            sender_id=sender_id,
            local_fn=local_fn,
            target_path=target_path,
            name=name,
            size=size,
            mtime=mtime,
            content=content,
        )
        self.info_dict[target_path] = info

        return content

    def write_info(self, output_dir: str, realm_id: int) -> None:
        """Copy every recorded attachment and write the metadata files.

        For each entry in ``self.info_dict`` this builds an attachment
        record, builds an upload record, and copies the source file to
        ``<output_dir>/uploads/<target_path>``.  It then hands the
        collected records to ``create_converted_data_files`` for
        '/uploads/records.json' and '/attachment.json'.

        Args:
            output_dir: Root directory of the converted data.
            realm_id: Realm id stored in the generated records.
        """
        attachments: List[Dict[str, Any]] = []
        uploads_records: List[Dict[str, Any]] = []

        def add_attachment(info: Dict[str, Any]) -> None:
            # build_attachment is handed the `attachments` list as
            # zerver_attachment; presumably it appends the new row to
            # it -- confirm against import_util.
            build_attachment(
                realm_id=realm_id,
                message_ids=info['message_ids'],
                user_id=info['sender_id'],
                fileinfo=dict(
                    created=info['mtime'],  # minor lie
                    size=info['size'],
                    name=info['name'],
                ),
                s3_path=info['target_path'],
                zerver_attachment=attachments,
            )

        def add_upload(info: Dict[str, Any]) -> None:
            # One upload record per file; s3_path and path are both the
            # realm-relative target path.
            target_path = info['target_path']
            upload_rec = dict(
                size=info['size'],
                user_profile_id=info['sender_id'],
                realm_id=realm_id,
                s3_path=target_path,
                path=target_path,
                content_type=None,
            )
            uploads_records.append(upload_rec)

        def make_full_target_path(info: Dict[str, Any]) -> str:
            # Absolute destination under <output_dir>/uploads; the parent
            # directory is created so the copy can proceed.
            target_path = info['target_path']
            full_target_path = os.path.join(
                output_dir,
                'uploads',
                target_path,
            )
            full_target_path = os.path.abspath(full_target_path)
            os.makedirs(os.path.dirname(full_target_path), exist_ok=True)
            return full_target_path

        def copy_file(info: Dict[str, Any]) -> None:
            source_path = info['local_fn']
            target_path = make_full_target_path(info)
            shutil.copyfile(source_path, target_path)

        logging.info('Start processing attachment files')

        for info in self.info_dict.values():
            add_attachment(info)
            add_upload(info)
            copy_file(info)

        # Create the realm's uploads directory unconditionally, so it
        # exists even when no attachment was recorded.
        uploads_folder = os.path.join(output_dir, 'uploads')
        os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)

        attachment = dict(
            zerver_attachment=attachments,
        )

        create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
        create_converted_data_files(attachment, output_dir, '/attachment.json')

        logging.info('Done processing attachment files')