diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 27d33c8..8ae6e02 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -10,7 +10,6 @@ on: branches: ["main"] workflow_dispatch: env: - GHCR_IMAGE: ghcr.io/c4illin/convertx IMAGE_NAME: ${{ github.repository }} DOCKERHUB_USERNAME: c4illin @@ -53,11 +52,15 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 + - name: downcase REPO + run: | + echo "REPO=${GITHUB_REPOSITORY@L}" >> "${GITHUB_ENV}" + - name: Docker meta default id: meta uses: docker/metadata-action@v5 with: - images: ${{ env.GHCR_IMAGE }} + images: ghcr.io/${{ env.REPO }} - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -82,7 +85,7 @@ jobs: platforms: ${{ matrix.platform }} labels: ${{ steps.meta.outputs.labels }} annotations: ${{ steps.meta.outputs.annotations }} - outputs: type=image,name=${{ env.GHCR_IMAGE }},push-by-digest=true,name-canonical=true,push=true,oci-mediatypes=true + outputs: type=image,name=ghcr.io/${{ env.REPO }},push-by-digest=true,name-canonical=true,push=true,oci-mediatypes=true cache-from: type=gha,scope=${{ matrix.platform }} cache-to: type=gha,mode=max,scope=${{ matrix.platform }} @@ -119,12 +122,16 @@ jobs: pattern: digests-* merge-multiple: true + - name: downcase REPO + run: | + echo "REPO=${GITHUB_REPOSITORY@L}" >> "${GITHUB_ENV}" + - name: Extract Docker metadata id: meta uses: docker/metadata-action@v5 with: images: | - ${{ env.GHCR_IMAGE }} + ghcr.io/${{ env.REPO }} ${{ env.IMAGE_NAME }} - name: Set up Docker Buildx @@ -157,8 +164,8 @@ jobs: --annotation='index:org.opencontainers.image.created=${{ steps.timestamp.outputs.timestamp }}' \ --annotation='index:org.opencontainers.image.url=${{ github.event.repository.url }}' \ --annotation='index:org.opencontainers.image.source=${{ github.event.repository.url }}' \ - $(printf '${{ env.GHCR_IMAGE }}@sha256:%s ' *) + $(printf 'ghcr.io/${{ env.REPO }}@sha256:%s ' *) - 
name: Inspect image run: | - docker buildx imagetools inspect '${{ env.GHCR_IMAGE }}:${{ steps.meta.outputs.version }}' + docker buildx imagetools inspect 'ghcr.io/${{ env.REPO }}:${{ steps.meta.outputs.version }}' diff --git a/Dockerfile b/Dockerfile index 1e8f424..306c5bd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -56,8 +56,10 @@ RUN apt-get update && apt-get install -y \ inkscape \ libheif-examples \ libjxl-tools \ + libreoffice \ libva2 \ libvips-tools \ + libemail-outlook-message-perl \ lmodern \ mupdf-tools \ pandoc \ diff --git a/README.md b/README.md index 23bfa76..0d65690 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ services: - "3000:3000" environment: - JWT_SECRET=aLongAndSecretStringUsedToSignTheJSONWebToken1234 # will use randomUUID() if unset + # - HTTP_ALLOWED=true # uncomment this if accessing it over a non-https connection volumes: - ./data:/app/data ``` @@ -80,17 +81,19 @@ If you get unable to open database file run `chown -R $USER:$USER path` on the p All are optional, JWT_SECRET is recommended to be set. -| Name | Default | Description | -| ------------------------- | -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | -| JWT_SECRET | when unset it will use the value from randomUUID() | A long and secret string used to sign the JSON Web Token | -| ACCOUNT_REGISTRATION | false | Allow users to register accounts | -| HTTP_ALLOWED | false | Allow HTTP connections, only set this to true locally | -| ALLOW_UNAUTHENTICATED | false | Allow unauthenticated users to use the service, only set this to true locally | -| AUTO_DELETE_EVERY_N_HOURS | 24 | Checks every n hours for files older then n hours and deletes them, set to 0 to disable | -| WEBROOT | | The address to the root path setting this to "/convert" will serve the website on "example.com/convert/" | -| FFMPEG_ARGS | | Arguments to pass to ffmpeg, e.g. 
`-preset veryfast` | -| HIDE_HISTORY | false | Hide the history page | -| LANGUAGE | en | Language to format date strings in, specified as a [BCP 47 language tag](https://en.wikipedia.org/wiki/IETF_language_tag) | +| Name | Default | Description | +| ---------------------------- | -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | +| JWT_SECRET | when unset it will use the value from randomUUID() | A long and secret string used to sign the JSON Web Token | +| ACCOUNT_REGISTRATION | false | Allow users to register accounts | +| HTTP_ALLOWED | false | Allow HTTP connections, only set this to true locally | +| ALLOW_UNAUTHENTICATED | false | Allow unauthenticated users to use the service, only set this to true locally | +| AUTO_DELETE_EVERY_N_HOURS | 24 | Checks every n hours for files older than n hours and deletes them, set to 0 to disable | +| WEBROOT | | The address to the root path setting this to "/convert" will serve the website on "example.com/convert/" | +| FFMPEG_ARGS | | Arguments to pass to ffmpeg, e.g. 
`-preset veryfast` | +| HIDE_HISTORY | false | Hide the history page | +| LANGUAGE | en | Language to format date strings in, specified as a [BCP 47 language tag](https://en.wikipedia.org/wiki/IETF_language_tag) | +| UNAUTHENTICATED_USER_SHARING | false | Shares conversion history between all unauthenticated users | + ### Docker images diff --git a/bun.lock b/bun.lock index ec30c1f..4bdeb22 100644 --- a/bun.lock +++ b/bun.lock @@ -10,6 +10,7 @@ "@kitajs/html": "^4.2.9", "elysia": "^1.3.4", "sanitize-filename": "^1.6.3", + "tar": "^7.4.3", }, "devDependencies": { "@eslint/js": "^9.28.0", diff --git a/compose.yaml b/compose.yaml index e129451..5b26ff8 100644 --- a/compose.yaml +++ b/compose.yaml @@ -15,5 +15,6 @@ services: # - WEBROOT=/convertx # the root path of the web interface, leave empty to disable # - HIDE_HISTORY=true # hides the history tab in the web interface, defaults to false - TZ=Europe/Stockholm # set your timezone, defaults to UTC + # - UNAUTHENTICATED_USER_SHARING=true # for use with ALLOW_UNAUTHENTICATED=true to share history with all unauthenticated users / devices ports: - 3000:3000 diff --git a/package.json b/package.json index e53b75a..571ad9f 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,8 @@ "@elysiajs/static": "^1.3.0", "@kitajs/html": "^4.2.9", "elysia": "^1.3.4", - "sanitize-filename": "^1.6.3" + "sanitize-filename": "^1.6.3", + "tar": "^7.4.3" }, "module": "src/index.tsx", "type": "module", diff --git a/public/results.js b/public/results.js index 7659edb..b125e93 100644 --- a/public/results.js +++ b/public/results.js @@ -1,18 +1,4 @@ const webroot = document.querySelector("meta[name='webroot']").content; - -window.downloadAll = function () { - // Get all download links - const downloadLinks = document.querySelectorAll("a[download]"); - - // Trigger download for each link - downloadLinks.forEach((link, index) => { - // We add a delay for each download to prevent them from starting at the same time - setTimeout(() => { - const 
event = new MouseEvent("click"); - link.dispatchEvent(event); - }, index * 100); - }); -}; const jobId = window.location.pathname.split("/").pop(); const main = document.querySelector("main"); let progressElem = document.querySelector("progress"); diff --git a/src/converters/imagemagick.ts b/src/converters/imagemagick.ts index 4122a24..919308b 100644 --- a/src/converters/imagemagick.ts +++ b/src/converters/imagemagick.ts @@ -460,6 +460,13 @@ export function convert( } } + // Handle EMF files specifically to avoid LibreOffice delegate issues + if (fileType === "emf") { + // Use direct conversion without delegates for EMF files + inputArgs.push("-define", "emf:delegate=false", "-density", "300"); + outputArgs.push("-background", "white", "-alpha", "remove"); + } + return new Promise((resolve, reject) => { execFile( "magick", diff --git a/src/converters/libreoffice.ts b/src/converters/libreoffice.ts new file mode 100644 index 0000000..d5c47af --- /dev/null +++ b/src/converters/libreoffice.ts @@ -0,0 +1,176 @@ +import { execFile } from "node:child_process"; + +export const properties = { + from: { + text: [ + "602", + "abw", + "csv", + "cwk", + "doc", + "docm", + "docx", + "dot", + "dotx", + "dotm", + "epub", + "fb2", + "fodt", + "htm", + "html", + "hwp", + "mcw", + "mw", + "mwd", + "lwp", + "lrf", + "odt", + "ott", + "pages", + "pdf", + "psw", + "rtf", + "sdw", + "stw", + "sxw", + "tab", + "tsv", + "txt", + "wn", + "wpd", + "wps", + "wpt", + "wri", + "xhtml", + "xml", + "zabw", + ], + }, + to: { + text: [ + "csv", + "doc", + "docm", + "docx", + "dot", + "dotx", + "dotm", + "epub", + "fodt", + "htm", + "html", + "odt", + "ott", + "pdf", + "rtf", + "tab", + "tsv", + "txt", + "wps", + "wpt", + "xhtml", + "xml", + ], + }, +}; + +type FileCategories = "text" | "calc"; + +const filters: Record> = { + text: { + "602": "T602Document", + abw: "AbiWord", + csv: "Text", + doc: "MS Word 97", + docm: "MS Word 2007 XML VBA", + docx: "MS Word 2007 XML", + dot: "MS Word 97 Vorlage", 
+ dotx: "MS Word 2007 XML Template", + dotm: "MS Word 2007 XML Template", + epub: "EPUB", + fb2: "Fictionbook 2", + fodt: "OpenDocument Text Flat XML", + htm: "HTML (StarWriter)", + html: "HTML (StarWriter)", + hwp: "writer_MIZI_Hwp_97", + mcw: "MacWrite", + mw: "MacWrite", + mwd: "Mariner_Write", + lwp: "LotusWordPro", + lrf: "BroadBand eBook", + odt: "writer8", + ott: "writer8_template", + pages: "Apple Pages", + // pdf: "writer_pdf_import", + psw: "PocketWord File", + rtf: "Rich Text Format", + sdw: "StarOffice_Writer", + stw: "writer_StarOffice_XML_Writer_Template", + sxw: "StarOffice XML (Writer)", + tab: "Text", + tsv: "Text", + txt: "Text", + wn: "WriteNow", + wpd: "WordPerfect", + wps: "MS Word 97", + wpt: "MS Word 97 Vorlage", + wri: "MS_Write", + xhtml: "HTML (StarWriter)", + xml: "OpenDocument Text Flat XML", + zabw: "AbiWord", + }, + calc: {}, +}; + +const getFilters = (fileType: string, converto: string) => { + if (fileType in filters.text && converto in filters.text) { + return [filters.text[fileType], filters.text[converto]]; + } else if (fileType in filters.calc && converto in filters.calc) { + return [filters.calc[fileType], filters.calc[converto]]; + } + return [null, null]; +}; + +export function convert( + filePath: string, + fileType: string, + convertTo: string, + targetPath: string, + // eslint-disable-next-line @typescript-eslint/no-unused-vars + options?: unknown, +): Promise { + const outputPath = targetPath.split("/").slice(0, -1).join("/").replace("./", "") ?? 
targetPath; + + // Build arguments array + const args: string[] = []; + args.push("--headless"); + const [inFilter, outFilter] = getFilters(fileType, convertTo); + + if (inFilter) { + args.push(`--infilter="${inFilter}"`); + } + + if (outFilter) { + args.push("--convert-to", `${convertTo}:${outFilter}`, "--outdir", outputPath, filePath); + } else { + args.push("--convert-to", convertTo, "--outdir", outputPath, filePath); + } + + return new Promise((resolve, reject) => { + execFile("soffice", args, (error, stdout, stderr) => { + if (error) { + reject(`error: ${error}`); + } + + if (stdout) { + console.log(`stdout: ${stdout}`); + } + + if (stderr) { + console.error(`stderr: ${stderr}`); + } + + resolve("Done"); + }); + }); +} diff --git a/src/converters/main.ts b/src/converters/main.ts index a9f2e42..63106ff 100644 --- a/src/converters/main.ts +++ b/src/converters/main.ts @@ -1,4 +1,6 @@ -import { normalizeFiletype } from "../helpers/normalizeFiletype"; +import db from "../db/db"; +import { MAX_CONVERT_PROCESS } from "../helpers/env"; +import { normalizeFiletype, normalizeOutputFiletype } from "../helpers/normalizeFiletype"; import { convert as convertassimp, properties as propertiesassimp } from "./assimp"; import { convert as convertCalibre, properties as propertiesCalibre } from "./calibre"; import { convert as convertDvisvgm, properties as propertiesDvisvgm } from "./dvisvgm"; @@ -11,6 +13,8 @@ import { convert as convertImagemagick, properties as propertiesImagemagick } fr import { convert as convertInkscape, properties as propertiesInkscape } from "./inkscape"; import { convert as convertLibheif, properties as propertiesLibheif } from "./libheif"; import { convert as convertLibjxl, properties as propertiesLibjxl } from "./libjxl"; +import { convert as convertLibreOffice, properties as propertiesLibreOffice } from "./libreoffice"; +import { convert as convertMsgconvert, properties as propertiesMsgconvert } from "./msgconvert"; import { convert as convertPandoc, 
properties as propertiesPandoc } from "./pandoc"; import { convert as convertPotrace, properties as propertiesPotrace } from "./potrace"; import { convert as convertresvg, properties as propertiesresvg } from "./resvg"; @@ -47,6 +51,11 @@ const properties: Record< ) => unknown; } > = { + // Prioritize Inkscape for EMF files as it handles them better than ImageMagick + inkscape: { + properties: propertiesInkscape, + converter: convertInkscape, + }, libjxl: { properties: propertiesLibjxl, converter: convertLibjxl, @@ -71,10 +80,18 @@ const properties: Record< properties: propertiesCalibre, converter: convertCalibre, }, + libreoffice: { + properties: propertiesLibreOffice, + converter: convertLibreOffice, + }, pandoc: { properties: propertiesPandoc, converter: convertPandoc, }, + msgconvert: { + properties: propertiesMsgconvert, + converter: convertMsgconvert, + }, dvisvgm: { properties: propertiesDvisvgm, converter: convertDvisvgm, @@ -87,10 +104,6 @@ const properties: Record< properties: propertiesGraphicsmagick, converter: convertGraphicsmagick, }, - inkscape: { - properties: propertiesInkscape, - converter: convertInkscape, - }, assimp: { properties: propertiesassimp, converter: convertassimp, @@ -105,6 +118,63 @@ const properties: Record< }, }; +function chunks(arr: T[], size: number): T[][] { + if(size <= 0){ + return [arr] + } + return Array.from({ length: Math.ceil(arr.length / size) }, (_: T, i: number) => + arr.slice(i * size, i * size + size) + ); +} + +export async function handleConvert( + fileNames: string[], + userUploadsDir: string, + userOutputDir: string, + convertTo: string, + converterName: string, + jobId: any +) { + + const query = db.query( + "INSERT INTO file_names (job_id, file_name, output_file_name, status) VALUES (?1, ?2, ?3, ?4)", + ); + + + for (const chunk of chunks(fileNames, MAX_CONVERT_PROCESS)) { + const toProcess: Promise[] = []; + for(const fileName of chunk) { + const filePath = `${userUploadsDir}${fileName}`; + const fileTypeOrig 
= fileName.split(".").pop() ?? ""; + const fileType = normalizeFiletype(fileTypeOrig); + const newFileExt = normalizeOutputFiletype(convertTo); + const newFileName = fileName.replace( + new RegExp(`${fileTypeOrig}(?!.*${fileTypeOrig})`), + newFileExt, + ); + const targetPath = `${userOutputDir}${newFileName}`; + toProcess.push( + new Promise((resolve, reject) => { + mainConverter( + filePath, + fileType, + convertTo, + targetPath, + {}, + converterName, + ).then(r => { + if (jobId.value) { + query.run(jobId.value, fileName, newFileName, r); + } + resolve(r); + }).catch(c => reject(c)); + }) + ); + } + await Promise.all(toProcess); + } +} + export async function mainConverter( inputFilePath: string, fileTypeOriginal: string, diff --git a/src/converters/msgconvert.ts b/src/converters/msgconvert.ts new file mode 100644 index 0000000..b6bf826 --- /dev/null +++ b/src/converters/msgconvert.ts @@ -0,0 +1,45 @@ +import { execFile } from "node:child_process"; + +export const properties = { + from: { + email: ["msg"], + }, + to: { + email: ["eml"], + }, +}; + +export function convert( + filePath: string, + fileType: string, + convertTo: string, + targetPath: string, + // eslint-disable-next-line @typescript-eslint/no-unused-vars + options?: unknown, +): Promise { + return new Promise((resolve, reject) => { + if (fileType === "msg" && convertTo === "eml") { + // Convert MSG to EML using msgconvert + // msgconvert will output to the same directory as the input file with .eml extension + // We need to use --outfile to specify the target path + const args = ["--outfile", targetPath, filePath]; + + execFile("msgconvert", args, (error, stdout, stderr) => { + if (error) { + reject(new Error(`msgconvert failed: ${error.message}`)); + return; + } + + if (stderr) { + // Log sanitized stderr to avoid exposing sensitive paths + const sanitizedStderr = stderr.replace(/(\/[^\s]+)/g, "[REDACTED_PATH]"); + console.warn(`msgconvert stderr: ${sanitizedStderr.length > 200 ? 
sanitizedStderr.slice(0, 200) + '...' : sanitizedStderr}`); + } + + resolve(targetPath); + }); + } else { + reject(new Error(`Unsupported conversion from ${fileType} to ${convertTo}. Only MSG to EML conversion is currently supported.`)); + } + }); +} diff --git a/src/helpers/env.ts b/src/helpers/env.ts index cbdf423..6722c54 100644 --- a/src/helpers/env.ts +++ b/src/helpers/env.ts @@ -15,3 +15,8 @@ export const HIDE_HISTORY = process.env.HIDE_HISTORY?.toLowerCase() === "true" | export const WEBROOT = process.env.WEBROOT ?? ""; export const LANGUAGE = process.env.LANGUAGE?.toLowerCase() || "en"; + +export const MAX_CONVERT_PROCESS = process.env.MAX_CONVERT_PROCESS && Number(process.env.MAX_CONVERT_PROCESS) > 0 ? Number(process.env.MAX_CONVERT_PROCESS) : 0 + +export const UNAUTHENTICATED_USER_SHARING = + process.env.UNAUTHENTICATED_USER_SHARING?.toLowerCase() === "true" || false; \ No newline at end of file diff --git a/src/helpers/printVersions.ts b/src/helpers/printVersions.ts index 42044d2..eb1a591 100644 --- a/src/helpers/printVersions.ts +++ b/src/helpers/printVersions.ts @@ -144,6 +144,26 @@ if (process.env.NODE_ENV === "production") { } }); + exec("soffice --version", (error, stdout) => { + if (error) { + console.error("libreoffice is not installed"); + } + + if (stdout) { + console.log(stdout.split("\n")[0]); + } + }); + + exec("msgconvert --version", (error, stdout) => { + if (error) { + console.error("msgconvert (libemail-outlook-message-perl) is not installed"); + } + + if (stdout) { + console.log(stdout.split("\n")[0]); + } + }); + exec("bun -v", (error, stdout) => { if (error) { console.error("Bun is not installed. 
wait what"); diff --git a/src/pages/convert.tsx b/src/pages/convert.tsx index a95302c..3dbb892 100644 --- a/src/pages/convert.tsx +++ b/src/pages/convert.tsx @@ -2,11 +2,11 @@ import { mkdir } from "node:fs/promises"; import { Elysia, t } from "elysia"; import sanitize from "sanitize-filename"; import { outputDir, uploadsDir } from ".."; -import { mainConverter } from "../converters/main"; +import { handleConvert } from "../converters/main"; import db from "../db/db"; import { Jobs } from "../db/types"; import { WEBROOT } from "../helpers/env"; -import { normalizeFiletype, normalizeOutputFiletype } from "../helpers/normalizeFiletype"; +import { normalizeFiletype } from "../helpers/normalizeFiletype"; import { userService } from "./user"; export const convert = new Elysia().use(userService).post( @@ -61,36 +61,8 @@ export const convert = new Elysia().use(userService).post( jobId.value, ); - const query = db.query( - "INSERT INTO file_names (job_id, file_name, output_file_name, status) VALUES (?1, ?2, ?3, ?4)", - ); - // Start the conversion process in the background - Promise.all( - fileNames.map(async (fileName) => { - const filePath = `${userUploadsDir}${fileName}`; - const fileTypeOrig = fileName.split(".").pop() ?? 
""; - const fileType = normalizeFiletype(fileTypeOrig); - const newFileExt = normalizeOutputFiletype(convertTo); - const newFileName = fileName.replace( - new RegExp(`${fileTypeOrig}(?!.*${fileTypeOrig})`), - newFileExt, - ); - const targetPath = `${userOutputDir}${newFileName}`; - - const result = await mainConverter( - filePath, - fileType, - convertTo, - targetPath, - {}, - converterName, - ); - if (jobId.value) { - query.run(jobId.value, fileName, newFileName, result); - } - }), - ) + handleConvert(fileNames, userUploadsDir, userOutputDir, convertTo, converterName, jobId) .then(() => { // All conversions are done, update the job status to 'completed' if (jobId.value) { diff --git a/src/pages/download.tsx b/src/pages/download.tsx index 917ee39..3333396 100644 --- a/src/pages/download.tsx +++ b/src/pages/download.tsx @@ -4,6 +4,8 @@ import { outputDir } from ".."; import db from "../db/db"; import { WEBROOT } from "../helpers/env"; import { userService } from "./user"; +import path from "node:path"; +import * as tar from "tar"; export const download = new Elysia() .use(userService) @@ -35,8 +37,7 @@ export const download = new Elysia() return Bun.file(filePath); }, ) - .get("/zip/:userId/:jobId", async ({ params, jwt, redirect, cookie: { auth } }) => { - // TODO: Implement zip download + .get("/archive/:userId/:jobId", async ({ params, jwt, redirect, cookie: { auth } }) => { if (!auth?.value) { return redirect(`${WEBROOT}/login`, 302); } @@ -54,9 +55,11 @@ export const download = new Elysia() return redirect(`${WEBROOT}/results`, 302); } - // const userId = decodeURIComponent(params.userId); - // const jobId = decodeURIComponent(params.jobId); - // const outputPath = `${outputDir}${userId}/`{jobId}/); + const userId = decodeURIComponent(params.userId); + const jobId = decodeURIComponent(params.jobId); + const outputPath = `${outputDir}${userId}/${jobId}`; + const outputTar = path.join(outputPath, `converted_files_${jobId}.tar`) - // return Bun.zip(outputPath); + 
await tar.create({file: outputTar, cwd: outputPath, filter: (path) => { return !path.match(".*\\.tar"); }}, ["."]); + return Bun.file(outputTar); }); diff --git a/src/pages/results.tsx b/src/pages/results.tsx index 81e6128..edd70e7 100644 --- a/src/pages/results.tsx +++ b/src/pages/results.tsx @@ -6,12 +6,17 @@ import db from "../db/db"; import { Filename, Jobs } from "../db/types"; import { ALLOW_UNAUTHENTICATED, WEBROOT } from "../helpers/env"; import { userService } from "./user"; +import { JWTPayloadSpec } from "@elysiajs/jwt"; function ResultsArticle({ + user, job, files, outputPath, }: { + user: { + id: string; + } & JWTPayloadSpec; job: Jobs; files: Filename[]; outputPath: string; @@ -21,14 +26,19 @@ function ResultsArticle({

Results

- + +
- +