Compare commits

...

3 Commits

Author SHA1 Message Date
Abhinav Raut
550a3fa801 fix: update Message-ID determination logic to prefer IMAP-parsed IDs over raw headers 2025-08-20 04:26:23 +05:30
Abhinav Raut
6bbfbe8cf6 add test cases for imap msg id parsing 2025-08-20 04:18:37 +05:30
Abhinav Raut
f9ed326d72 fix: handle message empty message ids in imap 2025-08-20 03:29:16 +05:30
2 changed files with 171 additions and 16 deletions

View File

@@ -140,6 +140,7 @@ func (e *Email) fetchAndProcessMessages(ctx context.Context, client *imapclient.
headerAutoSubmitted,
headerAutoreply,
headerLibredeskLoopPrevention,
headerMessageID,
},
},
},
@@ -147,10 +148,11 @@ func (e *Email) fetchAndProcessMessages(ctx context.Context, client *imapclient.
// Collect messages to process later.
type msgData struct {
env *imap.Envelope
seqNum uint32
autoReply bool
isLoop bool
env *imap.Envelope
seqNum uint32
autoReply bool
isLoop bool
extractedMessageID string
}
var messages []msgData
@@ -182,9 +184,10 @@ func (e *Email) fetchAndProcessMessages(ctx context.Context, client *imapclient.
}
var (
env *imap.Envelope
autoReply bool
isLoop bool
env *imap.Envelope
autoReply bool
isLoop bool
extractedMessageID string
)
// Process all fetch items for the current message.
for {
@@ -215,6 +218,9 @@ func (e *Email) fetchAndProcessMessages(ctx context.Context, client *imapclient.
if isLoopMessage(envelope, inboxEmail) {
isLoop = true
}
// Extract Message-Id from raw headers as fallback for problematic Message IDs
extractedMessageID = extractMessageIDFromHeaders(envelope)
}
// Envelope.
@@ -223,12 +229,13 @@ func (e *Email) fetchAndProcessMessages(ctx context.Context, client *imapclient.
}
}
// Skip if we couldn't get headers or envelope.
// Skip if we couldn't get the envelope.
if env == nil {
e.lo.Warn("skipping message without envelope", "seq_num", msg.SeqNum, "inbox_id", e.Identifier())
continue
}
messages = append(messages, msgData{env: env, seqNum: msg.SeqNum, autoReply: autoReply, isLoop: isLoop})
messages = append(messages, msgData{env: env, seqNum: msg.SeqNum, autoReply: autoReply, isLoop: isLoop, extractedMessageID: extractedMessageID})
}
// Now process each collected message.
@@ -253,7 +260,7 @@ func (e *Email) fetchAndProcessMessages(ctx context.Context, client *imapclient.
}
// Process the envelope.
if err := e.processEnvelope(ctx, client, msgData.env, msgData.seqNum, inboxID); err != nil && err != context.Canceled {
if err := e.processEnvelope(ctx, client, msgData.env, msgData.seqNum, inboxID, msgData.extractedMessageID); err != nil && err != context.Canceled {
e.lo.Error("error processing envelope", "error", err)
}
}
@@ -262,17 +269,32 @@ func (e *Email) fetchAndProcessMessages(ctx context.Context, client *imapclient.
}
// processEnvelope processes a single email envelope.
func (e *Email) processEnvelope(ctx context.Context, client *imapclient.Client, env *imap.Envelope, seqNum uint32, inboxID int) error {
func (e *Email) processEnvelope(ctx context.Context, client *imapclient.Client, env *imap.Envelope, seqNum uint32, inboxID int, extractedMessageID string) error {
if len(env.From) == 0 {
e.lo.Warn("no sender received for email", "message_id", env.MessageID)
return nil
}
var fromAddress = strings.ToLower(env.From[0].Addr())
// Determine final Message ID - prefer IMAP-parsed, fallback to raw header extraction
messageID := env.MessageID
if messageID == "" {
messageID = extractedMessageID
if messageID != "" {
e.lo.Debug("using raw header Message-ID as fallback for malformed ID", "message_id", messageID, "subject", env.Subject, "from", fromAddress)
}
}
// Drop message if we still don't have a valid Message ID
if messageID == "" {
e.lo.Error("dropping message: no valid Message-ID found in IMAP parsing or raw headers", "subject", env.Subject, "from", fromAddress)
return nil
}
// Check if the message already exists in the database; if it does, ignore it.
exists, err := e.messageStore.MessageExists(env.MessageID)
exists, err := e.messageStore.MessageExists(messageID)
if err != nil {
e.lo.Error("error checking if message exists", "message_id", env.MessageID)
e.lo.Error("error checking if message exists", "message_id", messageID)
return fmt.Errorf("checking if message exists in DB: %w", err)
}
if exists {
@@ -291,7 +313,7 @@ func (e *Email) processEnvelope(ctx context.Context, client *imapclient.Client,
return nil
}
e.lo.Debug("processing new incoming message", "message_id", env.MessageID, "subject", env.Subject, "from", fromAddress, "inbox_id", inboxID)
e.lo.Debug("processing new incoming message", "message_id", messageID, "subject", env.Subject, "from", fromAddress, "inbox_id", inboxID)
// Make contact.
firstName, lastName := getContactName(env.From[0])
@@ -350,7 +372,7 @@ func (e *Email) processEnvelope(ctx context.Context, client *imapclient.Client,
InboxID: inboxID,
Status: models.MessageStatusReceived,
Subject: env.Subject,
SourceID: null.StringFrom(env.MessageID),
SourceID: null.StringFrom(messageID),
Meta: meta,
},
Contact: contact,
@@ -385,7 +407,7 @@ func (e *Email) processEnvelope(ctx context.Context, client *imapclient.Client,
}
if fullItem, ok := fullFetchItem.(imapclient.FetchItemDataBodySection); ok {
e.lo.Debug("fetching full message body", "message_id", env.MessageID)
e.lo.Debug("fetching full message body", "message_id", messageID)
return e.processFullMessage(fullItem, incomingMsg)
}
}
@@ -534,3 +556,13 @@ func extractAllHTMLParts(part *enmime.Part) []string {
return htmlParts
}
// extractMessageIDFromHeaders extracts and cleans the Message-ID from email headers.
// This function handles problematic Message IDs by extracting them from raw headers
// and cleaning them of angle brackets and whitespace.
func extractMessageIDFromHeaders(envelope *enmime.Envelope) string {
if rawMessageID := envelope.GetHeader(headerMessageID); rawMessageID != "" {
return strings.TrimSpace(strings.Trim(rawMessageID, "<>"))
}
return ""
}

View File

@@ -0,0 +1,123 @@
package email
import (
"strings"
"testing"
"github.com/emersion/go-message/mail"
"github.com/jhillyerd/enmime"
)
// TestGoIMAPMessageIDParsing shows how go-imap fails to parse malformed Message-IDs
// and demonstrates the fallback solution.
// go-imap uses mail.Header.MessageID() which strictly follows RFC 5322 and returns
// empty strings for Message-IDs with multiple @ symbols.
//
// This caused emails to be dropped since we require Message-IDs for deduplication.
// References:
// - https://community.mailcow.email/d/701-multiple-at-in-message-id/5
// - https://github.com/emersion/go-message/issues/154#issuecomment-1425634946
func TestGoIMAPMessageIDParsing(t *testing.T) {
testCases := []struct {
input string
expectedIMAP string
expectedFallback string
name string
}{
{"<normal@example.com>", "normal@example.com", "normal@example.com", "normal message ID"},
{"<malformed@@example.com>", "", "malformed@@example.com", "double @ - IMAP fails, fallback works"},
{"<001c01d710db$a8137a50$f83a6ef0$@jones.smith@example.com>", "", "001c01d710db$a8137a50$f83a6ef0$@jones.smith@example.com", "mailcow-style - IMAP fails, fallback works"},
{"<test@@@domain.com>", "", "test@@@domain.com", "triple @ - IMAP fails, fallback works"},
{" <abc123@example.com> ", "abc123@example.com", "abc123@example.com", "with whitespace - both handle correctly"},
{"abc123@example.com", "", "abc123@example.com", "no angle brackets - IMAP fails, fallback works"},
{"", "", "", "empty input"},
{"<>", "", "", "empty brackets"},
{"<CAFnQjQFhY8z@mail.example.com@gateway.company.com>", "", "CAFnQjQFhY8z@mail.example.com@gateway.company.com", "gateway-style - IMAP fails, fallback works"},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Test go-imap parsing behavior
var h mail.Header
h.Set("Message-Id", tc.input)
imapResult, _ := h.MessageID()
if imapResult != tc.expectedIMAP {
t.Errorf("IMAP parsing of %q: expected %q, got %q", tc.input, tc.expectedIMAP, imapResult)
}
// Test fallback solution
if tc.input != "" {
rawEmail := "From: test@example.com\nMessage-ID: " + tc.input + "\n\nBody"
envelope, err := enmime.ReadEnvelope(strings.NewReader(rawEmail))
if err != nil {
t.Fatal(err)
}
fallbackResult := extractMessageIDFromHeaders(envelope)
if fallbackResult != tc.expectedFallback {
t.Errorf("Fallback extraction of %q: expected %q, got %q", tc.input, tc.expectedFallback, fallbackResult)
}
// Critical check: ensure fallback works when IMAP fails
if imapResult == "" && tc.expectedFallback != "" && fallbackResult == "" {
t.Errorf("CRITICAL: Both IMAP and fallback failed for %q - would drop email!", tc.input)
}
}
})
}
}
// TestEdgeCasesMessageID tests additional edge cases for Message-ID extraction.
func TestEdgeCasesMessageID(t *testing.T) {
tests := []struct {
name string
email string
expected string
}{
{
name: "no Message-ID header",
email: `From: test@example.com
To: inbox@test.com
Subject: Test
Body`,
expected: "",
},
{
name: "malformed header syntax",
email: `From: test@example.com
Message-ID: malformed-no-brackets@@domain.com
To: inbox@test.com
Body`,
expected: "malformed-no-brackets@@domain.com",
},
{
name: "multiple Message-ID headers (first wins)",
email: `From: test@example.com
Message-ID: <first@example.com>
Message-ID: <second@@example.com>
To: inbox@test.com
Body`,
expected: "first@example.com",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
envelope, err := enmime.ReadEnvelope(strings.NewReader(tt.email))
if err != nil {
t.Fatal(err)
}
result := extractMessageIDFromHeaders(envelope)
if result != tt.expected {
t.Errorf("Expected %q, got %q", tt.expected, result)
}
})
}
}