Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/msg match enhance #230

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ require (
github.com/mjl-/sherpats v0.0.6
github.com/prometheus/client_golang v1.18.0
github.com/russross/blackfriday/v2 v2.1.0
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d
go.etcd.io/bbolt v1.3.11
golang.org/x/crypto v0.27.0
golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f
golang.org/x/net v0.29.0
golang.org/x/text v0.18.0
golang.org/x/text v0.19.0
rsc.io/qr v0.2.0
)

Expand Down
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
Expand Down Expand Up @@ -97,8 +99,8 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224=
golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.25.0 h1:oFU9pkj/iJgs+0DT+VMHrx+oBKs/LJMV+Uvg78sl+fE=
golang.org/x/tools v0.25.0/go.mod h1:/vtpO8WL1N9cQC3FN5zPqb//fRXskFHbLKk4OW1Q7rg=
Expand Down
19 changes: 19 additions & 0 deletions store/account.go
Original file line number Diff line number Diff line change
Expand Up @@ -1855,12 +1855,31 @@ ruleset:

header:
for _, t := range rs.HeadersRegexpCompiled {
isSubjectMatch := t[0].MatchString("subject")
for k, vl := range header {
k = strings.ToLower(k)
if t[0].MatchString("body") { // message body match
ws := PrepareWordSearch([]string{t[1].String()}, []string{})
// todo: regexp match
ok, err := ws.MatchPart(log, &p, true)
if err != nil {
log.Errorx("Failed to match body: %v", err)
}
if ok {
continue header
}
}
if !t[0].MatchString(k) {
continue
}
for _, v := range vl {
if isSubjectMatch {
// todo: memorize decoded text
v, err = decodeRFC2047(v)
if err != nil {
log.Errorx("Failed to decode subject: %v", err, slog.String("v", v))
}
}
v = strings.ToLower(strings.TrimSpace(v))
if t[1].MatchString(v) {
continue header
Expand Down
67 changes: 67 additions & 0 deletions store/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,22 @@ package store

import (
"bytes"
"encoding/base64"
"fmt"
"io"
"mime/quotedprintable"
"regexp"
"strings"
"unicode"
"unicode/utf8"

"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"

"golang.org/x/text/encoding"
"golang.org/x/text/encoding/japanese"
encUnicode "golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)

// WordSearch holds context for a search, with scratch buffers to prevent
Expand Down Expand Up @@ -193,3 +202,61 @@ func toLower(buf []byte) []byte {
}
return r
}

func decodeRFC2047(encoded string) (string, error) {
// match e.g. =?(iso-2022-jp)?(B)?(Rnc6...)?=
r := regexp.MustCompile(`(?i)=\?([^?]+)\?([BQ])\?([^?]+)\?=`)
matches := r.FindAllStringSubmatch(encoded, -1)

if len(matches) == 0 { // no match. Looks ASCII.
return encoded, nil
}

var decodedStrings []string
for _, match := range matches {
charset := match[1]
encodingName := strings.ToUpper(match[2])
encodedText := match[3]

// Decode Base64 or Quoted-Printable
var decodedBytes []byte
var err error
switch encodingName {
case "B": // Base64
decodedBytes, err = base64.StdEncoding.DecodeString(encodedText)
if err != nil {
return encoded, fmt.Errorf("Base64 decode error: %w", err)
}
case "Q": // Quoted-Printable
decodedBytes, err = io.ReadAll(quotedprintable.NewReader(strings.NewReader(encodedText)))
if err != nil {
return "", fmt.Errorf("Quoted-Printable decode error: %w", err)
}
default:
return encoded, fmt.Errorf("not supported encoding: %s", encodingName)
}

// Select charset
var enc encoding.Encoding
switch strings.ToLower(charset) {
case "iso-2022-jp":
enc = japanese.ISO2022JP
case "utf-8":
enc = encUnicode.UTF8
default:
return encoded, fmt.Errorf("not supported charset: %s", charset)
}

// Decode with charset
reader := transform.NewReader(strings.NewReader(string(decodedBytes)), enc.NewDecoder())
decodedText, err := io.ReadAll(reader)
if err != nil {
return encoded, err
}

decodedStrings = append(decodedStrings, string(decodedText))
}

// Concat multiple strings
return strings.Join(decodedStrings, ""), nil
}
38 changes: 38 additions & 0 deletions store/search_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package store

import (
"fmt"
"testing"
)

func TestSubjectMatch(t *testing.T) {
// Auto detect subject text encoding and decode

//log := mlog.New("search", nil)

originalSubject := `テストテキスト Abc 123...`
asciiSubject := "test text Abc 123..."

encodedSubjectUTF8 := `=?UTF-8?b?44OG44K544OI44OG44Kt44K544OIIEFiYyAxMjMuLi4=?=`
encodedSubjectISO2022 := `=?iso-2022-jp?B?GyRCJUYlOSVIJUYlLSU5JUgbKEIgQWJjIDEyMy4uLg==?=`
encodedSubjectUTF8 = encodedSubjectUTF8 + " \n " + encodedSubjectUTF8
encodedSubjectISO2022 = encodedSubjectISO2022 + " \n " + encodedSubjectISO2022
originalSubject = originalSubject + originalSubject

encodedTexts := map[string]string{encodedSubjectUTF8: originalSubject, encodedSubjectISO2022: originalSubject, asciiSubject: asciiSubject}

for encodedSubject, originalSubject := range encodedTexts {

// Autodetect & decode
decodedSubject, err := decodeRFC2047(encodedSubject)

fmt.Printf("decoded text:%s\n", decodedSubject)
if err != nil {
t.Fatalf("Decode error: %v", err)
}

if originalSubject != decodedSubject {
t.Fatalf("Decode mismatch %s != %s", originalSubject, decodedSubject)
}
}
}
5 changes: 4 additions & 1 deletion vendor/modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ github.com/prometheus/procfs/internal/util
# github.com/russross/blackfriday/v2 v2.1.0
## explicit
github.com/russross/blackfriday/v2
# github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d
## explicit
github.com/saintfish/chardet
# go.etcd.io/bbolt v1.3.11
## explicit; go 1.22
go.etcd.io/bbolt
Expand Down Expand Up @@ -97,7 +100,7 @@ golang.org/x/sync/errgroup
golang.org/x/sys/cpu
golang.org/x/sys/unix
golang.org/x/sys/windows
# golang.org/x/text v0.18.0
# golang.org/x/text v0.19.0
## explicit; go 1.18
golang.org/x/text/cases
golang.org/x/text/encoding
Expand Down
2 changes: 1 addition & 1 deletion webaccount/account.js
Original file line number Diff line number Diff line change
Expand Up @@ -1692,7 +1692,7 @@ const destination = async (name) => {
let fullName;
let saveButton;
const addresses = [name, ...Object.keys(acc.Destinations || {}).filter(a => !a.startsWith('@') && a !== name)];
dom._kids(page, crumbs(crumblink('Mox Account', '#'), 'Destination ' + name), dom.div(dom.span('Default mailbox', attr.title('Default mailbox where email for this recipient is delivered to if it does not match any ruleset. Default is Inbox.')), dom.br(), defaultMailbox = dom.input(attr.value(dest.Mailbox), attr.placeholder('Inbox'))), dom.br(), dom.div(dom.span('Full name', attr.title('Name to use in From header when composing messages. If not set, the account default full name is used.')), dom.br(), fullName = dom.input(attr.value(dest.FullName))), dom.br(), dom.h2('Rulesets'), dom.p('Incoming messages are checked against the rulesets. If a ruleset matches, the message is delivered to the mailbox configured for the ruleset instead of to the default mailbox.'), dom.p('"Is Forward" does not affect matching, but changes prevents the sending mail server from being included in future junk classifications by clearing fields related to the forwarding email server (IP address, EHLO domain, MAIL FROM domain and a matching DKIM domain), and prevents DMARC rejects for forwarded messages.'), dom.p('"List allow domain" does not affect matching, but skips the regular spam checks if one of the verified domains is a (sub)domain of the domain mentioned here.'), dom.p('"Accept rejects to mailbox" does not affect matching, but causes messages classified as junk to be accepted and delivered to this mailbox, instead of being rejected during the SMTP transaction. Useful for incoming forwarded messages where rejecting incoming messages may cause the forwarding server to stop forwarding.'), dom.table(dom.thead(dom.tr(dom.th('SMTP "MAIL FROM" regexp', attr.title('Matches if this regular expression matches (a substring of) the SMTP MAIL FROM address (not the message From-header). E.g. [email protected].')), dom.th('Message "From" address regexp', attr.title('Matches if this regular expression matches (a substring of) the single address in the message From header.')), dom.th('Verified domain', attr.title('Matches if this domain matches an SPF- and/or DKIM-verified (sub)domain.')), dom.th('Headers regexp', attr.title('Matches if these header field/value regular expressions all match (substrings of) the message headers. Header fields and valuees are converted to lower case before matching. Whitespace is trimmed from the value before matching. A header field can occur multiple times in a message, only one instance has to match. For mailing lists, you could match on ^list-id$ with the value typically the mailing list address in angled brackets with @ replaced with a dot, e.g. <name\\.lists\\.example\\.org>.')), dom.th('Is Forward', attr.title("Influences spam filtering only, this option does not change whether a message matches this ruleset. Can only be used together with SMTPMailFromRegexp and VerifiedDomain. SMTPMailFromRegexp must be set to the address used to deliver the forwarded message, e.g. '^user(|\\+.*)@forward\\.example$'. Changes to junk analysis: 1. Messages are not rejected for failing a DMARC policy, because a legitimate forwarded message without valid/intact/aligned DKIM signature would be rejected because any verified SPF domain will be 'unaligned', of the forwarding mail server. 2. The sending mail server IP address, and sending EHLO and MAIL FROM domains and matching DKIM domain aren't used in future reputation-based spam classifications (but other verified DKIM domains are) because the forwarding server is not a useful spam signal for future messages.")), dom.th('List allow domain', attr.title("Influences spam filtering only, this option does not change whether a message matches this ruleset. If this domain matches an SPF- and/or DKIM-verified (sub)domain, the message is accepted without further spam checks, such as a junk filter or DMARC reject evaluation. DMARC rejects should not apply for mailing lists that are not configured to rewrite the From-header of messages that don't have a passing DKIM signature of the From-domain. Otherwise, by rejecting messages, you may be automatically unsubscribed from the mailing list. The assumption is that mailing lists do their own spam filtering/moderation.")), dom.th('Allow rejects to mailbox', attr.title("Influences spam filtering only, this option does not change whether a message matches this ruleset. If a message is classified as spam, it isn't rejected during the SMTP transaction (the normal behaviour), but accepted during the SMTP transaction and delivered to the specified mailbox. The specified mailbox is not automatically cleaned up like the account global Rejects mailbox, unless set to that Rejects mailbox.")), dom.th('Mailbox', attr.title('Mailbox to deliver to if this ruleset matches.')), dom.th('Comment', attr.title('Free-form comments.')), dom.th('Action'))), rulesetsTbody, dom.tfoot(dom.tr(dom.td(attr.colspan('9')), dom.td(dom.clickbutton('Add ruleset', function click() {
dom._kids(page, crumbs(crumblink('Mox Account', '#'), 'Destination ' + name), dom.div(dom.span('Default mailbox', attr.title('Default mailbox where email for this recipient is delivered to if it does not match any ruleset. Default is Inbox.')), dom.br(), defaultMailbox = dom.input(attr.value(dest.Mailbox), attr.placeholder('Inbox'))), dom.br(), dom.div(dom.span('Full name', attr.title('Name to use in From header when composing messages. If not set, the account default full name is used.')), dom.br(), fullName = dom.input(attr.value(dest.FullName))), dom.br(), dom.h2('Rulesets'), dom.p('Incoming messages are checked against the rulesets. If a ruleset matches, the message is delivered to the mailbox configured for the ruleset instead of to the default mailbox.'), dom.p('"Is Forward" does not affect matching, but changes prevents the sending mail server from being included in future junk classifications by clearing fields related to the forwarding email server (IP address, EHLO domain, MAIL FROM domain and a matching DKIM domain), and prevents DMARC rejects for forwarded messages.'), dom.p('"List allow domain" does not affect matching, but skips the regular spam checks if one of the verified domains is a (sub)domain of the domain mentioned here.'), dom.p('"Accept rejects to mailbox" does not affect matching, but causes messages classified as junk to be accepted and delivered to this mailbox, instead of being rejected during the SMTP transaction. Useful for incoming forwarded messages where rejecting incoming messages may cause the forwarding server to stop forwarding.'), dom.table(dom.thead(dom.tr(dom.th('SMTP "MAIL FROM" regexp', attr.title('Matches if this regular expression matches (a substring of) the SMTP MAIL FROM address (not the message From-header). E.g. [email protected].')), dom.th('Message "From" address regexp', attr.title('Matches if this regular expression matches (a substring of) the single address in the message From header.')), dom.th('Verified domain', attr.title('Matches if this domain matches an SPF- and/or DKIM-verified (sub)domain.')), dom.th('Headers/body regexp', attr.title('Matches if these header field/value regular expressions all match (substrings of) the message headers. Header fields and valuees are converted to lower case before matching. Whitespace is trimmed from the value before matching. A header field can occur multiple times in a message, only one instance has to match. For mailing lists, you could match on ^list-id$ with the value typically the mailing list address in angled brackets with @ replaced with a dot, e.g. <name\\.lists\\.example\\.org>. Use field "body" for simple message content match')), dom.th('Is Forward', attr.title("Influences spam filtering only, this option does not change whether a message matches this ruleset. Can only be used together with SMTPMailFromRegexp and VerifiedDomain. SMTPMailFromRegexp must be set to the address used to deliver the forwarded message, e.g. '^user(|\\+.*)@forward\\.example$'. Changes to junk analysis: 1. Messages are not rejected for failing a DMARC policy, because a legitimate forwarded message without valid/intact/aligned DKIM signature would be rejected because any verified SPF domain will be 'unaligned', of the forwarding mail server. 2. The sending mail server IP address, and sending EHLO and MAIL FROM domains and matching DKIM domain aren't used in future reputation-based spam classifications (but other verified DKIM domains are) because the forwarding server is not a useful spam signal for future messages.")), dom.th('List allow domain', attr.title("Influences spam filtering only, this option does not change whether a message matches this ruleset. If this domain matches an SPF- and/or DKIM-verified (sub)domain, the message is accepted without further spam checks, such as a junk filter or DMARC reject evaluation. DMARC rejects should not apply for mailing lists that are not configured to rewrite the From-header of messages that don't have a passing DKIM signature of the From-domain. Otherwise, by rejecting messages, you may be automatically unsubscribed from the mailing list. The assumption is that mailing lists do their own spam filtering/moderation.")), dom.th('Allow rejects to mailbox', attr.title("Influences spam filtering only, this option does not change whether a message matches this ruleset. If a message is classified as spam, it isn't rejected during the SMTP transaction (the normal behaviour), but accepted during the SMTP transaction and delivered to the specified mailbox. The specified mailbox is not automatically cleaned up like the account global Rejects mailbox, unless set to that Rejects mailbox.")), dom.th('Mailbox', attr.title('Mailbox to deliver to if this ruleset matches.')), dom.th('Comment', attr.title('Free-form comments.')), dom.th('Action'))), rulesetsTbody, dom.tfoot(dom.tr(dom.td(attr.colspan('9')), dom.td(dom.clickbutton('Add ruleset', function click() {
addRulesetsRow({
SMTPMailFromRegexp: '',
MsgFromRegexp: '',
Expand Down
Loading