Skip to content

Commit ef132bc

Browse files
committed
Combine rules with boolean operators.
Rules in *.list files can now be combined with boolean operators:

    <django> & <guide> 25
    google.com &^ /search/p 300
    bing.com & (<roses> | <pansies>) 100

(&^ is AND NOT.)
1 parent c72956c commit ef132bc

File tree

14 files changed

+261
-107
lines changed

14 files changed

+261
-107
lines changed

acl.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@ type ACLDefinitions struct {
3030
ConnectPorts map[int][]string
3131
ContentTypes map[string][]string
3232
Methods map[string][]string
33-
Referers map[string][]string
33+
Referers map[rule][]string
3434
RefererCategories map[string][]string
3535
StatusCodes map[int][]string
3636
URLs *URLMatcher
37-
URLTags map[string][]string
37+
URLTags map[rule][]string
3838
UserIPs IPMap
3939
UserNames map[string][]string
4040
ServerIPs IPMap
@@ -116,12 +116,13 @@ func (a *ACLDefinitions) AddRule(acl string, newRule []string) error {
116116
a.URLs = newURLMatcher()
117117
}
118118
if a.Referers == nil {
119-
a.Referers = make(map[string][]string)
119+
a.Referers = make(map[rule][]string)
120120
}
121121
for _, u := range args {
122122
u = strings.ToLower(u)
123-
a.URLs.AddRule(rule{t: urlMatch, content: u})
124-
a.Referers[u] = append(a.Referers[u], acl)
123+
r := simpleRule{t: urlMatch, content: u}
124+
a.URLs.AddRule(r)
125+
a.Referers[r] = append(a.Referers[r], acl)
125126
}
126127

127128
case "referer-category", "referrer-category":
@@ -171,12 +172,13 @@ func (a *ACLDefinitions) AddRule(acl string, newRule []string) error {
171172
a.URLs = newURLMatcher()
172173
}
173174
if a.URLTags == nil {
174-
a.URLTags = make(map[string][]string)
175+
a.URLTags = make(map[rule][]string)
175176
}
176177
for _, u := range args {
177178
u = strings.ToLower(u)
178-
a.URLs.AddRule(rule{t: urlMatch, content: u})
179-
a.URLTags[u] = append(a.URLTags[u], acl)
179+
r := simpleRule{t: urlMatch, content: u}
180+
a.URLs.AddRule(r)
181+
a.URLTags[r] = append(a.URLTags[r], acl)
180182
}
181183

182184
case "user-agent":
@@ -355,7 +357,7 @@ func (a *ACLDefinitions) requestACLs(r *http.Request, user string) map[string]bo
355357

356358
if a.URLs != nil {
357359
for match := range a.URLs.MatchingRules(r.URL) {
358-
for _, acl := range a.URLTags[match.content] {
360+
for _, acl := range a.URLTags[match] {
359361
acls[acl] = true
360362
}
361363
}
@@ -364,7 +366,7 @@ func (a *ACLDefinitions) requestACLs(r *http.Request, user string) map[string]bo
364366
refURL, err := url.Parse(referer)
365367
if err == nil {
366368
for match := range a.URLs.MatchingRules(refURL) {
367-
for _, acl := range a.Referers[match.content] {
369+
for _, acl := range a.Referers[match] {
368370
acls[acl] = true
369371
}
370372
}

categories.go

Lines changed: 65 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ func loadRuleFile(c *category, filename string, multiplier float64) {
205205
break
206206
}
207207

208-
r, line, err := parseRule(line)
208+
r, line, err := parseCompoundRule(line)
209209
if err != nil {
210210
log.Printf("Error in line %d of %s: %s", cr.LineNo, filename, err)
211211
continue
@@ -220,43 +220,54 @@ func loadRuleFile(c *category, filename string, multiplier float64) {
220220
w.maxPoints = int(float64(w.maxPoints) * multiplier)
221221
}
222222

223-
if r.t == defaultRule {
223+
if sr, ok := r.(simpleRule); ok && sr.t == defaultRule {
224224
defaultWeight = w.points
225225
} else {
226226
c.weights[r] = w
227227
}
228228
}
229229
}
230230

231+
func (cf *config) addRule(r rule) {
232+
switch r := r.(type) {
233+
case simpleRule:
234+
switch r.t {
235+
case contentPhrase:
236+
cf.ContentPhraseList.addPhrase(r.content)
237+
case imageHash:
238+
content := r.content
239+
threshold := -1
240+
if dash := strings.Index(content, "-"); dash != -1 {
241+
t, err := strconv.Atoi(content[dash+1:])
242+
if err != nil {
243+
log.Printf("%v: %v", r, err)
244+
return
245+
}
246+
threshold = t
247+
content = content[:dash]
248+
}
249+
h, err := dhash.Parse(content)
250+
if err != nil {
251+
log.Printf("%v: %v", r, err)
252+
return
253+
}
254+
cf.ImageHashes = append(cf.ImageHashes, dhashWithThreshold{h, threshold})
255+
default:
256+
cf.URLRules.AddRule(r)
257+
}
258+
case compoundRule:
259+
cf.addRule(r.left)
260+
cf.addRule(r.right)
261+
cf.CompoundRules = append(cf.CompoundRules, r)
262+
}
263+
}
264+
231265
// collectRules collects the rules from all the categories and adds
232266
// them to URLRules and phraseRules.
233267
func (cf *config) collectRules() {
234268
for _, c := range cf.Categories {
235-
for rule, _ := range c.weights {
236-
switch rule.t {
237-
case contentPhrase:
238-
cf.ContentPhraseList.addPhrase(rule.content)
239-
case imageHash:
240-
content := rule.content
241-
threshold := -1
242-
if dash := strings.Index(content, "-"); dash != -1 {
243-
t, err := strconv.Atoi(content[dash+1:])
244-
if err != nil {
245-
log.Printf("%v: %v", rule, err)
246-
continue
247-
}
248-
threshold = t
249-
content = content[:dash]
250-
}
251-
h, err := dhash.Parse(content)
252-
if err != nil {
253-
log.Printf("%v: %v", rule, err)
254-
continue
255-
}
256-
cf.ImageHashes = append(cf.ImageHashes, dhashWithThreshold{h, threshold})
257-
default:
258-
cf.URLRules.AddRule(rule)
259-
}
269+
for r, _ := range c.weights {
270+
cf.addRule(r)
260271
}
261272
}
262273
cf.ContentPhraseList.findFallbackNodes(0, nil)
@@ -284,12 +295,39 @@ func (c *category) score(tally map[rule]int, conf *config) int {
284295
return total
285296
}
286297

298+
func (cf *config) applyCompoundRules(tally map[rule]int) {
299+
for _, cr := range cf.CompoundRules {
300+
left := tally[cr.left]
301+
right := tally[cr.right]
302+
combined := left
303+
switch cr.op {
304+
case "&":
305+
if right < left {
306+
combined = right
307+
}
308+
case "|":
309+
if right > left {
310+
combined = right
311+
}
312+
case "&^":
313+
if right != 0 {
314+
combined = 0
315+
}
316+
}
317+
if combined != 0 {
318+
tally[cr] = combined
319+
}
320+
}
321+
}
322+
287323
// categoryScores returns a map containing a page's score for each category.
288324
func (cf *config) categoryScores(tally map[rule]int) map[string]int {
289325
if len(tally) == 0 {
290326
return nil
291327
}
292328

329+
cf.applyCompoundRules(tally)
330+
293331
scores := make(map[string]int)
294332
for _, c := range cf.Categories {
295333
s := c.score(tally, cf)

classify.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ func handleClassification(w http.ResponseWriter, r *http.Request) {
117117
for _, h := range conf.ImageHashes {
118118
distance := dhash.Distance(hash, h.Hash)
119119
if distance <= h.Threshold || h.Threshold == -1 && distance <= conf.DhashThreshold {
120-
tally[rule{imageHash, h.String()}]++
120+
tally[simpleRule{imageHash, h.String()}]++
121121
scoresNeedUpdate = true
122122
}
123123
}

combinators.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
)
7+
8+
// parser combinators, inspired by nom
9+
10+
// parser is the function shape shared by the combinators in this file: it
// takes the input still to be parsed and returns the value it parsed, the
// input remaining after that value, and an error if it could not parse.
type parser[T any] func(input string) (value T, rest string, err error)
11+
12+
func tag(t string) parser[string] {
13+
return func(input string) (string, string, error) {
14+
s := strings.TrimLeft(input, " \t\r\n\f")
15+
if strings.HasPrefix(s, t) {
16+
return t, strings.TrimPrefix(s, t), nil
17+
} else {
18+
return "", input, fmt.Errorf("not found: %q", t)
19+
}
20+
}
21+
}
22+
23+
func anyTag(tags ...string) parser[string] {
24+
return func(input string) (string, string, error) {
25+
s := strings.TrimLeft(input, " \t\r\n\f")
26+
for _, t := range tags {
27+
if strings.HasPrefix(s, t) {
28+
return t, strings.TrimPrefix(s, t), nil
29+
}
30+
}
31+
return "", input, fmt.Errorf("not found: %q", tags)
32+
}
33+
}
34+
35+
func alt[T any](options ...parser[T]) parser[T] {
36+
return func(input string) (value T, rest string, err error) {
37+
for _, p := range options {
38+
value, rest, err = p(input)
39+
if err == nil {
40+
return
41+
}
42+
}
43+
return
44+
}
45+
}
46+
47+
func delimited[T, U, V any](left parser[T], inner parser[U], right parser[V]) parser[U] {
48+
return func(input string) (value U, rest string, err error) {
49+
_, rest, err = left(input)
50+
if err != nil {
51+
return
52+
}
53+
value, rest, err = inner(rest)
54+
if err != nil {
55+
return
56+
}
57+
_, rest, err = right(rest)
58+
return
59+
}
60+
}

config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ type config struct {
5050
CountOnce bool
5151
Threshold int
5252
URLRules *URLMatcher
53+
CompoundRules []compoundRule
5354
MaxContentScanSize int
5455
PublicSuffixes []string
5556

go.mod

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,38 @@
11
module github.com/andybalholm/redwood
22

33
require (
4-
github.com/PuerkitoBio/goquery v1.8.0 // indirect
54
github.com/andybalholm/brotli v1.0.5
65
github.com/andybalholm/cascadia v1.3.1
76
github.com/andybalholm/dhash v1.0.0
8-
github.com/andybalholm/go-bit v1.0.1 // indirect
97
github.com/baruwa-enterprise/clamd v1.0.1
10-
github.com/chzyer/readline v1.5.1 // indirect
11-
github.com/dlclark/regexp2 v1.8.0 // indirect
128
github.com/dop251/goja v0.0.0-20230216180835-5937a312edda
139
github.com/dop251/goja_nodejs v0.0.0-20230207183254-2229640ea097
1410
github.com/golang/gddo v0.0.0-20210115222349-20d68f94ee1f
1511
github.com/klauspost/compress v1.15.15
1612
github.com/miekg/dns v1.1.50
1713
github.com/open-ch/ja3 v1.0.1
18-
github.com/pkg/errors v0.9.1 // indirect
1914
github.com/qri-io/starlib v0.5.0
2015
github.com/remogatto/ftpget v0.0.0-20120222025949-5c3c8286a3b0
2116
go.starlark.net v0.0.0-20230128213706-3f75dec8e403
2217
golang.org/x/crypto v0.6.0
2318
golang.org/x/image v0.5.0
2419
golang.org/x/net v0.7.0
2520
golang.org/x/text v0.7.0
26-
golang.org/x/tools v0.6.0 // indirect
2721
gopkg.in/yaml.v3 v3.0.1
2822
)
2923

30-
go 1.15
24+
require (
25+
github.com/PuerkitoBio/goquery v1.8.0 // indirect
26+
github.com/andybalholm/go-bit v1.0.1 // indirect
27+
github.com/chzyer/readline v1.5.1 // indirect
28+
github.com/dlclark/regexp2 v1.8.0 // indirect
29+
github.com/dustmop/soup v1.1.2-0.20190516214245-38228baa104e // indirect
30+
github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
31+
github.com/pkg/errors v0.9.1 // indirect
32+
golang.org/x/mod v0.8.0 // indirect
33+
golang.org/x/sys v0.5.0 // indirect
34+
golang.org/x/tools v0.6.0 // indirect
35+
gopkg.in/yaml.v2 v2.4.0 // indirect
36+
)
37+
38+
go 1.18

0 commit comments

Comments
 (0)