-
Notifications
You must be signed in to change notification settings - Fork 1
/
filters.go
56 lines (51 loc) · 1.15 KB
/
filters.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
package main
import (
"net/url"
"strings"
// log "github.com/romana/rlog"
)
// Filters applied to every URL the crawler considers.
var (
	// excludedPath lists the URL path prefixes checked by IsValidPath; a URL
	// whose path starts with any of them is rejected.
	excludedPath = []string{
		"/cdn-cgi",
		"/legal",
		"/static",
		"/blog",
	}

	// excludedSubdomain lists the host names checked by IsValidSubdomain; a
	// URL whose host matches any of them is rejected.
	excludedSubdomain = []string{
		"www.monzo.com",
		"community.monzo.com",
		"status.monzo.com",
	}
)
// IsInternal reports whether URL belongs to the site being crawled, i.e. its
// host name equals the host name of crawler.host or is a subdomain of it.
//
// Host names are compared case-insensitively and only on label boundaries, so
// "evilmonzo.com" is not considered part of "monzo.com". It returns false when
// URL or crawler.host cannot be parsed, or when crawler.host yields no host
// name (for example because it has no scheme).
func IsInternal(URL string, crawler Crawler) bool {
	href, err := url.Parse(URL)
	if err != nil {
		// url.Parse returns a nil *url.URL on failure; using it would panic.
		return false
	}
	baseURL, err := url.Parse(crawler.host)
	if err != nil {
		return false
	}

	host := strings.ToLower(href.Hostname())
	base := strings.ToLower(baseURL.Hostname())
	if base == "" {
		// Every string ends with the empty suffix, so without this guard an
		// unusable base host would mark every URL as internal.
		return false
	}

	// Match the exact host or a dot-separated subdomain of it.
	return host == base || strings.HasSuffix(host, "."+base)
}
// IsValidPath reports whether the path of URL may be crawled, i.e. it does
// not start with any of the prefixes listed in excludedPath.
//
// It returns false when URL cannot be parsed. The crawler argument is not
// used by this check.
func IsValidPath(URL string, crawler Crawler) bool {
	href, err := url.Parse(URL)
	if err != nil {
		// url.Parse returns a nil *url.URL on failure; using it would panic.
		return false
	}

	for _, prefix := range excludedPath {
		if strings.HasPrefix(href.Path, prefix) {
			return false
		}
	}
	return true
}
// IsValidSubdomain reports whether the host of URL may be crawled, i.e. its
// host name does not contain any of the entries in excludedSubdomain.
//
// The host name is lower-cased before comparison because host names are
// case-insensitive. It returns false when URL cannot be parsed. The crawler
// argument is not used by this check.
func IsValidSubdomain(URL string, crawler Crawler) bool {
	href, err := url.Parse(URL)
	if err != nil {
		// url.Parse returns a nil *url.URL on failure; using it would panic.
		return false
	}

	domain := strings.ToLower(href.Hostname())
	for _, excluded := range excludedSubdomain {
		if strings.Contains(domain, excluded) {
			return false
		}
	}
	return true
}