Skip to content

Commit

Permalink
Small fixes to fix memory usage (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
CorentinB authored Jan 13, 2024
1 parent a8bd42a commit 5af2058
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 19 deletions.
8 changes: 4 additions & 4 deletions internal/pkg/crawl/capture.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,18 +232,18 @@ func (c *Crawl) Capture(item *frontier.Item) {

// Execute site-specific code on the request, before sending it
if tiktok.IsTikTokURL(utils.URLToString(item.URL)) {
req = tiktok.AddHeaders(req)
tiktok.AddHeaders(req)
} else if telegram.IsTelegramURL(utils.URLToString(item.URL)) && !telegram.IsTelegramEmbedURL(utils.URLToString(item.URL)) {
// If the URL is a Telegram URL, we make an embed URL out of it
embedURL := telegram.CreateEmbedURL(item.URL)
telegram.TransformURL(item.URL)

// Then we create an item
embedItem := frontier.NewItem(embedURL, item, item.Type, item.Hop, item.ID)
embedItem := frontier.NewItem(item.URL, item, item.Type, item.Hop, item.ID)

// And capture it
c.Capture(embedItem)
} else if vk.IsVKURL(utils.URLToString(item.URL)) {
req = vk.AddHeaders(req)
vk.AddHeaders(req)
}

// Execute request
Expand Down
12 changes: 4 additions & 8 deletions internal/pkg/crawl/sitespecific/telegram/telegram.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,11 @@ func IsTelegramURL(url string) bool {
return strings.Contains(url, "/t.me/")
}

func CreateEmbedURL(URL *url.URL) *url.URL {
func TransformURL(URL *url.URL) {
// Add embed=1&mode=tme to the URL's query string. CreateEmbedURL did this on a copy; TransformURL modifies the URL it is given.
embedURL := *URL

if len(embedURL.RawQuery) > 0 {
embedURL.RawQuery += "&embed=1&mode=tme"
if len(URL.RawQuery) > 0 {
URL.RawQuery += "&embed=1&mode=tme"
} else {
embedURL.RawQuery = "embed=1&mode=tme"
URL.RawQuery = "embed=1&mode=tme"
}

return &embedURL
}
4 changes: 1 addition & 3 deletions internal/pkg/crawl/sitespecific/tiktok/tiktok.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ func IsTikTokURL(URL string) bool {
return strings.Contains(URL, "/tiktok.com")
}

func AddHeaders(req *http.Request) *http.Request {
func AddHeaders(req *http.Request) {
req.Header.Set("Authority", "www.tiktok.com")
req.Header.Set("Sec-Ch-Ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Microsoft Edge\";v=\"99\"")
req.Header.Set("Sec-Ch-Ua-Mobile", "?0")
Expand All @@ -23,6 +23,4 @@ func AddHeaders(req *http.Request) *http.Request {
req.Header.Set("Sec-Fetch-User", "?1")
req.Header.Set("Sec-Fetch-Dest", "document")
req.Header.Set("Accept-Language", "en-US,en;q=0.9,fr;q=0.8")

return req
}
4 changes: 1 addition & 3 deletions internal/pkg/crawl/sitespecific/vk/vk.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ func IsVKURL(URL string) bool {
return strings.Contains(URL, "/vk.com")
}

func AddHeaders(req *http.Request) *http.Request {
func AddHeaders(req *http.Request) {
req.Header.Set("Authority", "vk.com")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
req.Header.Set("Accept-Language", "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7")
Expand All @@ -26,6 +26,4 @@ func AddHeaders(req *http.Request) *http.Request {

// Gives "Your browser is out of date" error when using default UA.
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36")

return req
}
1 change: 0 additions & 1 deletion internal/pkg/frontier/save.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ func (f *Frontier) Save() {
defer encodeFile.Close()

// Write to the file

if err := SyncMapEncode(f.HostPool, encodeFile); err != nil {
f.LoggingChan <- &FrontierLogMessage{
Fields: logrus.Fields{
Expand Down

0 comments on commit 5af2058

Please sign in to comment.