From 754453ad7f9579a6021c484d5014a3cd12fd0e35 Mon Sep 17 00:00:00 2001 From: Ioseba Palop <4988743+iosebyte@users.noreply.github.com> Date: Fri, 27 Aug 2021 13:03:16 +0200 Subject: [PATCH] Fixes BingWeb query (#116) --- .../SearcherCore/Searcher/BingWebSearcher.cs | 16 +++++++++------- .../Searcher/DuckduckgoWebSearcher.cs | 5 ++--- .../SearcherCore/Searcher/LinkSearcher.cs | 1 + 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/SearcherCore/SearcherCore/Searcher/BingWebSearcher.cs b/SearcherCore/SearcherCore/Searcher/BingWebSearcher.cs index a59b8c7..a6ec74b 100644 --- a/SearcherCore/SearcherCore/Searcher/BingWebSearcher.cs +++ b/SearcherCore/SearcherCore/Searcher/BingWebSearcher.cs @@ -39,7 +39,7 @@ private int GetBingResults(string searchString, int currentResultPerPage, int cu searchString += string.Format(" loc:{0}", RegionToHtmlOption(LocatedInRegion)); OnSearcherLogEvent(new EventsThreads.ThreadStringEventArgs(string.Format("[{0}] Searching first={2} q={1}", Name, searchString, currentOffset + 1))); - string requestUrl = String.Format("http://www.bing.com/search?first={1}&q={0}", searchString, currentOffset + 1); + string requestUrl = String.Format("https://www.bing.com/search?first={1}&q={0}", searchString, currentOffset + 1); int retries = 0; bool error; @@ -48,14 +48,16 @@ private int GetBingResults(string searchString, int currentResultPerPage, int cu { error = false; HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(requestUrl); - if (!string.IsNullOrEmpty(UserAgent)) + if (!String.IsNullOrEmpty(UserAgent)) + { request.UserAgent = UserAgent; - + } + else + { + request.UserAgent = DefaultUserAgent; + } + request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"; request.Timeout = 5000 + 10000 * retries; - request.CookieContainer = new CookieContainer(); - request.CookieContainer.Add(new Cookie("SRCHHPGUSR", "ADLT=OFF&NRSLT=" + currentResultPerPage, "/", ".bing.com")); - - request.CookieContainer.Add(new Cookie("MUID", "00000000000000000000000000000000", "/", ".bing.com")); try { OnSearcherLogEvent(new EventsThreads.ThreadStringEventArgs(string.Format("[{0}] Requesting URL {1}", this.Name, request.RequestUri.ToString()))); diff --git a/SearcherCore/SearcherCore/Searcher/DuckduckgoWebSearcher.cs b/SearcherCore/SearcherCore/Searcher/DuckduckgoWebSearcher.cs index f43017a..59d61cb 100644 --- a/SearcherCore/SearcherCore/Searcher/DuckduckgoWebSearcher.cs +++ b/SearcherCore/SearcherCore/Searcher/DuckduckgoWebSearcher.cs @@ -12,7 +12,6 @@ namespace FOCA.Searcher public class DuckduckgoWebSearcher : LinkSearcher { private const int MAX_PAGES = 3; - private const string userAgent = "Mozilla/5.0 (Linux; U; Android 4.1.1; en-gb; Build/KLP) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30"; private static readonly string[] supportedFileTypes = new string[] { "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx" }; public DuckduckgoWebSearcher() : base("DuckDuckGoWeb", supportedFileTypes) @@ -51,7 +50,7 @@ private int Query(string searchTerms, CancellationToken cancelToken) private string SendInitialRequest(string searchString) { HttpWebRequest request = HttpWebRequest.CreateHttp(string.Format("https://duckduckgo.com/html/?q={0}&t=h_", System.Web.HttpUtility.UrlEncode(searchString))); - request.UserAgent = userAgent; + request.UserAgent = DefaultUserAgent; request.Referer = "https://duckduckgo.com/"; HttpWebResponse response = (HttpWebResponse)request.GetResponse(); using (StreamReader responseReader = new StreamReader(response.GetResponseStream())) @@ -68,7 +67,7 @@ private string MoreResults(string searchString, string postParameters) request.Headers.Add("Origin: https://duckduckgo.com"); request.Referer = "https://duckduckgo.com/"; request.ContentType = "application/x-www-form-urlencoded"; - request.UserAgent = userAgent; + request.UserAgent = DefaultUserAgent; request.ContentLength = postParameters.Length; request.Headers.Add("Cache-Control: max-age=0"); request.Headers.Add("Upgrade-Insecure-Requests: 1"); diff --git a/SearcherCore/SearcherCore/Searcher/LinkSearcher.cs b/SearcherCore/SearcherCore/Searcher/LinkSearcher.cs index 196ef47..5978258 100644 --- a/SearcherCore/SearcherCore/Searcher/LinkSearcher.cs +++ b/SearcherCore/SearcherCore/Searcher/LinkSearcher.cs @@ -8,6 +8,7 @@ namespace FOCA.Searcher { public abstract class LinkSearcher : Searcher { + public const string DefaultUserAgent= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"; protected List SupportedExtensions { get; private set; } public string UserAgent { get; set; }