diff --git a/internal/pkg/crawl/stats.go b/internal/pkg/crawl/stats.go index fd77c437..62591846 100644 --- a/internal/pkg/crawl/stats.go +++ b/internal/pkg/crawl/stats.go @@ -29,16 +29,19 @@ func (c *Crawl) printLiveStats() { crawledSeeds := c.CrawledSeeds.Value() crawledAssets := c.CrawledAssets.Value() + queueStats := c.Queue.GetStats() + stats.AddRow("", "") stats.AddRow(" - Job:", c.Job) stats.AddRow(" - State:", c.getCrawlState()) stats.AddRow(" - Active workers:", strconv.Itoa(int(c.ActiveWorkers.Value()))+"/"+strconv.Itoa(c.Workers.wpLen())) stats.AddRow(" - URI/s:", c.URIsPerSecond.Rate()) - stats.AddRow(" - Queued:", c.Queue.GetStats().TotalElements) + stats.AddRow(" - Items in queue:", queueStats.TotalElements) + stats.AddRow(" - Hosts in queue:", queueStats.UniqueHosts) + stats.AddRow(" - Handover Get() success:", queueStats.HandoverSuccessGetCount) stats.AddRow(" - Queue empty bool state:", c.Queue.Empty.Get()) stats.AddRow(" - Can Enqueue:", c.Queue.CanEnqueue()) stats.AddRow(" - Can Dequeue:", c.Queue.CanDequeue()) - stats.AddRow(" - Hosts in queue:", c.Queue.GetStats().UniqueHosts) stats.AddRow(" - Crawled total:", crawledSeeds+crawledAssets) stats.AddRow(" - Crawled seeds:", crawledSeeds) stats.AddRow(" - Crawled assets:", crawledAssets) diff --git a/internal/pkg/queue/handover.go b/internal/pkg/queue/handover.go index 3c996263..3c0c0265 100644 --- a/internal/pkg/queue/handover.go +++ b/internal/pkg/queue/handover.go @@ -1,12 +1,16 @@ package queue +import "sync/atomic" + type HandoverChannel struct { - ch chan *Item + ch chan *Item + count *atomic.Uint64 } func NewHandoverChannel() *HandoverChannel { return &HandoverChannel{ - ch: make(chan *Item, 1), // Buffer of 1 for non-blocking operations + ch: make(chan *Item, 1), // Buffer of 1 for non-blocking operations + count: new(atomic.Uint64), } } @@ -22,6 +26,7 @@ func (h *HandoverChannel) TryPut(item *Item) bool { func (h *HandoverChannel) TryGet() (*Item, bool) { select { case item := <-h.ch: + h.count.Add(1) return item, true default: return nil, false diff --git a/internal/pkg/queue/stats.go b/internal/pkg/queue/stats.go index fe204474..da5d6b8b 100644 --- a/internal/pkg/queue/stats.go +++ b/internal/pkg/queue/stats.go @@ -25,6 +25,7 @@ type QueueStats struct { AverageTimeBetweenEnqueues time.Duration `json:"average_time_between_enqueues"` AverageTimeBetweenDequeues time.Duration `json:"average_time_between_dequeues"` AverageElementsPerHost float64 `json:"average_elements_per_host"` + HandoverSuccessGetCount uint64 `json:"handover_success_get_count"` } type HostStat struct { @@ -80,6 +81,9 @@ func (q *PersistentGroupedQueue) genStats() { if q.stats.EnqueueCount > 0 { q.stats.AverageTimeBetweenEnqueues = time.Since(q.stats.FirstEnqueueTime) / time.Duration(q.stats.EnqueueCount) } + + // Calculate handover success get count + q.stats.HandoverSuccessGetCount = q.Handover.count.Load() } func (q *PersistentGroupedQueue) loadStatsFromFile(path string) error {