diff --git a/internal/pkg/crawl/extractor/xml_test.go b/internal/pkg/crawl/extractor/xml_test.go new file mode 100644 index 00000000..532d8c96 --- /dev/null +++ b/internal/pkg/crawl/extractor/xml_test.go @@ -0,0 +1,103 @@ +package extractor + +import ( + "bytes" + "io" + "net/http" + "net/url" + "testing" +) + +func TestXML(t *testing.T) { + tests := []struct { + name string + xmlBody string + wantURLs []*url.URL + wantErr bool + }{ + { + name: "Valid XML with URLs", + xmlBody: ` + + http://example.com + + https://example.org + + just some text + `, + wantURLs: []*url.URL{ + {Scheme: "http", Host: "example.com"}, + {Scheme: "https", Host: "example.org"}, + }, + wantErr: false, + }, + { + name: "Empty XML", + xmlBody: ``, + wantURLs: nil, + wantErr: false, + }, + { + name: "Invalid XML", + xmlBody: ``, + wantURLs: nil, + wantErr: true, + }, + { + name: "XML with invalid URL", + xmlBody: ` + + http://example.com + not a valid url + `, + wantURLs: []*url.URL{ + {Scheme: "http", Host: "example.com"}, + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resp := &http.Response{ + Body: io.NopCloser(bytes.NewBufferString(tt.xmlBody)), + } + + gotURLs, err := XML(resp) + + if (err != nil) != tt.wantErr { + t.Errorf("XML() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if !compareURLs(gotURLs, tt.wantURLs) { + t.Errorf("XML() gotURLs = %v, want %v", gotURLs, tt.wantURLs) + } + }) + } +} + +func TestXMLBodyReadError(t *testing.T) { + resp := &http.Response{ + Body: io.NopCloser(bytes.NewReader([]byte{})), // Empty reader to simulate EOF + } + resp.Body.Close() // Close the body to simulate a read error + + _, err := XML(resp) + if err == nil { + t.Errorf("XML() expected error, got nil") + } +} + +// compareURLs compares two slices of *url.URL +func compareURLs(a, b []*url.URL) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i].String() != b[i].String() { + return false + } + } + return true +}