diff --git a/internal/pkg/crawl/extractor/xml_test.go b/internal/pkg/crawl/extractor/xml_test.go
new file mode 100644
index 00000000..532d8c96
--- /dev/null
+++ b/internal/pkg/crawl/extractor/xml_test.go
@@ -0,0 +1,103 @@
+package extractor
+
+import (
+ "bytes"
+ "io"
+ "net/http"
+ "net/url"
+ "testing"
+)
+
+// TestXML runs XML against a table of in-memory response bodies and checks
+// both the error outcome and the returned URLs (compared in order by
+// compareURLs).
+//
+// The cases cover: URLs held in leaf elements (one of them nested) next to
+// plain text, a well-formed document with no content, a malformed document,
+// and a document that mixes a URL with free text that must be skipped.
+func TestXML(t *testing.T) {
+	// NOTE(review): the element names used in these fixtures are placeholders;
+	// confirm they match the kind of documents XML() is expected to walk.
+	// What matters for this table is that "Empty XML" is well-formed and
+	// "Invalid XML" is not, so the two cases no longer share the same input
+	// while expecting opposite error outcomes.
+	tests := []struct {
+		name     string
+		xmlBody  string
+		wantURLs []*url.URL
+		wantErr  bool
+	}{
+		{
+			name: "Valid XML with URLs",
+			xmlBody: `
+				<root>
+					<item>http://example.com</item>
+					<nested>
+						<url>https://example.org</url>
+					</nested>
+					<noturl>just some text</noturl>
+				</root>`,
+			wantURLs: []*url.URL{
+				{Scheme: "http", Host: "example.com"},
+				{Scheme: "https", Host: "example.org"},
+			},
+			wantErr: false,
+		},
+		{
+			name:     "Empty XML",
+			xmlBody:  `<root></root>`,
+			wantURLs: nil,
+			wantErr:  false,
+		},
+		{
+			// The closing tag does not match the open element, so the
+			// document is not well-formed.
+			name:     "Invalid XML",
+			xmlBody:  `<root><unclosed></root>`,
+			wantURLs: nil,
+			wantErr:  true,
+		},
+		{
+			name: "XML with invalid URL",
+			xmlBody: `
+				<root>
+					<item>http://example.com</item>
+					<item>not a valid url</item>
+				</root>`,
+			wantURLs: []*url.URL{
+				{Scheme: "http", Host: "example.com"},
+			},
+			wantErr: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			resp := &http.Response{
+				Body: io.NopCloser(bytes.NewBufferString(tt.xmlBody)),
+			}
+
+			gotURLs, err := XML(resp)
+
+			// An unexpected error state makes the URL comparison
+			// meaningless, so stop this subtest here.
+			if (err != nil) != tt.wantErr {
+				t.Fatalf("XML() error = %v, wantErr %v", err, tt.wantErr)
+			}
+
+			if !compareURLs(gotURLs, tt.wantURLs) {
+				t.Errorf("XML() gotURLs = %v, want %v", gotURLs, tt.wantURLs)
+			}
+		})
+	}
+}
+
+// TestXMLBodyReadError checks that XML returns an error when the response
+// body cannot be read.
+//
+// The body is the read half of an io.Pipe that has already been closed: every
+// Read on it fails with io.ErrClosedPipe. An io.NopCloser cannot be used for
+// this, because its Close method does nothing and the wrapped reader stays
+// readable afterwards.
+func TestXMLBodyReadError(t *testing.T) {
+	// The write half is never used; closing the read half is enough to make
+	// all subsequent reads fail, and no goroutine is involved.
+	body, _ := io.Pipe()
+	body.Close()
+
+	resp := &http.Response{Body: body}
+
+	if _, err := XML(resp); err == nil {
+		t.Errorf("XML() expected error, got nil")
+	}
+}
+
+// compareURLs reports whether a and b contain equal URLs in the same order.
+//
+// Slices of different length are never equal. Elements are compared through
+// their String() form. A nil element is equal only to another nil element;
+// nil is checked before String() is called, so an unexpected nil entry makes
+// the comparison return false instead of panicking.
+func compareURLs(a, b []*url.URL) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i, got := range a {
+		want := b[i]
+		if got == nil || want == nil {
+			// Pointer comparison: equal only when both are nil.
+			if got != want {
+				return false
+			}
+			continue
+		}
+		if got.String() != want.String() {
+			return false
+		}
+	}
+	return true
+}