type (
	// LocUrl is a single sitemap entry. Only the text of the entry's <loc>
	// child element is kept; any other child elements are ignored.
	LocUrl struct {
		Loc string `xml:"loc"`
	}

	// Sitemap is the decoded form of a sitemap document. It has no XMLName
	// field, so encoding/xml does not constrain the name of the root
	// element; the same type therefore decodes documents whose entries are
	// <url> elements as well as documents whose entries are <sitemap>
	// elements.
	Sitemap struct {
		// URLs holds every <url> child of the root element.
		URLs []LocUrl `xml:"url"`
		// Sitemap holds every <sitemap> child of the root element.
		Sitemap []LocUrl `xml:"sitemap"`
	}
)
sitemap := Sitemap{}
if err := xml.NewDecoder(strings.NewReader(resp.ToText())).Decode(&sitemap); err != nil {
return result, errors.Wrap(err, "could not decode xml")
}
for _, v := range sitemap.URLs {
url, err := urllib.GetURL(regexp.MustCompile(`(/.+)`).FindString(strings.Trim(v.Loc, " \t\n")), *navRequest.URL)
if err != nil {
continue
}
request := parse.GetRequest(enums.GET, url)
request.Source = enums.FromSitemap
_ = callback(request)
result = append(result, request)
}
for _, v := range sitemap.Sitemap {
url, err := urllib.GetURL(regexp.MustCompile(`(/.+)`).FindString(strings.Trim(v.Loc, " \t\n")), *navRequest.URL)
if err != nil {
continue
}
request := parse.GetRequest(enums.GET, url)
request.Source = enums.FromSitemap
_ = callback(request)
result = append(result, request)
}
return result, nil
增加两个结构体，用于解析 sitemap.xml 的内容。
之后在获取的返回包body后