package utils

import (
	"os"
	"strings"

	"golang.org/x/net/html"
)

// ExtractHrefs parses the HTML file at filename and collects the href values
// of its <link> elements that are prefetch hints pointing at HTML pages.
//
// A <link> element is selected when:
//   - its rel attribute contains the token "prefetch" (rel is treated as a
//     whitespace-separated token list and compared case-insensitively, so
//     rel="prefetch prerender" and rel="Prefetch" both match), and
//   - its href attribute is non-empty and contains the substring ".html".
//
// The returned slice holds each href once, in order of first appearance in
// the document. It is empty but non-nil when nothing matches. Errors from
// opening or parsing the file are returned unchanged with a nil slice.
func ExtractHrefs(filename string) ([]string, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	doc, err := html.Parse(file)
	if err != nil {
		return nil, err
	}

	// isPrefetch reports whether a rel attribute value lists the "prefetch"
	// link type among its whitespace-separated tokens.
	isPrefetch := func(rel string) bool {
		for _, token := range strings.Fields(rel) {
			if strings.EqualFold(token, "prefetch") {
				return true
			}
		}
		return false
	}

	var hrefs []string

	// walk visits n and all of its descendants depth-first, in document order.
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "link" {
			var href, rel string
			// Pick up the two attributes we care about.
			for _, a := range n.Attr {
				switch a.Key {
				case "href":
					href = a.Val
				case "rel":
					rel = a.Val
				}
			}
			// Keep only prefetch links whose target looks like an HTML page.
			if isPrefetch(rel) && href != "" && strings.Contains(href, ".html") {
				hrefs = append(hrefs, href)
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	// The same page may be hinted more than once; report each URL only once.
	return removeDuplicates(hrefs), nil
}

// removeDuplicates returns the distinct strings of urls, keeping the first
// occurrence of each and preserving their relative order. The result is
// always non-nil, even when urls is nil or empty.
func removeDuplicates(urls []string) []string {
	seen := make(map[string]struct{}, len(urls))
	result := make([]string, 0, len(urls))
	for _, url := range urls {
		if _, dup := seen[url]; dup {
			continue
		}
		seen[url] = struct{}{}
		result = append(result, url)
	}
	return result
}