Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var LinkScanner = felix.ScanFunc(func(ctx context.Context, r io.Reader, e felix.Emitter) error { doc, err := goquery.NewDocumentFromReader(r) if err != nil { return errors.Wrap(err, "could not read HTML document") } foundURLs := make(map[string]bool) doc.Find("a").Each(func(index int, item *goquery.Selection) { if href, ok := item.Attr("href"); ok && !foundURLs[href] { title := item.Text() if strings.TrimSpace(title) == "" { title = href } foundURLs[href] = true e.EmitLink(felix.Link{ Title: title, URL: href, }) } }) if s, err := doc.Html(); err == nil { urls := urlPattern.FindAllString(s, -1) for _, u := range urls { if !foundURLs[u] { foundURLs[u] = true e.EmitLink(felix.Link{ Title: u, URL: u, }) } } } return nil })
LinkScanner parses r as an HTML document and extracts all links. Links are uniquely identified by the link's URL. Multiple instances of the same URL (e.g. href) will only be reported once (i.e. the first found instance).
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.