regexrover

package
v1.3.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 16, 2024 License: Apache-2.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AppendFileLines

func AppendFileLines(lines []string, path string) error

func PrintConfig

func PrintConfig(config Config)

func ReadFileLines

func ReadFileLines(path string) ([]string, error)

func SearchAllKeywords

func SearchAllKeywords(keywords []string, config Config) []string

SearchAllKeywords takes each keyword (which may consist of multiple words) and performs a GoogleSearch for it, retrieving all results that Google Custom Search returns.

Types

type CSVWriter

type CSVWriter struct {
	// contains filtered or unexported fields
}

func (*CSVWriter) Close

func (w *CSVWriter) Close() error

func (*CSVWriter) Init

func (w *CSVWriter) Init(cacheMaxSize int) error

func (*CSVWriter) WriteAll

func (w *CSVWriter) WriteAll(records [][]string) error

func (*CSVWriter) WriteWithCache

func (w *CSVWriter) WriteWithCache(key string, value string, forceWrite bool) error

type Config

type Config struct {
	ApiKey                string
	Cx                    string
	OutputDriver          string
	KeywordsFile          string
	CrawlIgnoreDomains    []string
	CrawlAllowedUrlsRegex string
	CrawlUserAgent        string
	CrawlTag              string
	CrawlMatchRegex       string
	CrawlUrlsFile         string
	CrawlCacheDir         string
	CrawlDepth            int
	CrawlThreads          int
	CrawlLog              bool
	MatchOutputChunks     int
	MaxSearchResults      int
}

func ReadEnvConfig

func ReadEnvConfig() (Config, error)

type Crawler

type Crawler struct {
	// contains filtered or unexported fields
}

func NewCrawler

func NewCrawler(config Config, writer Writer) (*Crawler, error)

func (*Crawler) GetStats

func (crawler *Crawler) GetStats() CrawlerStats

func (*Crawler) QueueAdd

func (crawler *Crawler) QueueAdd(crawlUrls []string) (int, error)

func (*Crawler) Run

func (crawler *Crawler) Run() error

func (*Crawler) Stop

func (crawler *Crawler) Stop()

type CrawlerStats

type CrawlerStats struct {
	// contains filtered or unexported fields
}

type SQLiteWriter

type SQLiteWriter struct {
	// contains filtered or unexported fields
}

func (*SQLiteWriter) Close

func (w *SQLiteWriter) Close() error

func (*SQLiteWriter) Init

func (w *SQLiteWriter) Init(cacheMaxSize int) error

func (*SQLiteWriter) WriteAll

func (w *SQLiteWriter) WriteAll(records [][]string) error

func (*SQLiteWriter) WriteWithCache

func (w *SQLiteWriter) WriteWithCache(key string, value string, forceWrite bool) error

type SearchResults

type SearchResults struct {
	Links []string
	// contains filtered or unexported fields
}

func GoogleSearch

func GoogleSearch(query string, apiKey string, cx string, startIndex int) (SearchResults, error)

GoogleSearch returns a SearchResults struct whose Links field lists the links found.

type Writer

type Writer interface {
	WriterInit
	WriteWithCache(key string, value string, forceWrite bool) error
	WriteAll(records [][]string) error
	Close() error
}

func NewWriter

func NewWriter(writerType string, cacheMaxSize int) (Writer, error)

type WriterInit

type WriterInit interface {
	Init(cacheMaxSize int) error
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL