search_clients

package
v0.0.0-...-1a4fc59 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 29, 2023 License: MIT Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// UserAgent is the unique User-Agent header value for this client. See
	// https://en.wikipedia.org/api/rest_v1/#/ for more information.
	UserAgent = "CSXL/Solus/1.0.0"
)

Functions

func GoogleSearchResultsToJSON

func GoogleSearchResultsToJSON(results []*GoogleSearchResult) (string, error)

Types

type ErrorCallback

// ErrorCallback is an alias for colly.ErrorCallback.
type ErrorCallback = colly.ErrorCallback

type GoogleSearchClient

// GoogleSearchClient runs queries through its Search method. The base path of
// its HTTP client can be read with GetBasePath and overridden with
// SetBasePath. Create one with NewGoogleSearchClient.
type GoogleSearchClient struct {
	// contains filtered or unexported fields
}

func NewGoogleSearchClient

func NewGoogleSearchClient(ctx context.Context, apiKey string, googleSearchEngineID string) (*GoogleSearchClient, error)

func (*GoogleSearchClient) GetBasePath

func (gsc *GoogleSearchClient) GetBasePath() string

GetBasePath returns the base path of the GoogleSearchClient's HTTP client

func (*GoogleSearchClient) Search

func (gsc *GoogleSearchClient) Search(query string) ([]*GoogleSearchResult, error)

func (*GoogleSearchClient) SetBasePath

func (gsc *GoogleSearchClient) SetBasePath(basePath string)

SetBasePath sets the base path of the GoogleSearchClient's HTTP client

This is useful for unit testing.

type GoogleSearchResult

// GoogleSearchResult is the element type of the slice returned by
// GoogleSearchClient.Search and accepted by GoogleSearchResultsToJSON.
//
// NOTE(review): the fields presumably hold the result's title, link, text
// snippet and reported MIME type, as their names suggest; confirm against the
// Search implementation.
type GoogleSearchResult struct {
	Title    string
	Url      string
	Summary  string
	MIMEType string
}

type HTMLCallback

// HTMLCallback is an alias for colly.HTMLCallback.
type HTMLCallback = colly.HTMLCallback

type HTMLElement

// HTMLElement is an alias for colly.HTMLElement.
type HTMLElement = colly.HTMLElement

type Request

// Request is an alias for colly.Request.
type Request = colly.Request

type RequestCallback

// RequestCallback is an alias for colly.RequestCallback.
type RequestCallback = colly.RequestCallback

type ResponseCallback

// ResponseCallback is an alias for colly.ResponseCallback.
type ResponseCallback = colly.ResponseCallback

type Scraper

// Scraper wraps the Colly scraper. Create one with NewScraper, or with
// NewScraperWithCache to supply a cache directory.
type Scraper struct {
	// contains filtered or unexported fields
}

Scraper wraps the Colly scraper

func NewScraper

func NewScraper() *Scraper

NewScraper creates a new Scraper

func NewScraperWithCache

func NewScraperWithCache(cacheDirectory string) *Scraper

NewScraperWithCache creates a new Scraper with a cache directory

func (*Scraper) Scrape

func (s *Scraper) Scrape(entryURL string, maxDepth int) ([]Website, error)

func (*Scraper) ScrapePage

func (s *Scraper) ScrapePage(url string) (*Website, error)

func (*Scraper) ScrapePages

func (s *Scraper) ScrapePages(urls []string) ([]Website, error)

type SearchClientConfig

// SearchClientConfig holds a Google Search API key and a Google search engine
// ID. NewGoogleSearchClient takes the same two values as arguments.
type SearchClientConfig struct {
	GoogleSearchAPIKey   string
	GoogleSearchEngineID string
}

func NewSearchClientConfig

func NewSearchClientConfig(googleSearchAPIKey string, googleSearchEngineID string) *SearchClientConfig

func (*SearchClientConfig) GetGoogleSearchAPIKey

func (sc *SearchClientConfig) GetGoogleSearchAPIKey() string

func (*SearchClientConfig) GetGoogleSearchEngineID

func (sc *SearchClientConfig) GetGoogleSearchEngineID() string

type Website

// Website is the record type returned by the Scraper methods Scrape,
// ScrapePage and ScrapePages. Each field has a matching Get* accessor method.
//
// NOTE(review): HTMLContent and TextContent presumably hold the raw markup and
// the extracted text of the same page, and Links the URLs found on it; confirm
// against the Scraper implementation.
type Website struct {
	Title       string
	URL         string
	Links       []string
	MIME        string
	HTMLContent string
	TextContent string
}

func (*Website) GetHTMLContent

func (w *Website) GetHTMLContent() string

func (*Website) GetLinks

func (w *Website) GetLinks() []string

func (*Website) GetMIME

func (w *Website) GetMIME() string

func (*Website) GetTextContent

func (w *Website) GetTextContent() string

func (*Website) GetTitle

func (w *Website) GetTitle() string

func (*Website) GetURL

func (w *Website) GetURL() string

type WikipediaClient

// WikipediaClient provides the GetPage, GetPageSummary and Search methods for
// Wikipedia. Create one with NewWikipediaClient.
type WikipediaClient struct {
	// NOTE(review): presumably sent as the User-Agent header on requests and
	// defaulted from the package-level UserAgent variable; confirm in the
	// implementation.
	UserAgent string
	// contains filtered or unexported fields
}

func NewWikipediaClient

func NewWikipediaClient(ctx context.Context) (*WikipediaClient, error)

func (*WikipediaClient) GetPage

func (c *WikipediaClient) GetPage(pageTitle string) (string, error)

func (*WikipediaClient) GetPageSummary

func (c *WikipediaClient) GetPageSummary(pageTitle string) (string, error)

GetPageSummary returns the summary of a Wikipedia page. See https://en.wikipedia.org/w/api.php?action=help&modules=query%2Bextracts for more information.

func (*WikipediaClient) Search

Search performs a search on Wikipedia and returns a list of results. See https://en.wikipedia.org/w/api.php?action=help&modules=query%2Bsearch for more information.

type WikipediaParseResult

// WikipediaParseResult carries the string decoded from the JSON key "*".
//
// NOTE(review): this looks like the text payload of a MediaWiki action=parse
// response; confirm against the code that unmarshals into it.
type WikipediaParseResult struct {
	Data string `json:"*"`
}

type WikipediaQuerySearchResult

// WikipediaQuerySearchResult is one search result, decoded from JSON using the
// field tags below.
//
// NOTE(review): presumably one entry of the result list returned by
// WikipediaClient.Search; confirm against that method.
type WikipediaQuerySearchResult struct {
	Title  string `json:"title"`
	Url    string `json:"url"`
	PageID int    `json:"pageid"`
	Size   int    `json:"size"`
	// Snippet contains HTML that highlights the text on the page matching the search query.
	// Example:
	// <span class=\"searchmatch\">Computing</span> is any goal-oriented activity requiring, benefiting from...
	Snippet string `json:"snippet"`
}

type WikipediaQuerySummary

// WikipediaQuerySummary pairs a page title with its summary text. Summary is
// decoded from the JSON key "extract".
type WikipediaQuerySummary struct {
	Title   string `json:"title"`
	Summary string `json:"extract"`
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL