golgoquery

package
v0.0.0-...-1bfd785 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 2, 2023 License: MIT Imports: 10 Imported by: 0

README

golgoquery

First Example

{
  "uriflows": [
    {
      "uri": "http://example.com/",
      "queryflows": [
        {
          "queries": [
            {
              "selector": ["div", "p"],
              "attribute": "text",
              "action": "Echo"
            },
            {
              "selector": ["div", "a"],
              "attribute": "href",
              "action": "debug"
            }
          ]
        }
      ]
    }
  ]
}
  • fetches "uri"
  • picks every query from "queryflows"
  • first "query", pulls innerHTML text from "div.p" selector element; passes list of results to the method mapped to "Echo"
  • then next "query", pulls href from "div.a" selector element; passes list of results to the method mapped to "debug"

Second Example

{
  "uriflows": [
    {
      "uri": "http://example.com/",
      "queryflows": [
        {
          "queries": [
            {
              "selector": ["div", "p"],
              "attribute": "text",
              "action": "Echo"
            },
            {
              "selector": ["div", "a"],
              "attribute": "href",
            }
          ],
          "action": "debug"
        }
      ]
    }
  ]
}
  • fetches "uri"
  • processes the queries in "queryflows"; since only one "query" has an action, the action is triggered only while processing that query
  • after all "queries" have been processed for a "queryflows" entry, since that entry has an action of its own, the action is called with the list of all results and index "-1"

NOTE:

  • if no action is given, the "debug" action is called and prints the result; to suppress this, set the action name to "~"

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	ReloadCache bool
	CacheDir    = "/tmp/.tune.cli"
	UserAgent   = "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0"
)
View Source
var (
	CacheGoquery bool
)
View Source
var (
	LocalActionMap = map[string]Action{
		"~":     Skip,
		"debug": Debug,
	}
)

local actionmap

Functions

func CacheUrl

func CacheUrl(url string) (*goquery.Document, error)

func Goquery

func Goquery(url string, goquerySelector string) (domNode *goquery.Selection)

func GoqueryDocument

func GoqueryDocument(url string) *goquery.Document

func GoqueryFromDocument

func GoqueryFromDocument(doc *goquery.Document, goquerySelector string) (domNode *goquery.Selection)

func GoqueryFromNode

func GoqueryFromNode(node *goquery.Selection, goquerySelector string) (domNode *goquery.Selection)

Types

type Action

type Action func([]GoqueryResults, int) ([]GoqueryResults, error)

Action type is for the actual functions that will process query's results and return same or processed result. It gets passed list of GoqueryResults for a queryflow, and index of current query. Index is passed as '-1' if Action is used at parent (QueryFlow) layer post processing all queries.

type DOMSelection

type DOMSelection *goquery.Document

type GoqueryResults

type GoqueryResults struct {
	Results []string
}

func Debug

func Debug(resultsList []GoqueryResults, idx int) ([]GoqueryResults, error)

Debug is a sample built-in action that can be passed as the action for a query or queryflow to print results during the development phase.

func GoqueryAttrsFromParents

func GoqueryAttrsFromParents(url string, selectors []string, attr string) (results GoqueryResults, err error)

func GoqueryHrefsFrom

func GoqueryHrefsFrom(url string, goquerySelector string) (results GoqueryResults, err error)

func GoqueryHrefsFromParents

func GoqueryHrefsFromParents(url string, selectors []string) (results GoqueryResults, err error)

func GoqueryTextFrom

func GoqueryTextFrom(url string, goquerySelector string) (results GoqueryResults, err error)

func GoqueryTextFromParents

func GoqueryTextFromParents(url string, selectors []string) (results GoqueryResults, err error)

func Skip

func Skip(resultsList []GoqueryResults, idx int) ([]GoqueryResults, error)

Skip is a built-in action for cases where no handling of the query results by golgoquery is desired.

func (*GoqueryResults) GoqueryAttrsFrom

func (results *GoqueryResults) GoqueryAttrsFrom(domNodes *goquery.Selection, attr string) (err error)

type Query

type Query struct {
	Selector   []string `json:"selector"`
	Attribute  string   `json:"attribute"`
	ActionName string   `json:"action"`
}

type QueryDSL

type QueryDSL struct {
	URIFlows [](*URIFlow) `json:"uriflows"`
}

func FromFile

func FromFile(path string, actionmap map[string]Action) (QueryDSL, error)

FromFile takes the path of a file containing a JSON blob that represents a QueryDSL, plus a map of action names to functions, and processes the DSL.

func FromJson

func FromJson(jsonBlob []byte, actionmap map[string]Action) (qDSL QueryDSL, err error)

FromJson takes a JSON blob that represents a QueryDSL, plus a map of action names to functions, and processes the DSL.

func (*QueryDSL) Proc

func (queryDSL *QueryDSL) Proc(actionmap map[string]Action)

QueryDSL.Proc processes QueryDSL DSL block.

type QueryFlow

type QueryFlow struct {
	ResultsList []GoqueryResults `json:"content"`
	Queries     [](*Query)       `json:"queries"`
	ActionName  string           `json:"action"`
}

func (*QueryFlow) Proc

func (queryFlow *QueryFlow) Proc(actionmap map[string]Action, uri URI)

QueryFlow.Proc processes QueryFlow DSL block.

type URI

type URI string

type URIFlow

type URIFlow struct {
	URI        URI            `json:"uri"`
	QueryFlows [](*QueryFlow) `json:"queryflows"`
}

func (*URIFlow) Proc

func (uriFlow *URIFlow) Proc(actionmap map[string]Action)

URIFlow.Proc processes URIFlow DSL block.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL