Documentation ¶
Index ¶
Examples ¶
Constants ¶
const ( LogLevelDebug logLevelType = iota + 1 LogLevelWarning LogLevelFatal )
// ParseHubBaseUrl is the base URL of the ParseHub API, including the
// trailing slash.
const ParseHubBaseUrl = "https://www.parsehub.com/api/"
Variables ¶
This section is empty.
Functions ¶
Types ¶
type HandleRunFunc ¶
type ParseHub ¶
// ParseHub is the ParseHub adapter; create one with NewParseHub.
type ParseHub struct {
// contains filtered or unexported fields
}
ParseHub adapter
func NewParseHub ¶
Creates a new ParseHub adapter with the given API key.
func (*ParseHub) GetAllProjects ¶
This will return all of the projects in your account
Example ¶
Get all ParseHub projects
parsehub := NewParseHub("__API_KEY__") if projects, err := parsehub.GetAllProjects(); err != nil { // handle error } else { for _, project := range projects { fmt.Printf("%+v", project) } }
Output:
func (*ParseHub) GetProject ¶
This will return the project object wrapper for a specific project.
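Params (NOTE: the parameters listed below appear to be the run parameters of Project.Run; in the example below GetProject is called with only a project token):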
start_url (Optional) The url to start running on. Defaults to the project’s start_site.
start_template (Optional) The template to start running with. Defaults to the project’s start_template (inside the options_json).
start_value_override (Optional) The starting global scope for this run. This can be used to pass parameters to your run. For example, you can pass {"query": "San Francisco"} to use the query somewhere in your run. Defaults to the project’s start_value.
send_email (Optional) If set to anything other than 0, send an email when the run either completes successfully or fails due to an error. Defaults to 0.
Example ¶
Get parsehub project
parsehub := NewParseHub("__API_KEY__") if project, err := parsehub.GetProject("__PROJECT_TOKEN__"); err != nil { // handle error } else { fmt.Printf("%+v", project) }
Output:
func (*ParseHub) GetRun ¶
This returns the run object wrapper for a given run token.
Example ¶
Get parsehub run
parsehub := NewParseHub("__API_KEY__") if run, err := parsehub.GetRun("__RUN_TOKEN__"); err != nil { // handle error } else { fmt.Printf("%+v", run) }
Output:
func (*ParseHub) LoadRunFromBytes ¶
Loads a run from bytes, for example from a webhook POST body.
Example ¶
Load a run from bytes
// Build a run from a JSON payload (for example a webhook POST body),
// load its extracted data, then delete the run.
runJsonBytes := []byte("__RUN_FROM_WEBHOOK__")
parsehub := NewParseHub("__API_KEY__")
run, err := parsehub.LoadRunFromBytes(runJsonBytes)
if err != nil {
	// log.Fatal, not log.Fatalf(err.Error()): the error text must not be
	// interpreted as a format string.
	log.Fatal(err)
}
val := map[string]interface{}{}
if err := run.LoadData(&val); err != nil {
	log.Fatal(err)
}
fmt.Println("result", val)
// delete the run once its data has been extracted
if err := run.Delete(); err != nil {
	log.Fatal(err)
}
Output:
type Project ¶
// Project is the ParseHub project wrapper; create one with NewProject.
type Project struct {
// contains filtered or unexported fields
}
ParseHub project Wrapper
func NewProject ¶
Creates a new ParseHub project wrapper.
func (*Project) LoadLastReadyData ¶
This returns the data for the most recent ready run for a project. You can use this method in order to have a synchronous interface to your project.
func (*Project) Run ¶
func (p *Project) Run(params ProjectRunParams, handleFunc HandleRunFunc) (*Run, error)
This will start running an instance of the project on the ParseHub cloud. It will create a new run object. This method will return immediately, while the run continues in the background. You can use webhooks or polling to figure out when the data for this run is ready in order to retrieve it.
Params: start_url (Optional) The url to start running on. Defaults to the project’s start_site.
start_template (Optional) The template to start running with. Defaults to the project’s start_template (inside the options_json).
start_value_override (Optional) The starting global scope for this run. This can be used to pass parameters to your run. For example, you can pass {"query": "San Francisco"} to use the query somewhere in your run. Defaults to the project’s start_value.
send_email (Optional) If set to anything other than 0, send an email when the run either completes successfully or fails due to an error. Defaults to 0.
Example ¶
Run parsehub project with params and handle data async with polling status
// Start a run with explicit parameters and process its data
// asynchronously through a handler.
parsehub := NewParseHub("__API_KEY__")

// handleFunc loads the run data, prints it, then deletes the run.
handleFunc := func(run *Run) error {
	val := map[string]interface{}{}
	if err := run.LoadData(&val); err != nil {
		// log.Fatal, not log.Fatalf(err.Error()): the error text must
		// not be interpreted as a format string.
		log.Fatal(err)
	}
	fmt.Println("result", val)
	// delete the run once its data has been extracted
	if err := run.Delete(); err != nil {
		log.Fatal(err)
	}
	return nil
}

if project, err := parsehub.GetProject("__PROJECT_TOKEN__"); err != nil {
	// handle error
} else {
	// async run; pass nil instead of handleFunc if you use webhooks
	params := ProjectRunParams{
		StartTemplate: "__START_TEMPLATE__",
		StartUrl:      "__START_URL__",
	}
	// Run returns (*Run, error); do not drop the error silently.
	if _, err := project.Run(params, handleFunc); err != nil {
		// handle error
	}
}
// your code
Output:
type ProjectResponse ¶
type ProjectResponse struct { // A globally unique id representing this project. Token string `json:"token"` // The title give by the user when creating this project. Title string `json:"title"` // The JSON-stringified representation of all the instructions for running this project. // This representation is not yet documented, but will eventually allow developers to create // plugins for ParseHub. TemplatesJSON string `json:"templates_json"` // The name of the template with which ParseHub should start executing the project. MainTemplate string `json:"main_template"` // The default URL at which ParseHub should start running the project. Main_site string `json:"main_site"` // An object containing several advanced options for the project. OptionsJSON string `json:"option_json"` // The run object of the most recently started run (orderd by start_time) for the project. LastRun *RunResponse `json:"last_run"` // The run object of the most recent ready run (ordered by start_time) for the project. A ready run is one // whose data_ready attribute is truthy. The last_run and last_ready_run for a project may be the same. LastReadyRun *RunResponse `json:"last_ready_run"` }
ParseHub Project
type ProjectRunParams ¶
// ProjectRunParams holds the optional parameters passed to Project.Run.
// The field descriptions follow the parameter list in the Run documentation.
type ProjectRunParams struct {
	// StartUrl is the url to start running on (start_url).
	StartUrl string
	// StartTemplate is the template to start running with (start_template).
	StartTemplate string
	// StartValueOverride is the starting global scope for the run
	// (start_value_override), e.g. {"query": "San Francisco"}.
	StartValueOverride map[string]interface{}
	// SendEmail requests an email when the run completes or fails
	// (send_email). NOTE(review): how the bool maps to the API's 0 / non-0
	// value is not visible here — confirm against Run.
	SendEmail bool
}
Project run params
type ProjectsResponse ¶
// ProjectsResponse is the JSON envelope for a list of projects: an object
// whose "projects" key holds the project entries.
type ProjectsResponse struct {
	// Projects is decoded from the "projects" key.
	Projects []*ProjectResponse `json:"projects"`
}
ParseHub Projects
type Run ¶
// Run is the ParseHub run wrapper, as returned by Project.Run.
type Run struct {
// contains filtered or unexported fields
}
ParseHub Run Wrapper
func (*Run) Cancel ¶
This cancels a run and changes its status to cancelled. Any data that was extracted so far will be available.
func (*Run) LoadData ¶
This loads the data that was extracted by a run.
Example ¶
Load run data
// Fetch a run by its token and load the data it extracted.
parsehub := NewParseHub("__API_KEY__")
if run, err := parsehub.GetRun("__RUN_TOKEN__"); err != nil {
	// handle error
} else {
	v := map[string]interface{}{} // example destination; a struct also works
	// Pass a pointer, as the other LoadData examples do, and check the
	// returned error instead of discarding it.
	if err := run.LoadData(&v); err != nil {
		// handle error
	} else {
		fmt.Printf("%+v", v)
	}
}
Output:
func (*Run) WatchAndHandle ¶
func (r *Run) WatchAndHandle()
Watches the run until it completes and handles it if a handler exists. Use SetHandler() to set the handler for the run data.
type RunResponse ¶
// RunResponse is the JSON representation of a ParseHub run.
type RunResponse struct {
	// ProjectToken is the globally unique id of the project this run belongs to.
	ProjectToken string `json:"project_token"`
	// RunToken is the globally unique id of this run.
	RunToken string `json:"run_token"`
	// Status is the run status: one of initialized, queued, running,
	// cancelled, complete, or error.
	Status string `json:"status"`
	// DataReady tells whether the run's data is ready to download. It is
	// always truthy when the status is complete; for cancelled or error it
	// may be truthy or falsy depending on whether any data is available.
	DataReady uint8 `json:"data_ready"`
	// StartTime is when the run was started, in UTC +0000.
	StartTime string `json:"start_time"`
	// EndTime is when the run was stopped, in UTC +0000. The API sends null
	// while the run is initialized or running.
	EndTime string `json:"end_time"`
	// Pages is the number of pages traversed by the run so far.
	Pages int64 `json:"pages"`
	// Md5sum is the md5sum of the results; compare it across two runs to
	// detect whether the results data changed.
	Md5sum string `json:"md5sum"`
	// StartURL is the url the run was started on.
	StartURL string `json:"start_url"`
	// StartTemplate is the template the run was started with.
	StartTemplate string `json:"start_template"`
	// StartValue is the starting value of the global scope for the run.
	StartValue string `json:"start_value"`
}
ParseHub Run