gf_crawl_lib

package
v0.0.0-...-330f67a Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 17, 2024 License: GPL-2.0 Imports: 17 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Init

// Init is the package's initialization entry point. Only the signature is
// shown here, so this comment is limited to what the parameter types establish.
//
// Parameters:
//   - pConfig: crawler configuration (*GFcrawlerConfig).
//   - pMediaDomainStr: a media domain, passed as a string.
//   - pTemplatesPathsMap: string-to-string map of template paths
//     (presumably template name -> file path; confirm against callers).
//   - pEsearchClient: an *elastic.Client handle.
//   - pHTTPmux: the *http.ServeMux handed to the package
//     (presumably used to register HTTP handlers; confirm in the implementation).
//   - pRuntimeSys: shared *gf_core.RuntimeSys handle.
//
// Returns a *gf_core.GFerror.
// NOTE(review): presumably nil on success — confirm against the implementation.
func Init(pConfig *GFcrawlerConfig,
	pMediaDomainStr string,
	pTemplatesPathsMap map[string]string,

	pEsearchClient *elastic.Client,
	pHTTPmux *http.ServeMux,
	pRuntimeSys *gf_core.RuntimeSys) *gf_core.GFerror

func RunCrawlerCycle

// RunCrawlerCycle runs one cycle for the given crawler definition. Only the
// signature is shown here, so this comment is limited to what the parameter
// types establish.
//
// Parameters:
//   - pCrawler: the crawler definition (gf_crawl_core.GFcrawlerDef), passed by value.
//   - pImagesLocalDirPathStr: a local directory path for images, as a string.
//   - pMediaDomainStr: a media domain, as a string.
//   - pS3bucketNameStr: an S3 bucket name, as a string.
//   - pUserID: the gf_core.GF_ID of the user the run is associated with
//     (NOTE(review): exact role of the user ID is not visible here — confirm).
//   - pRuntime: crawler runtime handle (*gf_crawl_core.GFcrawlerRuntime).
//   - pRuntimeSys: shared *gf_core.RuntimeSys handle.
//
// Returns a *gf_core.GFerror.
// NOTE(review): presumably nil on success — confirm against the implementation.
func RunCrawlerCycle(pCrawler gf_crawl_core.GFcrawlerDef,
	pImagesLocalDirPathStr string,
	pMediaDomainStr string,
	pS3bucketNameStr string,
	pUserID gf_core.GF_ID,
	pRuntime *gf_crawl_core.GFcrawlerRuntime,
	pRuntimeSys *gf_core.RuntimeSys) *gf_core.GFerror

Types

type GFcrawler

// GFcrawler is a minimal crawler descriptor made of two string fields.
// NOTE(review): how this relates to gf_crawl_core.GFcrawlerDef (used by
// RunCrawlerCycle) is not visible here — confirm before relying on it.
type GFcrawler struct {
	Name_str    string // crawler name
	StartURLstr string // presumably the URL the crawl starts from — confirm
}

type GFcrawlerConfig

// GFcrawlerConfig carries the crawler settings passed to Init. The field
// notes below follow the field names; the code that consumes them is not
// shown here, so treat them as assumptions to confirm.
type GFcrawlerConfig struct {
	Crawled_images_s3_bucket_name_str string // presumably the S3 bucket for images found while crawling
	Images_s3_bucket_name_str         string // presumably the S3 bucket for general images
	Images_local_dir_path_str         string // presumably a local directory path for image files
	Cluster_node_type_str             string // NOTE(review): accepted values are not visible here
	Crawl_config_file_path_str        string // presumably the path to the crawl config file
	ImagesUseNewStorageEngineBool     bool   // NOTE(review): what the "new storage engine" switches is not visible here
}

type GFcrawlerCycleRun

// GFcrawlerCycleRun is the record describing a single crawler cycle run.
// The bson tags define the keys each field is stored under, so the tags are
// part of the persisted schema.
type GFcrawlerCycleRun struct {
	Id                   primitive.ObjectID `bson:"_id,omitempty"` // stored as "_id"; left out when empty (omitempty)
	Id_str               string             `bson:"id_str"`
	T_str                string             `bson:"t"` // type tag stored under key "t"; per the original note the value is "crawler_cycle_run"
	Creation_unix_time_f float64            `bson:"creation_unix_time_f"`
	Crawler_name_str     string             `bson:"crawler_name_str"`
	// NOTE(review): the bson key below is spelled "targit_domain_str", which
	// looks like a typo of "target". It is the persisted key, so do not
	// correct it without migrating existing stored documents.
	Target_domain_str    string             `bson:"targit_domain_str"`
	Target_url_str       string             `bson:"target_url_str"`
	Start_time_f         float64            `bson:"start_time_f"` // presumably a unix timestamp, like Creation_unix_time_f — confirm
	End_time_f           float64            `bson:"end_time_f"`   // presumably a unix timestamp — confirm
}

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL