gf_crawl_stats

package
v0.0.0-...-a1b0e2b Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 26, 2024 License: GPL-2.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Get_query_funs

// Get_query_funs returns a map from string keys to query functions. Each
// function takes a *gf_core.RuntimeSys and returns a map[string]interface{}
// result together with a *gf_core.GFerror.
// NOTE(review): only the signature is visible here; what the keys name and
// what each function queries must be confirmed against the implementation.
func Get_query_funs(pRuntimeSys *gf_core.RuntimeSys) map[string]func(*gf_core.RuntimeSys) (map[string]interface{}, *gf_core.GFerror)

Types

type Gf_domain_counts_for_all_days

// Gf_domain_counts_for_all_days carries the counts recorded for one domain.
// Every field uses the same key name in its JSON and BSON encodings.
// NOTE(review): field meanings are read off the names; confirm against the
// code that populates this struct.
type Gf_domain_counts_for_all_days struct {
	// Domain the counts belong to.
	Domain_str      string `json:"domain_str"      bson:"domain_str"`
	// Presumably the sum over Days_counts_lst - TODO confirm.
	Total_count_int int    `json:"total_count_int" bson:"total_count_int"`
	// One count per day; the day ordering is not visible here - TODO confirm.
	Days_counts_lst []int  `json:"days_counts_lst" bson:"days_counts_lst"`
}

type Gf_stat__crawled_gifs

// Gf_stat__crawled_gifs is a per-domain stat record for crawled GIFs.
// NOTE(review): the "_id" BSON key suggests this is decoded from a DB
// aggregation grouped by domain - confirm against the query.
type Gf_stat__crawled_gifs struct {
	// Domain name; read from the BSON "_id" key, emitted as "domain_str" in JSON.
	Domain_str             string                   `bson:"_id"                    json:"domain_str"`
	// Image count for the domain.
	Imgs_count_int         int                      `bson:"imgs_count_int"         json:"imgs_count_int"`
	// Free-form maps; their key schema is not visible here - TODO confirm.
	Urls_by_origin_url_lst []map[string]interface{} `bson:"urls_by_origin_url_lst" json:"urls_by_origin_url_lst"`
}

type Gf_stat__crawled_images_domain

// Gf_stat__crawled_images_domain is a per-domain stat record for crawled images.
// NOTE(review): the *_lst fields look like parallel slices (index i describing
// the same image in each) - confirm against the code that fills them.
type Gf_stat__crawled_images_domain struct {
	// Domain name; read from the BSON "_id" key, emitted as "domain_str" in JSON.
	Domain_str              string    `bson:"_id"                     json:"domain_str"`
	// Image count for the domain.
	Imgs_count_int          int       `bson:"imgs_count_int"          json:"imgs_count_int"`
	// Presumably Unix timestamps as floats; units not visible here - TODO confirm.
	Creation_unix_times_lst []float64 `bson:"creation_unix_times_lst" json:"creation_unix_times_lst"`
	// Image URLs.
	Urls_lst                []string  `bson:"urls_lst"                json:"urls_lst"`
	// URLs of the pages the images originated from (per the name - TODO confirm).
	Origin_urls_lst         []string  `bson:"origin_urls_lst"         json:"origin_urls_lst"`
	// Per-entry boolean flags; semantics are taken from the field names only.
	Downloaded_lst          []bool    `bson:"downloaded_lst"          json:"downloaded_lst"`
	Valid_for_usage_lst     []bool    `bson:"valid_for_usage_lst"     json:"valid_for_usage_lst"`
	S3_stored_lst           []bool    `bson:"s3_stored_lst"           json:"s3_stored_lst"`
}
// Gf_stat__crawled_links_domain is a per-domain stat record for crawled links.
// NOTE(review): the *_lst fields look like parallel slices (index i describing
// the same link in each) - confirm against the code that fills them.
type Gf_stat__crawled_links_domain struct {
	// Domain name; read from the BSON "_id" key, emitted as "domain_str" in JSON.
	Domain_str              string    `bson:"_id"                     json:"domain_str"`
	// Link count for the domain.
	Links_count_int         int       `bson:"links_count_int"         json:"links_count_int"`
	// Presumably Unix timestamps as floats; units not visible here - TODO confirm.
	Creation_unix_times_lst []float64 `bson:"creation_unix_times_lst" json:"creation_unix_times_lst"`
	// href values of the links (per the name - TODO confirm).
	A_href_lst              []string  `bson:"a_href_lst"              json:"a_href_lst"`
	// URLs of the pages the links were found on (per the name - TODO confirm).
	Origin_urls_lst         []string  `bson:"origin_urls_lst"         json:"origin_urls_lst"`
	Valid_for_crawl_lst     []bool    `bson:"valid_for_crawl_lst"     json:"valid_for_crawl_lst"`  // whether the link is to be crawled/followed or should be ignored
	Fetched_lst             []bool    `bson:"fetched_lst"             json:"fetched_lst"`          // whether the link's HTML was downloaded
	Images_processed_lst    []bool    `bson:"images_processed_lst"    json:"images_processed_lst"` // whether the images of this link's HTML page were downloaded/processed
}

type Gf_stat__crawled_url_fetches

// Gf_stat__crawled_url_fetches is a per-URL stat record for crawler fetches.
// NOTE(review): the "_id" BSON key suggests a DB aggregation grouped by URL -
// confirm against the query.
type Gf_stat__crawled_url_fetches struct {
	// URL; read from the BSON "_id" key, emitted as "url_str" in JSON.
	Url_str         string    `bson:"_id"             json:"url_str"`
	// Count for this URL (presumably the number of fetches - TODO confirm).
	Count_int       int       `bson:"count_int"       json:"count_int"`
	// Presumably fetch start times as Unix timestamps; units not visible here - TODO confirm.
	Start_times_lst []float64 `bson:"start_times_lst" json:"start_times_lst"`
}

type Gf_stat__error_type

// Gf_stat__error_type describes one error type: its name, a count, and a
// list of URLs. It is the element type of Gf_stat__errors.Errors_types_lst.
type Gf_stat__error_type struct {
	// Error type identifier.
	Type_str  string   `bson:"type_str"  json:"type_str"`
	// Count for this error type (presumably occurrences - TODO confirm).
	Count_int int      `bson:"count_int" json:"count_int"`
	// URLs associated with this error type (presumably where it occurred - TODO confirm).
	Urls_lst  []string `bson:"urls_lst"  json:"urls_lst"`
}

type Gf_stat__errors

// Gf_stat__errors groups error-type stats under a crawler name.
// NOTE(review): the "_id" BSON key suggests a DB aggregation grouped by
// crawler name - confirm against the query.
type Gf_stat__errors struct {
	// Crawler name; read from the BSON "_id" key, emitted as "crawler_name_str" in JSON.
	Crawler_name_str string                `bson:"_id"              json:"crawler_name_str"`
	// One Gf_stat__error_type entry per error type.
	Errors_types_lst []Gf_stat__error_type `bson:"errors_types_lst" json:"errors_types_lst"`
}
// Gf_stat__links_in_day holds three integer link totals.
// NOTE(review): per the type name these are presumably totals for a single
// day; the struct itself carries no date field - confirm how the day is keyed.
type Gf_stat__links_in_day struct {
	// Total link count.
	Total_count_int           int `bson:"total_count_int"           json:"total_count_int"`
	// Total of links flagged valid for crawl (per the name - TODO confirm).
	Valid_for_crawl_total_int int `bson:"valid_for_crawl_total_int" json:"valid_for_crawl_total_int"`
	// Total of links flagged fetched (per the name - TODO confirm).
	Fetched_total_int         int `bson:"fetched_total_int"         json:"fetched_total_int"`
}
// Gf_stat__unresolved_links is a per-origin-domain stat record for unresolved links.
// NOTE(review): the three *_lst fields look like parallel slices indexed by
// origin URL - confirm against the code that fills them.
type Gf_stat__unresolved_links struct {
	// Origin domain; read from the BSON "_id" key, emitted as "origin_domain_str" in JSON.
	Origin_domain_str             string     `bson:"_id"                           json:"origin_domain_str"`
	// Origin URLs.
	Origin_urls_lst               []string   `bson:"origin_urls_lst"               json:"origin_urls_lst"`
	// Integer counts, presumably one per origin URL - TODO confirm.
	Counts__from_origin_urls_lst  []int      `bson:"counts__from_origin_urls_lst"  json:"counts__from_origin_urls_lst"`
	// Lists of href strings, presumably one inner list per origin URL - TODO confirm.
	A_hrefs__from_origin_urls_lst [][]string `bson:"a_hrefs__from_origin_urls_lst" json:"a_hrefs__from_origin_urls_lst"`
}

type Gf_stats__objs_by_days

// Gf_stats__objs_by_days holds per-day counts for one object type, both
// globally and broken down per domain. Every field uses the same key name in
// its JSON and BSON encodings.
// NOTE(review): the sort key implied by "sorted" in the field names is not
// visible here - confirm against the code that builds these lists.
type Gf_stats__objs_by_days struct {
	// Object type these counts describe.
	Obj_type_str                     string                           `json:"obj_type_str"                     bson:"obj_type_str"`
	Counts_by_day__sorted_lst        []int                            `json:"counts_by_day__sorted_lst"        bson:"counts_by_day__sorted_lst"`        // global count of fetches per day
	Domain_counts_by_day__sorted_lst []*Gf_domain_counts_for_all_days `json:"domain_counts_by_day__sorted_lst" bson:"domain_counts_by_day__sorted_lst"` // counts of fetches per domain per day
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL