Documentation ¶
Index ¶
- func Get_query_funs(pRuntimeSys *gf_core.RuntimeSys) map[string]func(*gf_core.RuntimeSys) (map[string]interface{}, *gf_core.GFerror)
- type Gf_domain_counts_for_all_days
- type Gf_stat__crawled_gifs
- type Gf_stat__crawled_images_domain
- type Gf_stat__crawled_links_domain
- type Gf_stat__crawled_url_fetches
- type Gf_stat__error_type
- type Gf_stat__errors
- type Gf_stat__links_in_day
- type Gf_stat__unresolved_links
- type Gf_stats__objs_by_days
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func Get_query_funs ¶
func Get_query_funs(pRuntimeSys *gf_core.RuntimeSys) map[string]func(*gf_core.RuntimeSys) (map[string]interface{}, *gf_core.GFerror)
Types ¶
type Gf_stat__crawled_gifs ¶
type Gf_stat__crawled_images_domain ¶
// Gf_stat__crawled_images_domain carries crawled-image figures for a single
// domain. The domain is read from the BSON "_id" key, which suggests each
// value is one row of a group-by-domain query result.
// NOTE(review): the producing query is not visible here — confirm.
//
// NOTE(review): the *_lst fields look like parallel lists holding one entry
// per image at matching indexes — verify against the code that fills them.
type Gf_stat__crawled_images_domain struct {
	// Domain_str is decoded from the BSON "_id" key and encoded as
	// "domain_str" in JSON.
	Domain_str string `bson:"_id" json:"domain_str"`

	// Imgs_count_int is the image count reported for the domain.
	Imgs_count_int int `bson:"imgs_count_int" json:"imgs_count_int"`

	// Creation_unix_times_lst holds creation times as float64 values
	// (presumably UNIX seconds, going by the field name — confirm).
	Creation_unix_times_lst []float64 `bson:"creation_unix_times_lst" json:"creation_unix_times_lst"`

	// Urls_lst holds the image URLs.
	Urls_lst []string `bson:"urls_lst" json:"urls_lst"`

	// Origin_urls_lst holds the origin URLs (presumably the pages the
	// images were found on — confirm).
	Origin_urls_lst []string `bson:"origin_urls_lst" json:"origin_urls_lst"`

	// Downloaded_lst, Valid_for_usage_lst and S3_stored_lst are boolean
	// flag lists.
	Downloaded_lst      []bool `bson:"downloaded_lst" json:"downloaded_lst"`
	Valid_for_usage_lst []bool `bson:"valid_for_usage_lst" json:"valid_for_usage_lst"`
	S3_stored_lst       []bool `bson:"s3_stored_lst" json:"s3_stored_lst"`
}
type Gf_stat__crawled_links_domain ¶
// Gf_stat__crawled_links_domain carries crawled-link figures for a single
// domain. The domain is read from the BSON "_id" key, which suggests each
// value is one row of a group-by-domain query result.
// NOTE(review): the producing query is not visible here — confirm.
//
// NOTE(review): the *_lst fields look like parallel lists holding one entry
// per link at matching indexes — verify against the code that fills them.
type Gf_stat__crawled_links_domain struct {
	// Domain_str is decoded from the BSON "_id" key and encoded as
	// "domain_str" in JSON.
	Domain_str string `bson:"_id" json:"domain_str"`

	// Links_count_int is the link count reported for the domain.
	Links_count_int int `bson:"links_count_int" json:"links_count_int"`

	// Creation_unix_times_lst holds creation times as float64 values
	// (presumably UNIX seconds, going by the field name — confirm).
	Creation_unix_times_lst []float64 `bson:"creation_unix_times_lst" json:"creation_unix_times_lst"`

	// A_href_lst holds the link targets (presumably the href values of
	// <a> elements — confirm).
	A_href_lst []string `bson:"a_href_lst" json:"a_href_lst"`

	// Origin_urls_lst holds the origin URLs (presumably the pages the
	// links were found on — confirm).
	Origin_urls_lst []string `bson:"origin_urls_lst" json:"origin_urls_lst"`

	// Valid_for_crawl_lst: whether the link is to be crawled/followed, or
	// should be ignored.
	Valid_for_crawl_lst []bool `bson:"valid_for_crawl_lst" json:"valid_for_crawl_lst"`

	// Fetched_lst: whether the link's HTML was downloaded.
	Fetched_lst []bool `bson:"fetched_lst" json:"fetched_lst"`

	// Images_processed_lst: whether the images of the link's HTML page
	// were downloaded/processed.
	Images_processed_lst []bool `bson:"images_processed_lst" json:"images_processed_lst"`
}
type Gf_stat__error_type ¶
type Gf_stat__errors ¶
type Gf_stat__errors struct { Crawler_name_str string `bson:"_id" json:"crawler_name_str"` Errors_types_lst []Gf_stat__error_type `bson:"errors_types_lst" json:"errors_types_lst"` }
type Gf_stat__links_in_day ¶
type Gf_stat__unresolved_links ¶
// Gf_stat__unresolved_links carries unresolved-link figures for a single
// origin domain. The domain is read from the BSON "_id" key, which suggests
// each value is one row of a group-by-origin-domain query result.
// NOTE(review): the producing query is not visible here — confirm.
//
// NOTE(review): the three list fields look index-aligned — entry i of the
// counts and a_hrefs lists presumably belongs to Origin_urls_lst[i]; verify
// against the code that fills them.
type Gf_stat__unresolved_links struct {
	// Origin_domain_str is decoded from the BSON "_id" key and encoded as
	// "origin_domain_str" in JSON.
	Origin_domain_str string `bson:"_id" json:"origin_domain_str"`

	// Origin_urls_lst holds the origin URLs.
	Origin_urls_lst []string `bson:"origin_urls_lst" json:"origin_urls_lst"`

	// Counts__from_origin_urls_lst holds one integer count per entry.
	Counts__from_origin_urls_lst []int `bson:"counts__from_origin_urls_lst" json:"counts__from_origin_urls_lst"`

	// A_hrefs__from_origin_urls_lst holds one list of link targets per
	// entry.
	A_hrefs__from_origin_urls_lst [][]string `bson:"a_hrefs__from_origin_urls_lst" json:"a_hrefs__from_origin_urls_lst"`
}
type Gf_stats__objs_by_days ¶
type Gf_stats__objs_by_days struct { Obj_type_str string `json:"obj_type_str" bson:"obj_type_str"` Counts_by_day__sorted_lst []int `json:"counts_by_day__sorted_lst" bson:"counts_by_day__sorted_lst"` //global count of fetches per day Domain_counts_by_day__sorted_lst []*Gf_domain_counts_for_all_days `json:"domain_counts_by_day__sorted_lst" bson:"domain_counts_by_day__sorted_lst"` //counts of fetches per domain per day }
Click to show internal directories.
Click to hide internal directories.