crawler

package
v0.0.0-...-aa01819 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 5, 2023 License: Apache-2.0 Imports: 20 Imported by: 0

Documentation

Overview

Copyright 2021 Wei (Sam) Wang <sam.wang.0723@gmail.com>

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Copyright 2021 Wei (Sam) Wang <sam.wang.0723@gmail.com>

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Copyright 2021 Wei (Sam) Wang <sam.wang.0723@gmail.com>

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Copyright 2021 Wei (Sam) Wang <sam.wang.0723@gmail.com>

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Copyright 2021 Wei (Sam) Wang <sam.wang.0723@gmail.com>

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Index

Constants

View Source
// Source URLs and URL templates for the remote pages this package refers to.
// Entries containing a formatting verb (%s, %d) are templates that must be
// filled in before they are usable as URLs.
//
//   - TwseDailyClose, TwseThreePrimary: www.twse.com.tw endpoints requested
//     with response=csv; the single %s fills the "date" query parameter (the
//     expected date layout is not shown here, TODO confirm).
//   - TpexDailyClose, TpexThreePrimary: www.tpex.org.tw endpoints; the single
//     %s fills the "d" query parameter (presumably also a date, TODO confirm).
//   - TWSEStocks, TPEXStocks: fixed isin.twse.com.tw pages that differ only in
//     the strMode value (2 and 4); they take no arguments.
//   - ConcentrationDays: takes a string followed by an integer (%s, then %d).
//     NOTE(review): presumably a stock identifier and a number of days;
//     confirm against the caller.
const (
	TwseDailyClose   = "https://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date=%s&type=ALLBUT0999"
	TwseThreePrimary = "https://www.twse.com.tw/rwd/zh/fund/T86?response=csv&date=%s&selectType=ALLBUT0999"
	TpexDailyClose   = "https://www.tpex.org.tw/web/stock/aftertrading/daily_close_quotes/stk_quote_download.php?l=zh-tw&d=%s&s=0,asc,0"
	TpexThreePrimary = "https://www.tpex.org.tw/web/stock/3insti/daily_trade/3itrade_hedge_result.php?l=zh-tw&o=csv&se=EW&t=D&d=%s"
	TWSEStocks       = "https://isin.twse.com.tw/isin/C_public.jsp?strMode=2"
	TPEXStocks       = "https://isin.twse.com.tw/isin/C_public.jsp?strMode=4"
	// backup: stockchannelnew.sinotrade.com.tw
	ConcentrationDays = "https://fubon-ebrokerdj.fbs.com.tw/z/zc/zco/zco_%s_%d.djhtm"
)
View Source
// Proxy service tags and the endpoint templates of the corresponding services.
//
// WebScraping and ProxyCrawl are plain string tags. NOTE(review): presumably
// these are the values expected in Proxy.Type; confirm against Proxy.URI.
//
// WebScrapingUrl and ProxyCrawlUrl are templates whose single %s fills the
// "api_key" and "token" query parameter respectively.
const (
	WebScraping    = "WEB_SCRAPING"
	WebScrapingUrl = "https://api.webscrapingapi.com/v1?api_key=%s"
	ProxyCrawl     = "PROXY_CRAWL"
	ProxyCrawlUrl  = "https://api.crawlbase.com/?token=%s"
)

Variables

View Source
var (
	// DefaultHTTPClient is a preconfigured *http.Client whose Timeout is
	// defaultHTTPTimeout and which uses its own http.Transport instead of
	// http.DefaultTransport.
	DefaultHTTPClient = &http.Client{
		Timeout: defaultHTTPTimeout,
		Transport: &http.Transport{
			// NOTE(review): InsecureSkipVerify turns off verification of the
			// server's certificate chain and host name, which leaves TLS
			// connections made through this client open to man-in-the-middle
			// attacks. Confirm this is intentional before reusing the client
			// for anything sensitive.
			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		},
	}

	// TypeLinkMapping maps the String() value of each convert identifier
	// listed below to the source URL (or URL template) constant associated
	// with it.
	TypeLinkMapping = map[string]string{
		convert.TpexStockList.String():      TPEXStocks,
		convert.TwseStockList.String():      TWSEStocks,
		convert.TwseDailyClose.String():     TwseDailyClose,
		convert.TpexDailyClose.String():     TpexDailyClose,
		convert.TwseThreePrimary.String():   TwseThreePrimary,
		convert.TpexThreePrimary.String():   TpexThreePrimary,
		convert.StakeConcentration.String(): ConcentrationDays,
	}
)

Functions

This section is empty.

Types

type Config

// Config carries the settings accepted by New when building a Crawler.
type Config struct {
	// A URLGetter instance to fetch links.
	URLGetter URLGetter

	// A Proxy instance for avoiding remote rate limiting
	Proxy *Proxy

	// The number of concurrent workers used for retrieving links.
	FetchWorkers int

	// Rate limit interval to prevent remote site blocking
	// NOTE(review): the unit of this value is not visible here; confirm
	// against the implementation before setting it.
	RateLimitInterval int64

	// Logger is a zerolog logger. NOTE(review): presumably used for the
	// crawler's own log output; its usage is not visible here.
	Logger *zerolog.Logger
}

type Crawler

// Crawler is the interface returned by New.
type Crawler interface {
	// Crawl takes a context, a graph.LinkIterator and zero or more
	// convert.InterceptData channels, and returns a count together with an
	// error. NOTE(review): the count is presumably the number of links
	// processed; confirm against the implementation.
	Crawl(ctx context.Context, linkIt graph.LinkIterator, interceptChan ...chan convert.InterceptData) (int, error)
}

func New

func New(cfg Config) Crawler

type Proxy

// Proxy identifies a proxy service by a string tag.
type Proxy struct {
	// Type names the proxy service. NOTE(review): presumably one of the
	// WebScraping / ProxyCrawl constants; confirm against URI.
	Type string
}

func (*Proxy) URI

func (p *Proxy) URI(source string) string

type URLGetter

// URLGetter is implemented by objects that can execute an HTTP request and
// return its response. Its single method has the same signature as
// (*http.Client).Do.
type URLGetter interface {
	// Do takes an HTTP request and returns the response or an error.
	Do(req *http.Request) (*http.Response, error)
}

URLGetter is implemented by objects that can perform HTTP requests, such as *http.Client; the crawler uses it to fetch links.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL