scrape

package module
v0.1.17 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 15, 2021 License: Apache-2.0 Imports: 24 Imported by: 1

README

go-scrape

    DefaultOutputPath = "video"

    // If you need to go through a proxy, register the proxy address first
    //RegisterProxy("https://localhost:10808")
    //RegisterProxy("http://localhost:10808")
	e := RegisterProxy("socks5://localhost:10808")
	if e != nil {
		return
    }
    // Create the scraper
	grab2 := NewGrabJavbus()
	grab3 := NewGrabJavdb()
	scrape := NewScrape(GrabOption(grab2), GrabOption(grab3), OptimizeOption(true))

    // IDs to look up: Find may be called once or several times
	e = scrape.Find("abp-891")
	e = scrape.Find("abp-892")

    // Iterate over the results
	e = scrape.Range(func(key string, content Content) error {
		t.Log("key", key, "info", content)
		return nil
	})
	// Or write the results to DefaultOutputPath
	e = scrape.Output()

Documentation

Index

Constants

View Source
const DefaulBp4xMainPage = "https://www.bp4x.com"

DefaulBp4xMainPage ...

View Source
const DefaultJavbusMainPage = "https://www.javbus.com"

DefaultJavbusMainPage ...

View Source
const DefaultJavdbMainPage = "https://javdb7.com"

DefaultJavdbMainPage ...

Variables

View Source
var DefaultCachePath = "tmp"

DefaultCachePath ...

View Source
var DefaultInfoName = ".info"

DefaultInfoName ...

View Source
var DefaultOutputPath = "image"

DefaultOutputPath ...

Functions

func DebugOn

func DebugOn()

DebugOn ...

func Ext added in v0.0.8

func Ext(source string) string

Ext ...

func Hash added in v0.0.8

func Hash(url string) string

Hash ...

func RegisterProxy added in v0.0.8

func RegisterProxy(addr string, args ...ProxyArgs) (e error)

RegisterProxy ...

func TrimEnd added in v0.0.8

func TrimEnd(source string) string

TrimEnd ...

Types

type AnalyzeLanguageFunc

type AnalyzeLanguageFunc func(selection *goquery.Selection, detail *javbusSearchDetail) (e error)

AnalyzeLanguageFunc ...

type Cache added in v0.0.8

type Cache struct {
	// contains filtered or unexported fields
}

Cache ...

func NewCache added in v0.0.11

func NewCache() *Cache

NewCache ...

func (*Cache) BaseQuery added in v0.1.6

func (c *Cache) BaseQuery(url string) (*goquery.Document, error)

func (*Cache) ForceGet added in v0.1.5

func (c *Cache) ForceGet(url string) (bys []byte, e error)

func (*Cache) ForceQuery added in v0.1.6

func (c *Cache) ForceQuery(url string) (*goquery.Document, error)

func (*Cache) Get added in v0.0.8

func (c *Cache) Get(url string) (bys []byte, e error)

Get ...

func (*Cache) GetBytes added in v0.0.8

func (c *Cache) GetBytes(url string) ([]byte, error)

GetBytes ...

func (*Cache) GetReader added in v0.0.8

func (c *Cache) GetReader(url string, force bool) (io.Reader, error)

GetReader ...

func (*Cache) Query added in v0.0.8

func (c *Cache) Query(url string, force bool) (*goquery.Document, error)

Query ...

func (*Cache) Save added in v0.0.8

func (c *Cache) Save(url, to string) (e error)

Save ...

type Content

type Content struct {
	From          string // where this content came from
	Language      string
	Uncensored    bool
	ID            string
	Title         string
	OriginalTitle string
	Year          string
	ReleaseDate   time.Time
	Studio        string
	Director      string
	MovieSet      string
	Plot          string
	Genres        []*Genre
	Actors        []*Star
	Poster        string
	Thumb         string
	Sample        []*Sample
	Publisher     string
}

Content ...

func MergeOptimize added in v0.0.8

func MergeOptimize(id string, contents []*Content) *Content

MergeOptimize ...

type Genre

type Genre struct {
	URL     string
	Content string
}

Genre ...

type GrabBp4xOptions

type GrabBp4xOptions func(javbus *grabBp4x)

GrabBp4xOptions ...

func GrabBp4xTypeOption

func GrabBp4xTypeOption(grabType GrabBp4xType) GrabBp4xOptions

GrabBp4xTypeOption ...

type GrabBp4xType

type GrabBp4xType int

GrabBp4xType ...

const (
	BP4XTypeJAV GrabBp4xType = iota
	BP4XTypeAMATEUR
	BP4XTypeIV
	BP4XTypeHENTAI
)

BP4XTypeJAV ...

type GrabJavbusOptions

type GrabJavbusOptions func(javbus *grabJavbus)

GrabJavbusOptions ...

func JavbusExact

func JavbusExact(b bool) GrabJavbusOptions

JavbusExact ...

func JavbusLang

func JavbusLang(language GrabLanguage) GrabJavbusOptions

JavbusLang ...

type GrabJavdbOptions

type GrabJavdbOptions func(javdb *grabJavdb)

GrabJavdbOptions ...

func JavdbExact

func JavdbExact(b bool) GrabJavdbOptions

JavdbExact ...

type GrabLanguage

type GrabLanguage int

GrabLanguage ...

const (
	LanguageEnglish GrabLanguage = iota
	LanguageJapanese
	LanguageChineseSimple
	LanguageChineseTraditional
	LanguageKorea
)

GrabLanguage detail ...

func (GrabLanguage) String added in v0.1.1

func (g GrabLanguage) String() string

type IGrab

type IGrab interface {
	MainPage(url string)
	SetSample(bool)
	SetExact(bool)
	SetLanguage(language GrabLanguage)
	Name() string
	Find(string) (IGrab, error)
	HasNext() bool
	Next() (IGrab, error)
	Result() ([]Content, error)
	SetForce(force bool)
}

IGrab ...

func NewGrabBp4x

func NewGrabBp4x(ops ...GrabBp4xOptions) IGrab

NewGrabBp4x ...

func NewGrabJavbus

func NewGrabJavbus(ops ...GrabJavbusOptions) IGrab

NewGrabJavbus ...

func NewGrabJavdb

func NewGrabJavdb(ops ...GrabJavdbOptions) IGrab

NewGrabJavdb ...

type IScrape

type IScrape interface {
	Cache() *Cache
	Force(b bool)
	IsGrabSample() (b bool)
	Find(name string) (e error)
	Clear()
	Range(rangeFunc RangeFunc) error
	OutputCallback(f func(key string, content Content) *OutputInfo) []OutputInfo
	Output() error
}

IScrape ...

func NewScrape

func NewScrape(opts ...Options) IScrape

NewScrape ...

type Options added in v0.0.8

type Options func(impl *scrapeImpl)

Options ...

func CacheOption added in v0.0.8

func CacheOption(cache *Cache) Options

CacheOption ...

func ExactOption added in v0.0.8

func ExactOption(b bool) Options

ExactOption ...

func ForceOption added in v0.1.6

func ForceOption(b bool) Options

func GrabOption added in v0.0.8

func GrabOption(grab IGrab) Options

GrabOption ...

func SampleOption added in v0.0.8

func SampleOption(b bool) Options

SampleOption ...

type OutputInfo added in v0.1.9

type OutputInfo struct {
	Name        string
	Skip        bool
	Force       bool
	OutputPath  string
	CopyInfo    bool
	InfoPath    string
	InfoName    string
	CopyPoster  bool
	PosterPath  string
	PosterName  string
	CopyThumb   bool
	ThumbPath   string
	ThumbName   string
	CopySample  bool
	SamplePath  string
	SampleName  string
	SampleFiles []string
	ImagePath   string
	InfoExt     string
}

func DefaultOutputOption added in v0.1.3

func DefaultOutputOption() *OutputInfo

type ProxyArgs added in v0.0.8

type ProxyArgs func(cli *http.Client)

ProxyArgs ...

func TimeOut added in v0.0.8

func TimeOut(sec int) ProxyArgs

TimeOut ...

type RangeFunc added in v0.0.8

type RangeFunc func(key string, content Content) error

RangeFunc ...

type Sample

type Sample struct {
	Index int
	Thumb string
	Image string
	Title string
}

Sample ...

type Star

type Star struct {
	Image string
	Link  string
	Name  string   // English name
	Alias []string // other names (katakana, ...)
}

Star ...

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL