twitterscraper

package module
v0.1.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 17, 2023 License: MIT Imports: 17 Imported by: 0

README

Twitter Scraper

Go Reference

Twitter's API is annoying to work with and has lots of limitations — luckily their frontend (JavaScript) has its own API, which I reverse-engineered. No API rate limits. No tokens needed. No restrictions. Extremely fast.

You can use this library to get the text of any user's Tweets trivially.

Installation

go get -u github.com/Watqt/twitter-scraper

Usage

Get user tweets
package main

import (
    "context"
    "fmt"
    twitterscraper "github.com/Watqt/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New()

    for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) {
        if tweet.Error != nil {
            panic(tweet.Error)
        }
        fmt.Println(tweet.Text)
    }
}

The example above asks for up to 50 tweets; the overall limit appears to be about 3200 tweets.

Get single tweet
package main

import (
    "fmt"

    twitterscraper "github.com/Watqt/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New()
    tweet, err := scraper.GetTweet("1328684389388185600")
    if err != nil {
        panic(err)
    }
    fmt.Println(tweet.Text)
}
Search tweets by query standard operators

Tweets containing “twitter” and “scraper” and “data”, filtering out retweets:

package main

import (
    "context"
    "fmt"
    twitterscraper "github.com/Watqt/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New()
    for tweet := range scraper.SearchTweets(context.Background(),
        "twitter scraper data -filter:retweets", 50) {
        if tweet.Error != nil {
            panic(tweet.Error)
        }
        fmt.Println(tweet.Text)
    }
}

The search ends if we have 50 tweets.

See Rules and filtering for how to build standard queries.

Set search mode
scraper.SetSearchMode(twitterscraper.SearchLatest)

Options:

  • twitterscraper.SearchTop - default mode
  • twitterscraper.SearchLatest - live mode
  • twitterscraper.SearchPhotos - image mode
  • twitterscraper.SearchVideos - video mode
  • twitterscraper.SearchUsers - user mode
Get profile
package main

import (
    "fmt"
    twitterscraper "github.com/Watqt/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New()
    profile, err := scraper.GetProfile("Twitter")
    if err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", profile)
}
Search profiles by query
package main

import (
    "context"
    "fmt"
    twitterscraper "github.com/Watqt/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New().SetSearchMode(twitterscraper.SearchUsers)
    for profile := range scraper.SearchProfiles(context.Background(), "Twitter", 50) {
        if profile.Error != nil {
            panic(profile.Error)
        }
        fmt.Println(profile.Name)
    }
}
Get trends
package main

import (
    "fmt"
    twitterscraper "github.com/Watqt/twitter-scraper"
)

func main() {
    scraper := twitterscraper.New()
    trends, err := scraper.GetTrends("1") // GetTrends takes a WOEID string; "1" is the worldwide WOEID
    if err != nil {
        panic(err)
    }
    fmt.Println(trends)
}
Use authentication

Some users' tweets are protected: you must log in and follow those users to read them. Logging in is also required for search.

err := scraper.Login("username", "password")

Log in with your username, not your email address! If your account has email confirmation, pass the email address as an additional argument:

err := scraper.Login("username", "password", "email")

Status of login can be checked with:

scraper.IsLoggedIn()

Logout (clear session):

scraper.Logout()

If you want to keep the session between restarts, you can save the cookies with scraper.GetCookies() and restore them with scraper.SetCookies().

For example, save cookies:

cookies := scraper.GetCookies()
// serialize to JSON
js, _ := json.Marshal(cookies)
// save to file
f, _ := os.Create("cookies.json")
f.Write(js)

and load cookies:

f, _ := os.Open("cookies.json")
// deserialize from JSON
var cookies []*http.Cookie
json.NewDecoder(f).Decode(&cookies)
// load cookies
scraper.SetCookies(cookies)
// check login status
scraper.IsLoggedIn()
Use Proxy

HTTP(S) and SOCKS5 proxies are supported.

with HTTP
err := scraper.SetProxy("http://localhost:3128")
if err != nil {
    panic(err)
}
with SOCKS5
err := scraper.SetProxy("socks5://localhost:1080")
if err != nil {
    panic(err)
}
Delay requests

Add delay between API requests (in seconds)

scraper.WithDelay(5)
Load timeline with tweet replies
scraper.WithReplies(true)

Documentation

Index

Constants

View Source
const DefaultClientTimeout = 10 * time.Second

DefaultClientTimeout is the default HTTP client timeout.

Variables

This section is empty.

Functions

This section is empty.

Types

type Location added in v0.1.2

type Location struct {
	Name  string  `json:"name"`
	Woeid float64 `json:"woeid"`
}

type Mention

type Mention struct {
	ID       string
	Username string
	Name     string
}

Mention type.

type Photo

type Photo struct {
	ID  string
	URL string
}

Photo type.

type Place

type Place struct {
	ID          string `json:"id"`
	PlaceType   string `json:"place_type"`
	Name        string `json:"name"`
	FullName    string `json:"full_name"`
	CountryCode string `json:"country_code"`
	Country     string `json:"country"`
	BoundingBox struct {
		Type        string        `json:"type"`
		Coordinates [][][]float64 `json:"coordinates"`
	} `json:"bounding_box"`
}

type Profile

type Profile struct {
	Avatar         string
	Banner         string
	Biography      string
	Birthday       string
	FollowersCount int
	FollowingCount int
	FriendsCount   int
	IsPrivate      bool
	IsVerified     bool
	Joined         *time.Time
	LikesCount     int
	ListedCount    int
	Location       string
	Name           string
	PinnedTweetIDs []string
	TweetsCount    int
	URL            string
	UserID         string
	Username       string
	Website        string
}

Profile of twitter user.

type ProfileResult

type ProfileResult struct {
	Profile
	Error error
}

ProfileResult of scraping.

type Scraper

type Scraper struct {
	// contains filtered or unexported fields
}

Scraper object

func New

func New() *Scraper

New creates a Scraper object

func (*Scraper) FetchSearchProfiles

func (s *Scraper) FetchSearchProfiles(query string, maxProfilesNbr int, cursor string) ([]*Profile, string, error)

FetchSearchProfiles gets users for a given search query, via the Twitter frontend API

func (*Scraper) FetchSearchTweets

func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API

func (*Scraper) FetchTweets

func (s *Scraper) FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchTweets gets tweets for a given user, via the Twitter frontend API.

func (*Scraper) FetchTweetsByUserID added in v0.1.3

func (s *Scraper) FetchTweetsByUserID(userID string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)

FetchTweetsByUserID gets tweets for a given userID, via the Twitter frontend API.

func (*Scraper) GetCookies

func (s *Scraper) GetCookies() []*http.Cookie

func (*Scraper) GetFlowToken added in v0.1.3

func (s *Scraper) GetFlowToken() string

func (*Scraper) GetGuestToken

func (s *Scraper) GetGuestToken() error

GetGuestToken from Twitter API

func (*Scraper) GetGuestTokenSession added in v0.1.3

func (s *Scraper) GetGuestTokenSession() string

func (*Scraper) GetProfile

func (s *Scraper) GetProfile(username string) (Profile, error)

GetProfile returns the parsed user profile.

func (*Scraper) GetTrends

func (s *Scraper) GetTrends(WOEID string) ([]Trend, error)

GetTrends returns a list of trends.

func (*Scraper) GetTweet

func (s *Scraper) GetTweet(id string) (*Tweet, error)

GetTweet gets a single tweet by ID.

func (*Scraper) GetTweets

func (s *Scraper) GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult

GetTweets returns channel with tweets for a given user.

func (*Scraper) GetUserIDByScreenName

func (s *Scraper) GetUserIDByScreenName(screenName string) (string, error)

GetUserIDByScreenName from API

func (*Scraper) IsGuestToken

func (s *Scraper) IsGuestToken() bool

IsGuestToken checks whether the guest token is not empty.

func (*Scraper) IsLoggedIn

func (s *Scraper) IsLoggedIn() bool

IsLoggedIn checks whether the scraper is logged in.

func (*Scraper) Login

func (s *Scraper) Login(credentials ...string) error

Login logs in to Twitter. Use Login(username, password) for an ordinary login, or Login(username, password, email) if you have email confirmation.

func (*Scraper) Logout

func (s *Scraper) Logout() error

Logout resets the session.

func (*Scraper) RequestAPI

func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error

RequestAPI gets JSON from the frontend API and decodes it.

func (*Scraper) SearchProfiles

func (s *Scraper) SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult

SearchProfiles returns channel with profiles for a given search query

func (*Scraper) SearchTweets

func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult

SearchTweets returns channel with tweets for a given search query

func (*Scraper) SetCookies

func (s *Scraper) SetCookies(cookies []*http.Cookie)

func (*Scraper) SetGuestTokenSession added in v0.1.3

func (s *Scraper) SetGuestTokenSession(token string)

func (*Scraper) SetIsLogged added in v0.1.3

func (s *Scraper) SetIsLogged(isLogged bool)

func (*Scraper) SetProxy

func (s *Scraper) SetProxy(proxyAddr string) error

SetProxy sets the proxy. Use the format `http://HOST:PORT` for an HTTP proxy, or `socks5://HOST:PORT` for a SOCKS5 proxy.

func (*Scraper) SetSearchMode

func (s *Scraper) SetSearchMode(mode SearchMode) *Scraper

SetSearchMode switcher

func (*Scraper) WithClientTimeout

func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper

WithClientTimeout sets the client timeout.

func (*Scraper) WithDelay

func (s *Scraper) WithDelay(seconds int64) *Scraper

WithDelay adds a delay between API requests (in seconds).

func (*Scraper) WithReplies

func (s *Scraper) WithReplies(b bool) *Scraper

WithReplies enables or disables loading the timeline with tweet replies.

type SearchMode

type SearchMode int

SearchMode type

const (
	// SearchTop - default mode
	SearchTop SearchMode = iota
	// SearchLatest - live mode
	SearchLatest
	// SearchPhotos - image mode
	SearchPhotos
	// SearchVideos - video mode
	SearchVideos
	// SearchUsers - user mode
	SearchUsers
)

type Trend added in v0.1.1

type Trend struct {
	Name            string      `json:"name"`
	PromotedContent interface{} `json:"promoted_content"`
	Query           string      `json:"query"`
	TweetVolume     interface{} `json:"tweet_volume"`
	URL             string      `json:"url"`
}

type TrendData added in v0.1.2

type TrendData struct {
	AsOf      string     `json:"as_of"`
	CreatedAt string     `json:"created_at"`
	Locations []Location `json:"locations"`
	Trends    []Trend    `json:"trends"`
}

type Tweet

type Tweet struct {
	Hashtags         []string
	HTML             string
	ID               string
	InReplyToStatus  *Tweet
	IsQuoted         bool
	IsPin            bool
	IsReply          bool
	IsRetweet        bool
	Likes            int
	Name             string
	Mentions         []Mention
	PermanentURL     string
	Photos           []Photo
	Place            *Place
	QuotedStatus     *Tweet
	Replies          int
	Retweets         int
	RetweetedStatus  *Tweet
	Text             string
	TimeParsed       time.Time
	Timestamp        int64
	URLs             []string
	UserID           string
	Username         string
	Videos           []Video
	SensitiveContent bool
}

Tweet type.

type TweetResult

type TweetResult struct {
	Tweet
	Error error
}

TweetResult of scraping.

type Video

type Video struct {
	ID      string
	Preview string
	URL     string
}

Video type.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL