gdelt

package module
v0.0.0-...-bfc50b6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 8, 2024 License: Apache-2.0 Imports: 14 Imported by: 0

README

GDELT Fetcher

Overview

This package provides tools for fetching and parsing the latest GDELT events data in Go.

Installation

To use this package, add it to your Go project:

go get -u github.com/nlpodyssey/gdelt

Example

package main

import (
	"fmt"
	"time"

	"github.com/nlpodyssey/gdelt"
	"github.com/rs/zerolog/log"
)

// main fetches the latest GDELT events with the default options and prints a
// compact summary (ID, source URL, headline, image URI, publication time) of
// each one to standard output. A fetch failure is logged as fatal.
func main() {
	log.Info().Msg("getting latest GDELT events")

	events, err := gdelt.FetchLatestEvents(gdelt.DefaultOpts)
	if err != nil {
		log.Fatal().Err(err).Msg("error fetching latest events")
	}

	log.Info().Msgf("processing %d events", len(events))

	for _, event := range events {
		// GKGArticle is a pointer (*gdelt.Article) and the package
		// documentation does not state that it is always set, so guard the
		// dereference; events without an article keep an empty headline and
		// image URI instead of panicking.
		var headline, imageURI string
		if article := event.GKGArticle; article != nil {
			headline = article.Extras.PageTitle
			imageURI = article.SharingImage
		}

		doc := struct {
			EventID     uint64
			URI         string
			Headline    string
			ImageURI    string
			PublishedAt time.Time
		}{
			EventID:     event.GlobalEventID,
			URI:         event.SourceURL,
			Headline:    headline,
			ImageURI:    imageURI,
			PublishedAt: event.PublishedAt(),
		}

		fmt.Printf("%+v\n", doc)
	}
}

Contributions

Contributions to this package are welcome.

License

This project is licensed under the Apache License, Version 2.0.

Documentation

Index

Constants

View Source
// LastUpdateURL is the address of the English-language list of CSV data
// files covering the last 15 minutes. The list is refreshed every 15 minutes.
const LastUpdateURL = "http://data.gdeltproject.org/gdeltv2/lastupdate.txt"

// LastUpdateTranslationURL is the address of the GDELT Translingual list of
// CSV data files covering the last 15 minutes. The list is refreshed every
// 15 minutes.
const LastUpdateTranslationURL = "http://data.gdeltproject.org/gdeltv2/lastupdate-translation.txt"

Variables

View Source
// DefaultOpts is a ready-made Opts value that can be passed directly to
// FetchLatestEvents, as the README example does. Fields are listed in the
// order in which Opts declares them.
var DefaultOpts = Opts{
	SkipDuplicates:        true,
	SkipFutureEvents:      true,
	MaxTitleLength:        150,
	Translingual:          false,
	AllowedCameoRootCodes: []string{"13", "14", "15", "17", "18", "19", "20"},
}
View Source
var FIPS104ToISO31661 = map[string]string{}/* 258 elements not displayed */

FIPS104ToISO31661 maps FIPS 10-4 country codes to ISO 3166-1 alpha-2 codes.

Functions

func IsBadStatusCodeError

func IsBadStatusCodeError(err error) bool

Types

type ActorData

// ActorData holds the descriptive codes recorded for one actor of an Event;
// it is the type of Event.Actor1 and Event.Actor2. Every field is a plain
// string.
// NOTE(review): the code vocabularies behind these fields are not described
// here (presumably the CAMEO actor codes of the GDELT codebook) — confirm.
type ActorData struct {
	Code           string
	Name           string
	CountryCode    string
	KnownGroupCode string
	EthnicCode     string
	Religion1Code  string
	Religion2Code  string
	Type1Code      string
	Type2Code      string
	Type3Code      string
}

type Article

// Article is the article record attached to an Event through
// Event.GKGArticle. The README example reads SharingImage as the image URI
// and Extras.PageTitle as the headline.
// NOTE(review): the meaning of ID and DocumentIdentifier is not documented
// here — presumably they mirror the GKG record fields; confirm.
type Article struct {
	ID                 string
	DocumentIdentifier string
	SharingImage       string
	Extras             ArticleExtras
}

type ArticleExtras

// ArticleExtras holds additional article fields; it is the type of
// Article.Extras.
type ArticleExtras struct {
	// PageTitle is mapped to the "PAGE_TITLE" XML element by its struct tag.
	PageTitle string `xml:"PAGE_TITLE"`
}

type BadStatusCodeError

// BadStatusCodeError indicates an unexpected HTTP response status code.
type BadStatusCodeError struct {
	// StatusCode is the status code carried by the error.
	// NOTE(review): presumably the code of the offending HTTP response — confirm.
	StatusCode int
}

BadStatusCodeError indicates an unexpected HTTP response status code. It provides minimal information. It can be wrapped and recognized using IsBadStatusCodeError.

func NewBadStatusCodeError

func NewBadStatusCodeError(statusCode int) BadStatusCodeError

func (BadStatusCodeError) Error

func (err BadStatusCodeError) Error() string

type Event

// Event is a single GDELT event record, as returned by FetchLatestEvents.
type Event struct {
	// GlobalEventID is the globally unique identifier assigned to each event
	// record that uniquely identifies it in GDELT master dataset.
	GlobalEventID uint64
	Day           int
	MonthYear     int
	Year          int
	FractionDate  float64

	// Actor1 and Actor2 describe the two participants of the event: as noted
	// on EventCode, Actor1 is the one performing the action upon Actor2.
	Actor1 ActorData
	Actor2 ActorData

	// NOTE(review): IsRootEvent is an int rather than a bool; presumably a
	// 0/1 flag — confirm.
	IsRootEvent int
	// EventCode is the raw CAMEO action code describing the action that Actor1
	// performed upon Actor2.
	EventCode string
	// EventBaseCode is the level two leaf root node category, when applicable.
	// CAMEO event codes are defined in a three-level taxonomy. For events at
	// level three in the taxonomy, this yields its level two leaf root node.
	// For example, code "0251" ("Appeal for easing of administrative
	// sanctions") would yield an EventBaseCode of "025" ("Appeal to yield").
	// This makes it possible to aggregate events at various resolutions of
	// specificity. For events at levels two or one, this field will be set
	// to EventCode.
	EventBaseCode string
	// EventRootCode is similar to EventBaseCode and defines the root-level
	// category the event code falls under. For example, code "0251" ("Appeal
	// for easing of administrative sanctions") has a root code of "02"
	// ("Appeal"). This makes it possible to aggregate events at various
	// resolutions of specificity. For events at levels two or one, this field
	// will be set to EventCode.
	EventRootCode  string
	QuadClass      int
	GoldsteinScale NullableFloat64
	NumMentions    int
	NumSources     int
	NumArticles    int
	AvgTone        float64

	// NOTE(review): Actor1Geo and Actor2Geo presumably hold the location
	// information associated with Actor1 and Actor2 respectively — confirm.
	Actor1Geo GeoData
	Actor2Geo GeoData
	// ActionGeo captures the location information closest to the point in the
	// event description that contains the actual statement of action and is
	// the best location to use for placing events on a map or in other spatial
	// context.
	ActionGeo GeoData

	// DateAdded stores the date the event was added to the master database in
	// "YYYYMMDDHHMMSS" format in the UTC timezone.
	DateAdded uint64
	// SourceURL records the URL or citation of the first news report it found
	// this event in. In most cases this is the first report it saw the article
	// in, but due to the timing and flow of news reports through the processing
	// pipeline, this may not always be the very first report, but is at least
	// in the first few reports.
	SourceURL string

	// GKGArticle points to the Article attached to this event.
	// NOTE(review): this is a pointer; whether it can be nil for events
	// returned by FetchLatestEvents is not documented — confirm before
	// dereferencing.
	GKGArticle *Article
}

func FetchLatestEvents

func FetchLatestEvents(opts Opts) (_ []*Event, err error)

FetchLatestEvents returns the latest GDELT events.

func (*Event) AllCameoEventCodes

func (e *Event) AllCameoEventCodes() []string

AllCameoEventCodes returns one or more CAMEO event codes from EventCode, EventBaseCode, and EventRootCode, keeping only one unique category code per level.

func (*Event) DateAddedTime

func (e *Event) DateAddedTime() (time.Time, error)

DateAddedTime converts the DateAdded value (a uint64 in "YYYYMMDDHHMMSS" format, UTC) to a time.Time.

func (*Event) PublishedAt

func (e *Event) PublishedAt() time.Time

PublishedAt returns the time the event was published. It is a convenience variant of DateAddedTime that returns only the time, without an error value.

type GeoData

// GeoData describes one matched geographic location. It is the type of
// Event.Actor1Geo, Event.Actor2Geo and Event.ActionGeo.
// NOTE(review): ADM1Code, ADM2Code and FeatureID are not documented here;
// presumably they follow the GDELT codebook definitions — confirm.
type GeoData struct {
	// Type specifies the geographic resolution of the match type.
	Type GeoType
	// Fullname is the full human-readable name of the matched location. In
	// the case of a country it is simply the country name. For US and World
	// states it is in the format of "State, Country Name", while for all other
	// matches it is in the format of "City/Landmark, State, Country".
	// This can be used to label locations when placing events on a map.
	Fullname string
	// CountryCode is the 2-character FIPS10-4 country code for the location.
	CountryCode string
	ADM1Code    string
	ADM2Code    string
	// Lat is the centroid latitude of the landmark for mapping.
	Lat NullableFloat64
	// Long is the centroid longitude of the landmark for mapping.
	Long      NullableFloat64
	FeatureID string
}

func (*GeoData) CountryCodeISO31661

func (g *GeoData) CountryCodeISO31661() (string, error)

type GeoType

type GeoType uint8

GeoType specifies the geographic resolution of the match type.

// Known GeoType values. The numbers are spelled out explicitly (they are the
// same values a leading iota would assign, 0 through 5) so that inserting or
// reordering a constant cannot silently renumber the others.
const (
	NoGeoType  GeoType = 0
	Country    GeoType = 1
	USState    GeoType = 2
	USCity     GeoType = 3
	WorldCity  GeoType = 4
	WorldState GeoType = 5
)

func GeoTypeFromInt

func GeoTypeFromInt(value int) (GeoType, bool)

func (GeoType) String

func (g GeoType) String() string

type NullableFloat64

// NullableFloat64 represents a float64 value that may be null.
type NullableFloat64 struct {
	// Float64 is the numeric value; consult Valid to know whether it is
	// meaningful.
	Float64 float64
	// Valid is true if Float64 is not NULL.
	Valid bool
}

NullableFloat64 represents a float64 value that may be null.

func ParseNullableFloat64

func ParseNullableFloat64(value string) (NullableFloat64, error)

ParseNullableFloat64 parses a string value, converting it to NullableFloat64.

type Opts

// Opts contains options for FetchLatestEvents. DefaultOpts provides a
// pre-filled value.
// NOTE(review): the exact effect of each field is not documented here. The
// names suggest filters on duplicates, future events and CAMEO root codes, a
// cap on title length, and a switch for the translingual feed — confirm
// against the implementation.
type Opts struct {
	SkipDuplicates        bool
	SkipFutureEvents      bool
	MaxTitleLength        int
	Translingual          bool
	AllowedCameoRootCodes []string
}

Opts contains options for FetchLatestEvents.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL