core

package module
Published: Oct 27, 2017 License: AGPL-3.0 Imports: 28 Imported by: 17

README

archive

Core Data Model definitions for archival work

archive is a Go implementation of standard data models for Data Together.

Services that import archive so far:

Notably absent from this package is the definition of a user; please see the identity service for that.

Documentation

Overview

Archive holds all common model definitions for archivers 2.0.

TODO - turn "Metadata" into github.com/datatogether/metablocks.Metablock

Index

Constants

This section is empty.

Variables

var (
	// how long before a url is considered stale. default is 72 hours.
	StaleDuration = time.Hour * 72
	// all these need to be set for file saving to work
	AwsRegion          string
	AwsAccessKeyId     string
	AwsSecretAccessKey string
	AwsS3BucketName    string
	AwsS3BucketPath    string
)

var (
	ErrNotFound        = fmt.Errorf("Not Found")
	ErrInvalidResponse = fmt.Errorf("Datastore returned an invalid response")
)
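
The Aws* variables have no defaults, so file saving won't work until they're all set. A minimal configuration sketch (the env var names here are this example's own convention; requires "os" and "time" imports):

func init() {
	AwsRegion = os.Getenv("AWS_REGION")
	AwsAccessKeyId = os.Getenv("AWS_ACCESS_KEY_ID")
	AwsSecretAccessKey = os.Getenv("AWS_SECRET_ACCESS_KEY")
	AwsS3BucketName = os.Getenv("AWS_S3_BUCKET_NAME")
	AwsS3BucketPath = os.Getenv("AWS_S3_BUCKET_PATH")
	// optionally tune the staleness window; the default is 72 hours
	StaleDuration = time.Hour * 24
}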

Functions

func CalcHash

func CalcHash(data []byte) (string, error)

CalcHash calculates the multihash key for a given slice of bytes. TODO - find a proper home for this
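
A minimal usage sketch:

data := []byte("hello, data together")
hash, err := CalcHash(data)
if err != nil {
	// handle error
}
fmt.Println(hash) // a multihash string derived from the input bytes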

func ContentUrlsCount

func ContentUrlsCount(db sqlutil.Queryable) (count int, err error)

func CountPrimers

func CountPrimers(db sqlutil.Queryable) (count int64, err error)

CountPrimers returns the total number of primers

func CountSources

func CountSources(db sqlutil.Queryable) (count int, err error)

CountSources grabs the total number of sources

func FileUrl

func FileUrl(url *Url) string

func MetadataCountByKey

func MetadataCountByKey(db sqlutil.Queryable, keyId string) (count int, err error)

func NormalizeURL

func NormalizeURL(u *url.URL) *url.URL

NormalizeURL removes inconsistencies from a given url

func NormalizeURLString

func NormalizeURLString(url string) (string, error)

NormalizeURLString removes inconsistencies from a given url string
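
For example, a sketch (the exact rules are whatever NormalizeURL applies; the output shown is illustrative):

normalized, err := NormalizeURLString("HTTP://Example.com/path")
if err != nil {
	// handle error
}
// normalized should now be in a consistent form, e.g. "http://example.com/path"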

func ValidArchivingUrl

func ValidArchivingUrl(db sqlutil.Queryable, url string) error

func WriteSnapshot

func WriteSnapshot(store datastore.Datastore, u *Url) error

WriteSnapshot creates a snapshot record in the DB from a given Url struct

Types

type Collection

type Collection struct {
	// version 4 uuid
	Id string `json:"id"`
	// Created timestamp rounded to seconds in UTC
	Created time.Time `json:"created"`
	// Updated timestamp rounded to seconds in UTC
	Updated time.Time `json:"updated"`
	// sha256 multihash of the public key that created this collection
	Creator string `json:"creator"`
	// human-readable title of the collection
	Title string `json:"title"`
	// description of the collection
	Description string `json:"description"`
	// url this collection originates from
	Url string `json:"url,omitempty"`
}

Collections are generic groupings of content. A collection can be thought of as a csv file listing content hashes in the first column, and whatever other information is necessary in subsequent columns
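
A sketch of creating & saving a collection, assuming store is a configured datastore.Datastore and the creator hash is a placeholder:

c := &Collection{
	Creator:     "<sha256-multihash-of-creator-key>", // placeholder
	Title:       "Example Records",
	Description: "a grouping of example content hashes",
}
if err := c.Save(store); err != nil {
	// handle error
}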

func CollectionsByCreator

func CollectionsByCreator(store datastore.Datastore, creator, orderby string, limit, offset int) ([]*Collection, error)

func ListCollections

func ListCollections(store datastore.Datastore, limit, offset int) ([]*Collection, error)

func (Collection) DatastoreType

func (c Collection) DatastoreType() string

func (*Collection) Delete

func (c *Collection) Delete(store datastore.Datastore) error

Delete a collection. This should only be done for erroneous additions.

func (*Collection) DeleteItems

func (c *Collection) DeleteItems(store datastore.Datastore, items []*CollectionItem) error

DeleteItems removes a given list of items from the collection

func (Collection) GetId

func (c Collection) GetId() string

func (*Collection) ItemCount

func (c *Collection) ItemCount(store datastore.Datastore) (count int, err error)

ItemCount gets the number of items in the collection

func (Collection) Key

func (c Collection) Key() datastore.Key

func (*Collection) NewSQLModel

func (c *Collection) NewSQLModel(key datastore.Key) sql_datastore.Model

func (*Collection) Read

func (c *Collection) Read(store datastore.Datastore) error

Read collection from db

func (*Collection) ReadItems

func (c *Collection) ReadItems(store datastore.Datastore, orderby string, limit, offset int) (items []*CollectionItem, err error)

ReadItems reads a bounded set of items from the collection. The orderby param currently only supports SQL-style input of a single property, eg: "index" or "index DESC"

func (*Collection) SQLParams

func (c *Collection) SQLParams(cmd sql_datastore.Cmd) []interface{}

func (Collection) SQLQuery

func (c Collection) SQLQuery(cmd sql_datastore.Cmd) string

func (*Collection) Save

func (c *Collection) Save(store datastore.Datastore) (err error)

Save a collection

func (*Collection) SaveItems

func (c *Collection) SaveItems(store datastore.Datastore, items []*CollectionItem) error

SaveItems saves a slice of items to the collection. It's up to you to ensure that the "index" param doesn't get all messed up. TODO - validate / automate the Index param?
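
A sketch, assigning Index values by hand since the package doesn't validate them yet:

items := []*CollectionItem{
	{Url: Url{Url: "https://example.com/a.csv"}, Index: 0},
	{Url: Url{Url: "https://example.com/b.csv"}, Index: 1},
}
if err := c.SaveItems(store, items); err != nil {
	// handle error
}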

func (*Collection) UnmarshalSQL

func (c *Collection) UnmarshalSQL(row sqlutil.Scannable) (err error)

UnmarshalSQL reads an sql response into the collection receiver. It expects the request to have used collectionCols() for selection.

type CollectionItem

type CollectionItem struct {
	// Collection Items are Url's at heart
	Url

	// this item's index in the collection
	Index int `json:"index"`
	// unique description of this item
	Description string `json:"description"`
	// contains filtered or unexported fields
}

CollectionItem is an item in a collection: a url with added collection-specific information. This has the effect of storing all of a collection item's "main properties" in the common list of urls

func (CollectionItem) DatastoreType

func (c CollectionItem) DatastoreType() string

DatastoreType is to satisfy sql_datastore.Model interface

func (*CollectionItem) Delete

func (c *CollectionItem) Delete(store datastore.Datastore) error

Delete a collection item

func (CollectionItem) GetId

func (c CollectionItem) GetId() string

GetId returns the Id of the collectionItem, which is the id of the underlying Url

func (CollectionItem) Key

func (c CollectionItem) Key() datastore.Key

Key is somewhat special as CollectionItems always have a Collection as their parent. This relationship is represented in directory-form: /Collection:[collection-id]/CollectionItem:[item-id]

func (*CollectionItem) NewSQLModel

func (c *CollectionItem) NewSQLModel(key datastore.Key) sql_datastore.Model

func (*CollectionItem) Read

func (c *CollectionItem) Read(store datastore.Datastore) error

Read collection item from db

func (*CollectionItem) SQLParams

func (c *CollectionItem) SQLParams(cmd sql_datastore.Cmd) []interface{}

SQLParams is to satisfy the sql_datastore.Model interface; it returns this CollectionItem's parameters for a given type of SQL command

func (CollectionItem) SQLQuery

func (c CollectionItem) SQLQuery(cmd sql_datastore.Cmd) string

SQLQuery is to satisfy the sql_datastore.Model interface, it returns the concrete query for a given type of SQL command

func (*CollectionItem) Save

func (c *CollectionItem) Save(store datastore.Datastore) (err error)

Save a collection item to a store

func (*CollectionItem) UnmarshalSQL

func (c *CollectionItem) UnmarshalSQL(row sqlutil.Scannable) (err error)

UnmarshalSQL reads an sql response into the collection item receiver. It expects the request to have used collectionCols() for selection.

type Consensus

type Consensus map[string]map[string]int

Consensus is an enumeration of Meta graph values arranged by key

func SumConsensus

func SumConsensus(subject string, blocks []*Metadata) (c Consensus, values map[string]interface{}, err error)

SumConsensus tallies the consensus around a given subject hash from a provided Metadata slice

func (Consensus) Metadata

func (c Consensus) Metadata(data map[string]interface{}) (map[string][]interface{}, error)

Metadata takes a store and gives back the actual metadata based on a provided stringMap. Any key present in the consensus that isn't found in data will write the hash value instead. The returned map should be valid for JSON encoding.
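
A sketch of tallying & resolving consensus, where blocks is a []*Metadata describing subject:

consensus, values, err := SumConsensus(subject, blocks)
if err != nil {
	// handle error
}
meta, err := consensus.Metadata(values)
if err != nil {
	// handle error
}
// meta maps each metadata key to its candidate values, ready for JSON encoding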

type CustomCrawl

type CustomCrawl struct {
	// version 4 uuid
	Id string `json:"id"`
	// Created timestamp rounded to seconds in UTC
	Created time.Time `json:"created"`
	// Updated timestamp rounded to seconds in UTC
	Updated time.Time `json:"updated"`
	// Json Web token that created this request
	Jwt string `json:"jwt"`
	// MorphRunId
	MorphRunId string `json:"morphRunId"`
	// timestamp this run was completed
	DateCompleted time.Time
	// repository for code that ran the crawl
	GithubRepo string `json:"githubRepo"`
	// OriginalUrl
	OriginalUrl string `json:"originalUrl"`
	// SqliteChecksum
	SqliteChecksum string `json:"sqliteChecksum"`
}

CustomCrawls are urls that contain content that cannot be extracted with traditional web crawling / scraping methods. This model classifies the nature of the custom crawl, setting the stage for writing custom scripts to extract the underlying content.

func ListCustomCrawls

func ListCustomCrawls(store datastore.Datastore, limit, offset int) ([]*CustomCrawl, error)

func (CustomCrawl) DatastoreType

func (CustomCrawl) DatastoreType() string

func (*CustomCrawl) Delete

func (c *CustomCrawl) Delete(store datastore.Datastore) error

Delete a custom crawl. This should only be done for erroneous additions.

func (CustomCrawl) GetId

func (c CustomCrawl) GetId() string

func (CustomCrawl) Key

func (u CustomCrawl) Key() datastore.Key

func (*CustomCrawl) NewSQLModel

func (c *CustomCrawl) NewSQLModel(key datastore.Key) sql_datastore.Model

func (*CustomCrawl) Read

func (c *CustomCrawl) Read(store datastore.Datastore) error

Read custom crawl from db

func (*CustomCrawl) SQLParams

func (c *CustomCrawl) SQLParams(cmd sql_datastore.Cmd) []interface{}

SQLParams formats a custom crawl struct for inserting / updating into postgres

func (*CustomCrawl) SQLQuery

func (c *CustomCrawl) SQLQuery(cmd sql_datastore.Cmd) string

func (*CustomCrawl) Save

func (c *CustomCrawl) Save(store datastore.Datastore) (err error)

Save a custom crawl

func (*CustomCrawl) UnmarshalSQL

func (c *CustomCrawl) UnmarshalSQL(row sqlutil.Scannable) (err error)

UnmarshalSQL reads an sql response into the custom crawl receiver. It expects the request to have used customCrawlCols() for selection.

type DataRepo

type DataRepo struct {
	// version 4 uuid
	Id string
	// Created timestamp rounded to seconds in UTC
	Created time.Time `json:"created"`
	// Updated timestamp rounded to seconds in UTC
	Updated time.Time `json:"updated"`
	// Title of this data repository
	Title string `json:"title"`
	// Human-readable description
	Description string `json:"description"`
	// Main url link to the DataRepository
	Url string `json:"url"`
}

DataRepo is a place that holds data in a structured format

func (*DataRepo) DatastoreType

func (d *DataRepo) DatastoreType() string

func (*DataRepo) Delete

func (d *DataRepo) Delete(store datastore.Datastore) error

Delete a dataRepo. This should only be done for erroneous additions.

func (*DataRepo) GetId

func (d *DataRepo) GetId() string

func (*DataRepo) Key

func (d *DataRepo) Key() datastore.Key

func (*DataRepo) NewSQLModel

func (d *DataRepo) NewSQLModel(key datastore.Key) sql_datastore.Model

func (*DataRepo) Read

func (d *DataRepo) Read(store datastore.Datastore) error

Read dataRepo from db

func (DataRepo) SQLParams

func (d DataRepo) SQLParams(cmd sql_datastore.Cmd) []interface{}

func (DataRepo) SQLQuery

func (d DataRepo) SQLQuery(cmd sql_datastore.Cmd) string

func (*DataRepo) Save

func (d *DataRepo) Save(store datastore.Datastore) (err error)

Save a dataRepo

func (*DataRepo) UnmarshalSQL

func (d *DataRepo) UnmarshalSQL(row sqlutil.Scannable) (err error)

UnmarshalSQL reads an sql response into the dataRepo receiver. It expects the request to have used dataRepoCols() for selection.

type File

type File struct {
	Url  string
	Data []byte
	Hash string
}

File is a buffered byte slice often made from a GET response body. It provides easy hash-calculation & storage to S3. TODO - deprecate; use s3-datastore, or, uh... the distributed web

func NewFileFromRes

func NewFileFromRes(url string, res *http.Response) (*File, error)

NewFileFromRes generates a new file by consuming & closing a given response body
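
A sketch of fetching a url & storing its body to S3 (the Aws* package variables must be set; requires "net/http"):

res, err := http.Get("https://example.com/data.csv")
if err != nil {
	// handle error
}
f, err := NewFileFromRes("https://example.com/data.csv", res) // consumes & closes res.Body
if err != nil {
	// handle error
}
if err := f.PutS3(); err != nil {
	// handle error
}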

func (*File) Delete

func (f *File) Delete() error

Delete a file from S3

func (*File) Filename

func (f *File) Filename() (string, error)

Filename returns the name of the file, which is its sha2-256 hash

func (*File) GetS3

func (f *File) GetS3() error

GetS3 reads a given file's hash from S3

func (*File) PutS3

func (f *File) PutS3() error

PutS3 puts the file on S3 if it doesn't already exist

type Link

type Link struct {
	// Calculated Hash for fixed ID purposes
	Hash string
	// created timestamp rounded to seconds in UTC
	Created time.Time `json:"created"`
	// updated timestamp rounded to seconds in UTC
	Updated time.Time `json:"updated"`
	// origin url of the linking document
	Src *Url `json:"src"`
	// absolute url of the <a> href property
	Dst *Url `json:"dst"`
}

A link represents an <a> tag in an html document src whose href attribute points to the url that resolves to dst. Both src & dst must be stored as urls.

func ReadDstContentLinks

func ReadDstContentLinks(db sqlutil.Queryable, src *Url) ([]*Link, error)

ReadDstContentLinks returns a list of links that specify a given url as src and are content urls

func ReadDstLinks

func ReadDstLinks(db sqlutil.Queryable, src *Url) ([]*Link, error)

ReadDstLinks returns all links that specify a given url as src

func ReadSrcLinks

func ReadSrcLinks(db sqlutil.Queryable, dst *Url) ([]*Link, error)

ReadSrcLinks returns all links that specify a given url as dst

func (*Link) DatastoreType

func (l *Link) DatastoreType() string

func (*Link) Delete

func (l *Link) Delete(store datastore.Datastore) error

func (*Link) GetId

func (l *Link) GetId() string

func (*Link) Insert

func (l *Link) Insert(store datastore.Datastore) error

func (*Link) Key

func (l *Link) Key() datastore.Key

func (*Link) NewSQLModel

func (l *Link) NewSQLModel(key datastore.Key) sql_datastore.Model

func (*Link) Read

func (l *Link) Read(store datastore.Datastore) (err error)

func (*Link) SQLParams

func (l *Link) SQLParams(cmd sql_datastore.Cmd) []interface{}

func (*Link) SQLQuery

func (l *Link) SQLQuery(cmd sql_datastore.Cmd) string

func (*Link) UnmarshalSQL

func (l *Link) UnmarshalSQL(row sqlutil.Scannable) error

func (*Link) Update

func (l *Link) Update(store datastore.Datastore) error

type Meta

type Meta struct {
	Url           string            `json:"url"`
	Date          *time.Time        `json:"date,omitempty"`
	HeadersTook   int               `json:"headersTook,omitempty"`
	Id            string            `json:"id"`
	Status        int               `json:"status"`
	ContentSniff  string            `json:"contentSniff,omitempty"`
	RawHeaders    []string          `json:"rawHeaders"`
	Headers       map[string]string `json:"headers"`
	DownloadTook  int               `json:"downloadTook,omitempty"`
	Sha256        string            `json:"sha256"`
	Multihash     string            `json:"multihash"`
	Consensus     *Consensus        `json:"consensus"`
	InboundLinks  []string          `json:"inboundLinks,omitempty"`
	OutboundLinks []string          `json:"outboundLinks,omitempty"`
}

Meta is a struct for sharing our knowledge of a url with other services

type Metadata

type Metadata struct {
	// Hash is the sha256 multihash of all other fields in metadata
	// as expressed by Metadata.HashableBytes()
	Hash string `json:"hash"`
	// Creation timestamp
	Timestamp time.Time `json:"timestamp"`
	// Sha256 multihash of the public key that signed this metadata
	KeyId string `json:"keyId"`
	// Sha256 multihash of the content this metadata is describing
	Subject string `json:"subject"`
	// Hash value of the metadata that came before this, if any
	Prev string `json:"prev"`
	// Actual metadata, a valid json Object
	Meta map[string]interface{} `json:"meta"`
}

Metadata is a record of structured data describing a subject hash. There can be many metadata entries for a given subject.

func LatestMetadata

func LatestMetadata(db sqlutil.Queryable, keyId, subject string) (m *Metadata, err error)

LatestMetadata gives the most recent metadata for a given keyId & subject combination, if one exists

func MetadataByKey

func MetadataByKey(db sqlutil.Queryable, keyId string, limit, offset int) ([]*Metadata, error)

func MetadataBySubject

func MetadataBySubject(db sqlutil.Queryable, subject string) ([]*Metadata, error)

MetadataBySubject returns all metadata for a given subject hash

func NextMetadata

func NextMetadata(db sqlutil.Queryable, keyId, subject string) (*Metadata, error)

NextMetadata returns the next metadata block for a given subject. If no metablock exists a new one is created
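
A sketch of the write flow, assuming db and store are configured and keyId / subject are sha256 multihash strings:

m, err := NextMetadata(db, keyId, subject)
if err != nil {
	// handle error
}
m.Meta = map[string]interface{}{
	"title": "an example title",
}
if err := m.Write(store); err != nil {
	// handle error
}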

func (Metadata) DatastoreType

func (m Metadata) DatastoreType() string

func (Metadata) GetId

func (m Metadata) GetId() string

func (*Metadata) HashMaps

func (m *Metadata) HashMaps() (keyMap map[string]string, valueMap map[string]interface{}, err error)

TODO - this is ripped from metablocks

func (*Metadata) HashableBytes

func (m *Metadata) HashableBytes() ([]byte, error)

HashableBytes returns the exact structure to be used for hash

func (Metadata) Key

func (m Metadata) Key() datastore.Key

func (Metadata) String

func (m Metadata) String() string

String is metadata's abbreviated string representation

func (*Metadata) UnmarshalSQL

func (m *Metadata) UnmarshalSQL(row sqlutil.Scannable) error

UnmarshalSQL reads an SQL result into the metadata receiver

func (*Metadata) Write

func (m *Metadata) Write(store datastore.Datastore) error

Write creates a metadata record in the DB

type Primer

type Primer struct {
	// version 4 uuid
	Id string `json:"id"`
	// Created timestamp rounded to seconds in UTC
	Created time.Time `json:"created"`
	// Updated timestamp rounded to seconds in UTC
	Updated time.Time `json:"updated"`
	// shortest possible expression of this primer's name, usually an acronym
	// called shortTitle b/c acronyms collide often & users should feel free to
	// expand on acronyms
	ShortTitle string `json:"shortTitle"`
	// human-readable title of this primer.
	Title string `json:"title"`
	// long-form description of this primer.
	// TODO - Maybe we should store this in markdown format?
	Description string `json:"description"`
	// parent primer (if any)
	Parent *Primer `json:"parent"`
	// child-primers list
	SubPrimers []*Primer `json:"subPrimers,omitempty"`
	// metadata to associate with this primer
	Meta map[string]interface{} `json:"meta"`
	// statistics about this primer
	Stats *PrimerStats `json:"stats"`
	// collection of child sources
	Sources []*Source `json:"sources,omitempty"`
}

Primer is tracking information about an abstract group of content. For example, a government agency is a primer.

func BasePrimers

func BasePrimers(db sqlutil.Queryable, limit, offset int) (primers []*Primer, err error)

BasePrimers lists primers that have no parent

func ListPrimers

func ListPrimers(store datastore.Datastore, limit, offset int) ([]*Primer, error)

ListPrimers lists primers from the store, paginated

func UnmarshalBoundedPrimers

func UnmarshalBoundedPrimers(rows *sql.Rows, limit int) (primers []*Primer, err error)

UnmarshalBoundedPrimers turns sql.Rows into primers, expecting len(rows) <= limit

func (*Primer) CalcStats

func (p *Primer) CalcStats(db *sql.DB) error

func (Primer) DatastoreType

func (p Primer) DatastoreType() string

func (*Primer) Delete

func (p *Primer) Delete(store datastore.Datastore) error

func (Primer) GetId

func (p Primer) GetId() string

func (Primer) Key

func (p Primer) Key() datastore.Key

func (*Primer) NewSQLModel

func (p *Primer) NewSQLModel(key datastore.Key) sql_datastore.Model

func (*Primer) Read

func (p *Primer) Read(store datastore.Datastore) error

func (*Primer) ReadSources

func (p *Primer) ReadSources(db sqlutil.Queryable) error

ReadSources reads child sources of this primer

func (*Primer) ReadSubPrimers

func (p *Primer) ReadSubPrimers(db sqlutil.Queryable) error

ReadSubPrimers reads child primers of this primer

func (*Primer) SQLParams

func (p *Primer) SQLParams(cmd sql_datastore.Cmd) []interface{}

func (*Primer) SQLQuery

func (p *Primer) SQLQuery(cmd sql_datastore.Cmd) string

func (*Primer) Save

func (p *Primer) Save(store datastore.Datastore) (err error)

func (*Primer) UnmarshalSQL

func (p *Primer) UnmarshalSQL(row sqlutil.Scannable) error

type PrimerStats

type PrimerStats struct {
	UrlCount                int `json:"urlCount"`
	ArchivedUrlCount        int `json:"archivedUrlCount"`
	ContentUrlCount         int `json:"contentUrlCount"`
	ContentMetadataCount    int `json:"contentMetadataCount"`
	SourcesUrlCount         int `json:"sourcesUrlCount"`
	SourcesArchivedUrlCount int `json:"sourcesArchivedUrlCount"`
}

TODO - finish

type Snapshot

type Snapshot struct {
	// The url that was requested
	Url string `json:"url"`
	// Time this request was issued
	Created time.Time `json:"date"`
	// Returned Status
	Status int `json:"status,omitempty"`
	// Time to complete response in milliseconds
	Duration int64 `json:"downloadTook,omitempty"`
	// Record of all returned headers in [key,value,key,value...]
	Headers []string `json:"headers,omitempty"`
	// Multihash of response body (if any)
	Hash string `json:"hash,omitempty"`
}

A snapshot is a record of a GET request to a url. There can be many snapshots of a given url.

func SnapshotsForUrl

func SnapshotsForUrl(db sqlutil.Queryable, url string) ([]*Snapshot, error)

SnapshotsForUrl returns all snapshots for a given url string

func (*Snapshot) UnmarshalSQL

func (s *Snapshot) UnmarshalSQL(row sqlutil.Scannable) error

UnmarshalSQL reads an SQL result into the snapshot receiver

type Source

type Source struct {
	// version 4 uuid
	Id string `json:"id"`
	// Created timestamp rounded to seconds in UTC
	Created time.Time `json:"created"`
	// Updated timestamp rounded to seconds in UTC
	Updated time.Time `json:"updated"`
	// human-readable title for this source
	Title string `json:"title"`
	// description of the source, ideally one paragraph
	Description string `json:"description"`
	// absolute url to serve as the root of the
	Url string `json:"url"`
	// primer this source is connected to
	Primer *Primer `json:"primer"`
	// whether or not this url should be crawled by a web crawler
	Crawl bool `json:"crawl"`
	// amount of time before a link within this tree is considered in need
	// of re-checking for changes. currently not in use, but planned.
	StaleDuration time.Duration `json:"staleDuration"`
	// yeah this'll probably get deprecated. Part of a half-baked alerts feature idea.
	LastAlertSent *time.Time `json:"lastAlertSent"`
	// Metadata associated with this source that should be added to all
	// child urls, currently not in use, but planned
	Meta map[string]interface{} `json:"meta"`
	// Stats about this source
	Stats *SourceStats `json:"stats"`
}

Source is a concrete handle for archiving. Crawlers use a source's url as the base of a link tree. Sources are connected to a parent Primer to provide context & organization.

func CrawlingSources

func CrawlingSources(db sqlutil.Queryable, limit, offset int) ([]*Source, error)

CrawlingSources lists sources with crawling = true, paginated

func ListSources

func ListSources(store datastore.Datastore, limit, offset int) ([]*Source, error)

ListSources lists all sources from most to least recent, paginated

func UnmarshalBoundedSources

func UnmarshalBoundedSources(rows *sql.Rows, limit int) ([]*Source, error)

UnmarshalBoundedSources turns a standard sql.Rows of Source results into a *Source slice

func (*Source) AsUrl

func (c *Source) AsUrl(db *sql.DB) (*Url, error)

AsUrl retrieves the url that corresponds to this source's url. If one doesn't exist, a new url is created & saved.

func (*Source) CalcStats

func (s *Source) CalcStats(db *sql.DB) error

func (Source) DatastoreType

func (s Source) DatastoreType() string

func (*Source) Delete

func (s *Source) Delete(store datastore.Datastore) error

func (*Source) DescribedContent

func (s *Source) DescribedContent(db sqlutil.Queryable, limit, offset int) ([]*Url, error)

DescribedContent returns a list of content-urls from this source that need work. TODO - this currently doesn't check the status of metadata; that still needs doing.

func (Source) GetId

func (s Source) GetId() string

func (Source) Key

func (s Source) Key() datastore.Key

func (*Source) MatchesUrl

func (s *Source) MatchesUrl(rawurl string) bool

MatchesUrl checks to see if the url pattern of Source is contained within the passed-in url string. TODO - make this more sophisticated, checking against the beginning of the url to avoid accidental matches, or urls in query params matching within rawurl
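
For example (illustrative, given the containment check described above):

s := &Source{Url: "https://www.epa.gov"}
s.MatchesUrl("https://www.epa.gov/datasets/page.html") // true
s.MatchesUrl("https://example.com/unrelated")          // false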

func (*Source) NewSQLModel

func (s *Source) NewSQLModel(key datastore.Key) sql_datastore.Model

func (*Source) Read

func (s *Source) Read(store datastore.Datastore) error

func (*Source) SQLParams

func (s *Source) SQLParams(cmd sql_datastore.Cmd) []interface{}

func (*Source) SQLQuery

func (s *Source) SQLQuery(cmd sql_datastore.Cmd) string

func (*Source) Save

func (s *Source) Save(store datastore.Datastore) (err error)

func (*Source) UndescribedContent

func (s *Source) UndescribedContent(db sqlutil.Queryable, limit, offset int) ([]*Url, error)

UndescribedContent returns a list of content-urls from this source that need work. TODO - this currently doesn't check the status of metadata; that still needs doing.

func (*Source) UnmarshalSQL

func (c *Source) UnmarshalSQL(row sqlutil.Scannable) error

type SourceStats

type SourceStats struct {
	UrlCount             int `json:"urlCount"`
	ArchivedUrlCount     int `json:"archivedUrlCount"`
	ContentUrlCount      int `json:"contentUrlCount"`
	ContentMetadataCount int `json:"contentMetadataCount"`
}

type Uncrawlable

type Uncrawlable struct {
	// version 4 uuid
	Id string `json:"id"`
	// url from urls table, must be unique
	Url string `json:"url"`
	// Created timestamp rounded to seconds in UTC
	Created time.Time `json:"created"`
	// Updated timestamp rounded to seconds in UTC
	Updated time.Time `json:"updated"`
	// sha256 multihash of the public key that created this uncrawlable
	Creator string `json:"creator"`
	// name of person making submission
	Name string `json:"name"`
	// email address of person making submission
	Email string `json:"email"`
	// name of data rescue event where uncrawlable was added
	EventName string `json:"eventName"`
	// agency name
	Agency string `json:"agency"`
	// EDGI agency Id
	AgencyId string `json:"agencyId"`
	// EDGI subagency Id
	SubagencyId string `json:"subagencyId"`
	// EDGI organization Id
	OrgId string `json:"orgId"`
	// EDGI Suborganization Id
	SuborgId string `json:"suborgId"`
	// EDGI subprimer Id
	SubprimerId string `json:"subprimerId"`
	// flag for ftp content
	Ftp bool `json:"ftp"`
	// flag for 'database'
	// TODO - refine this?
	Database bool `json:"database"`
	// flag for visualization / interactive content
	// obfuscating data
	Interactive bool `json:"interactive"`
	// flag for a page that links to many files
	ManyFiles bool `json:"manyFiles"`
	// uncrawlable comments
	Comments string `json:"comments"`
}

Uncrawlables are urls that contain content that cannot be extracted with traditional web crawling / scraping methods. This model classifies the nature of the uncrawlable, setting the stage for writing custom scripts to extract the underlying content.

func ListUncrawlables

func ListUncrawlables(store datastore.Datastore, limit, offset int) ([]*Uncrawlable, error)

func (Uncrawlable) DatastoreType

func (u Uncrawlable) DatastoreType() string

func (*Uncrawlable) Delete

func (u *Uncrawlable) Delete(store datastore.Datastore) error

Delete an uncrawlable. This should only be done for erroneous additions.

func (Uncrawlable) GetId

func (u Uncrawlable) GetId() string

func (Uncrawlable) Key

func (u Uncrawlable) Key() datastore.Key

func (*Uncrawlable) NewSQLModel

func (u *Uncrawlable) NewSQLModel(key datastore.Key) sql_datastore.Model

func (*Uncrawlable) Read

func (u *Uncrawlable) Read(store datastore.Datastore) error

Read uncrawlable from db

func (*Uncrawlable) SQLParams

func (u *Uncrawlable) SQLParams(cmd sql_datastore.Cmd) []interface{}

SQLParams formats an uncrawlable struct for inserting / updating into postgres

func (*Uncrawlable) SQLQuery

func (u *Uncrawlable) SQLQuery(cmd sql_datastore.Cmd) string

func (*Uncrawlable) Save

func (u *Uncrawlable) Save(store datastore.Datastore) (err error)

Save an uncrawlable

func (*Uncrawlable) UnmarshalSQL

func (u *Uncrawlable) UnmarshalSQL(row sqlutil.Scannable) (err error)

UnmarshalSQL reads an sql response into the uncrawlable receiver. It expects the request to have used uncrawlableCols() for selection.

type Url

type Url struct {
	// version 4 uuid
	// urls can/should/must also be uniquely identified by Url
	Id string `json:"id,omitempty"`
	// A Url is uniquely identified by URI string without
	// any normalization. Url strings must always be absolute.
	Url string `json:"url"`
	// Created timestamp rounded to seconds in UTC
	Created time.Time `json:"created,omitempty"`
	// Updated timestamp rounded to seconds in UTC
	Updated time.Time `json:"updated,omitempty"`

	// Timestamp for most recent GET request
	LastGet *time.Time `json:"lastGet,omitempty"`
	// Timestamp for most recent HEAD request
	LastHead *time.Time `json:"lastHead,omitempty"`

	// Returned HTTP status code
	Status int `json:"status,omitempty"`
	// Returned HTTP 'Content-Type' header
	ContentType string `json:"contentType,omitempty"`
	// Result of mime sniffing to GET response body, as detailed at https://mimesniff.spec.whatwg.org
	ContentSniff string `json:"contentSniff,omitempty"`
	// ContentLength in bytes, will be the header value if only a HEAD request has been issued
	// After a valid GET response, it will be set to the length of the returned response
	ContentLength int64 `json:"contentLength,omitempty"`

	// best guess at a filename based on url string analysis
	// if you just want to know what type of file this is, this is the field to use.
	FileName string `json:"fileName,omitempty"`

	// HTML Title tag attribute
	Title string `json:"title,omitempty"`

	// Time remote server took to transfer content in milliseconds.
	// TODO - currently not implemented
	DownloadTook int `json:"downloadTook,omitempty"`
	// Time taken to receive response headers, in milliseconds. Currently not implemented
	HeadersTook int `json:"headersTook,omitempty"`

	// key-value slice of returned headers from most recent HEAD or GET request
	// stored in the form [key,value,key,value...]
	Headers []string `json:"headers,omitempty"`
	// any associative metadata
	Meta map[string]interface{} `json:"meta,omitempty"`

	// Hash is a multihash sha-256 of res.Body
	Hash string `json:"hash,omitempty"`

	// Url to saved content
	ContentUrl string `json:"contentUrl,omitempty"`

	// Uncrawlable information
	Uncrawlable *Uncrawlable `json:"uncrawlable,omitempty"`
}

URL represents... a url. TODO - consider renaming to Resource

func ContentUrls

func ContentUrls(db sqlutil.Queryable, limit, skip int) ([]*Url, error)

func FetchedUrls

func FetchedUrls(db sqlutil.Queryable, limit, offset int) ([]*Url, error)

func ListUrls

func ListUrls(store datastore.Datastore, limit, offset int) ([]*Url, error)

func Search

func Search(db sqlutil.Queryable, q string, limit, offset int) ([]*Url, error)

func UnfetchedUrls

func UnfetchedUrls(db sqlutil.Queryable, limit, offset int) ([]*Url, error)

func UnmarshalBoundedUrls

func UnmarshalBoundedUrls(rows *sql.Rows, limit int) ([]*Url, error)

func UnmarshalUrls

func UnmarshalUrls(rows *sql.Rows) ([]*Url, error)

UnmarshalUrls takes an sql cursor & returns a slice of url pointers. Expects columns to match urlCols().

func UrlsForHash

func UrlsForHash(db sqlutil.Queryable, hash string) ([]*Url, error)

func (Url) DatastoreType

func (u Url) DatastoreType() string

func (*Url) Delete

func (u *Url) Delete(store datastore.Datastore) error

Delete a url. This should only be done for erroneous additions.

func (*Url) ExtractDocLinks

func (u *Url) ExtractDocLinks(store datastore.Datastore, doc *goquery.Document) ([]*Link, error)

ExtractDocLinks extracts & stores a page's linked documents by selecting all a[href] links from a given goquery document, using the receiver *Url as the base
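
A sketch, assuming body holds HTML fetched from the receiver url (requires "bytes" and goquery imports):

doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
if err != nil {
	// handle error
}
links, err := u.ExtractDocLinks(store, doc)
if err != nil {
	// handle error
}
// each returned *Link has u as its Src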

func (*Url) File

func (u *Url) File() (*File, error)

File leverages a url's hash to generate a file that can have its bytes read back

func (*Url) Get

func (u *Url) Get(store datastore.Datastore) (body []byte, links []*Link, err error)

Issue a GET request to this URL if it's eligible for one

func (Url) GetId

func (u Url) GetId() string

func (*Url) HandleGetResponse

func (u *Url) HandleGetResponse(store datastore.Datastore, res *http.Response) (body []byte, links []*Link, err error)

HandleGetResponse performs all necessary actions in response to a GET request, regardless of whether it came from a crawl or archive request

func (*Url) HeadersMap

func (u *Url) HeadersMap() (headers map[string]string)

HeadersMap formats u.Headers (a string slice) as a map[header]value
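
For example:

u := &Url{Headers: []string{"Content-Type", "text/html", "Server", "nginx"}}
headers := u.HeadersMap()
// headers["Content-Type"] == "text/html"
// headers["Server"] == "nginx"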

func (*Url) InboundLinks

func (u *Url) InboundLinks(db sqlutil.Queryable) ([]string, error)

InboundLinks returns a slice of url strings that link to this url

func (Url) Key

func (u Url) Key() datastore.Key

func (*Url) NewSQLModel

func (u *Url) NewSQLModel(key datastore.Key) sql_datastore.Model

func (*Url) OutboundLinks

func (u *Url) OutboundLinks(db sqlutil.Queryable) ([]string, error)

OutboundLinks returns a slice of url strings that this url links to

func (*Url) ParsedUrl

func (u *Url) ParsedUrl() (*url.URL, error)

ParsedUrl is a convenience wrapper around url.Parse

func (*Url) Read

func (u *Url) Read(store datastore.Datastore) error

Read url from db

func (*Url) SQLParams

func (u *Url) SQLParams(cmd sql_datastore.Cmd) []interface{}

SQLParams formats a url struct for inserting / updating into postgres

func (*Url) SQLQuery

func (u *Url) SQLQuery(cmd sql_datastore.Cmd) string

func (*Url) Save

func (u *Url) Save(store datastore.Datastore) (err error)

func (*Url) ShouldEnqueueGet

func (u *Url) ShouldEnqueueGet() bool

ShouldEnqueueGet returns whether the url can be added to the queue for a GET request. Keep in mind only urls whose domains are marked crawl: true in the domains list will be candidates for GET requests. It should return true if: * the url is of http / https scheme * the url has never been GET'd, or hasn't been GET'd for a period longer than the stale duration
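
The staleness half of that rule amounts to something like this sketch (illustrative; the real check lives inside ShouldEnqueueGet):

stale := u.LastGet == nil || time.Since(*u.LastGet) > StaleDuration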

func (*Url) ShouldEnqueueHead

func (u *Url) ShouldEnqueueHead() bool

ShouldEnqueueHead returns whether the url can be added to the queue for a HEAD request. It should return true if: * the url is of http / https scheme * the url has never been HEAD'd, or hasn't been HEAD'd for a period longer than the stale duration

func (*Url) ShouldPutS3

func (u *Url) ShouldPutS3() bool

ShouldPutS3 is a chance to override whether the content should be stored

func (*Url) SuspectedContentUrl

func (u *Url) SuspectedContentUrl() bool

SuspectedContentUrl examines the url string, returns true if there's a reasonable chance the url leads to content

func (*Url) UnmarshalSQL

func (u *Url) UnmarshalSQL(row sqlutil.Scannable) (err error)

UnmarshalSQL reads an sql response into the url receiver. It expects the request to have used urlCols() for selection.

func (*Url) WarcRequest

func (u *Url) WarcRequest() *warc.Request
