ia

package
v0.0.0-...-3e27f85 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 2, 2021 License: MPL-2.0 Imports: 16 Imported by: 2

Documentation

Overview

Package ia contains utilities for working with files from the Internet Archive.

Index

Constants

View Source
// TimestampFormat is a Go time layout (see time.Time.Format) with no
// separators: four-digit year followed by two digits each for month, day,
// hour (24-hour clock), minute and second.
// NOTE(review): presumably the format of the timestamp argument to PageURL
// and of capture timestamps returned by GetTimemap — confirm.
const TimestampFormat = "20060102150405"

Variables

This section is empty.

Functions

func DecodeDigest

func DecodeDigest(digest string) (*[20]byte, error)

DecodeDigest decodes a base32-encoded SHA-1 digest.

func GetTimemap

func GetTimemap(pageURL string, options *TimemapOptions) ([][]string, error)

GetTimemap gets a list of Internet Archive captures of the given URL.

func NewReadValidator

func NewReadValidator(r io.Reader, name string, md5Sum, sha1Sum, crc32Sum []byte) io.Reader

func PageURL

func PageURL(url, timestamp string) string

func Save

func Save(pageURL string, options *SaveOptions) error

func Validate

func Validate(dir string) error

func ValidateFile

func ValidateFile(filename string, md5Sum, sha1Sum, crc32Sum []byte) error

Types

type FileMeta

// FileMeta describes one file entry of an item's *_files.xml file.
// Name and Source are read from XML attributes; every other field is read
// from a child element named by its tag.
type FileMeta struct {
	Name     string          `xml:"name,attr"`   // filename, relative to root
	Source   string          `xml:"source,attr"` // "original", "metadata", or "derivative"
	Format   string          `xml:"format"`      // e.g. "Text", "Metadata", "Unknown"
	// NOTE(review): presumably the name of the file a derivative was produced from — confirm.
	Original string          `xml:"original"`
	BTIH     jsonutil.Hex    `xml:"btih"` // BitTorrent info-hash
	// NOTE(review): the type name suggests a Unix timestamp in seconds — confirm against timefmt.UnixSec.
	ModTime  timefmt.UnixSec `xml:"mtime"`
	// NOTE(review): presumably the file size in bytes — confirm.
	Size     int64           `xml:"size"`
	// MD5, CRC32 and SHA1 are the checksums listed for the file; the type name
	// suggests they are hex-encoded in the XML (confirm against jsonutil.Hex).
	MD5      jsonutil.Hex    `xml:"md5"`
	CRC32    jsonutil.Hex    `xml:"crc32"`
	SHA1     jsonutil.Hex    `xml:"sha1"`
	Length   float64         `xml:"length"` // audio duration
	Height   int             `xml:"height"` // image height
	Width    int             `xml:"width"`  // image width
	Private  bool            `xml:"private"`
}

FileMeta contains file metadata listed in the *_files.xml file in the root of an item. This file is excluded from torrent downloads.

func ReadFileMeta

func ReadFileMeta(dir string) ([]FileMeta, error)

func (*FileMeta) OpenValidator

func (fm *FileMeta) OpenValidator(dir string) (io.ReadCloser, error)

func (*FileMeta) Validator

func (fm *FileMeta) Validator(r io.Reader) io.Reader

type ItemMeta

// ItemMeta holds the item-level metadata from an item's *_meta.xml file.
// Each field is read from the child element named by its xml tag.
type ItemMeta struct {
	Identifier     string   `xml:"identifier"`
	// Collections is a slice, so encoding/xml appends one entry for every
	// <collection> element it encounters.
	Collections    []string `xml:"collection"`
	Description    string   `xml:"description"`
	Mediatype      string   `xml:"mediatype"` // e.g. "software"
	Subject        string   `xml:"subject"`
	Title          string   `xml:"title"`
	Uploader       string   `xml:"uploader"`
	// The two dates below are kept as raw strings; they are not parsed into time.Time.
	Publicdate     string   `xml:"publicdate"` // "2006-01-02 15:04:05" format
	Addeddate      string   `xml:"addeddate"`  // "2006-01-02 15:04:05" format
	Curation       string   `xml:"curation"`
	BackupLocation string   `xml:"backup_location"` // removed from meta in April 2020
}

ItemMeta contains item metadata in the *_meta.xml file in the root of an item.

func ReadItemMeta

func ReadItemMeta(dir string) (*ItemMeta, error)

type SaveOptions

// SaveOptions holds the boolean switches passed to Save.
// NOTE(review): apart from CaptureAll, the fields carry no description here;
// their names appear to mirror the options of the Internet Archive's
// save-page feature — confirm before relying on them.
type SaveOptions struct {
	CaptureOutlinks    bool
	CaptureAll         bool // save error pages (HTTP status 400-599)
	CaptureScreenshot  bool
	SaveInMyWebArchive bool
	EmailResult        bool
}

type TimemapOptions

// TimemapOptions contains options for a timemap API call made by GetTimemap.
type TimemapOptions struct {
	MatchPrefix bool     // whether url is a prefix (* wildcard is appended)
	Collapse    string   // field to collapse by; only the earliest capture for each unique value of that field is kept
	Fields      []string // e.g. urlkey,timestamp,endtimestamp,original,mimetype,statuscode,digest,redirect,robotflags,length,offset,filename,groupcount,uniqcount
	Limit       int      // e.g. 100000
}

TimemapOptions contains options for a timemap API call.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL