feedparser

package module
v0.0.0-...-de80f02 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 6, 2015 License: GPL-3.0 Imports: 7 Imported by: 4

README

go-feedparser - Simple RSS and ATOM feed parser.

To install, run
	go get -u github.com/nmeum/go-feedparser

This is a simple feed parser. Currently it supports RSS2 and ATOM web
feeds. It uses "golang.org/x/text/encoding" and thus supports non-UTF8
encoded feeds. The code was originally imported from cpod
<https://github.com/nmeum/cpod> and is licensed under GNU GPLv3 (or
later).

Documentation is missing at the moment and the tests are not completed yet.

Documentation

Overview

Package feedparser implements a simple RSS and ATOM feed parser.

The primary function of interest is the Parse function. You can pass an arbitrary Reader to this function and it will return the corresponding feed. The following demonstrates an example use case (reading a feed from a file):

// Open the feed document. Parse accepts any io.Reader, so a file works
// just as well as a network response body.
file, err := os.Open("feed.xml")
if err != nil {
	panic(err)
}
defer file.Close()

// Parse returns a generic Feed value for the document read from file.
feed, err := feedparser.Parse(file)
if err != nil {
	panic(err)
}

// Feed.Type names the format of the parsed feed.
switch feed.Type {
case "rss":
	fmt.Println("RSS feed!")
case "atom":
	fmt.Println("ATOM feed!")
default:
	fmt.Println("Unknown feed format")
}

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AtomCategory

// AtomCategory holds the attributes of an atom category element.
// All three fields are decoded from XML attributes, not child elements.
type AtomCategory struct {
	// Identifier for this category (required).
	// Read from the "term" attribute.
	Term string `xml:"term,attr"`

	// Categorization scheme via a URI (optional).
	// Read from the "scheme" attribute.
	Scheme string `xml:"scheme,attr"`

	// Human readable label for display (optional).
	// Read from the "label" attribute.
	Label string `xml:"label,attr"`
}

AtomCategory identifies the category.

type AtomEntry

// AtomEntry mirrors a single <entry> element of an atom feed.
// Timestamps are kept as the raw strings found in the document.
type AtomEntry struct {
	// Universally unique entry ID (required).
	ID string `xml:"id"`

	// Human readable title for the entry (required).
	Title AtomText `xml:"title"`

	// Last time the entry was significantly modified (required).
	// Stored as the unparsed string from the <updated> element.
	Updated string `xml:"updated"`

	// Authors of the entry (recommended).
	// One element per <author> child.
	Authors []AtomPerson `xml:"author"`

	// Content of the entry (recommended).
	Content AtomText `xml:"content"`

	// Links which identify related web pages (recommended).
	// One element per <link> child.
	Links []AtomLink `xml:"link"`

	// Short summary, abstract or excerpt of the entry (recommended).
	Summary AtomText `xml:"summary"`

	// Categories the entry belongs to (optional).
	// One element per <category> child.
	Categories []AtomCategory `xml:"category"`

	// Contributors to the entry (optional).
	// One element per <contributor> child.
	Contributors []AtomPerson `xml:"contributor"`

	// Time of the initial creation of the entry (optional).
	// Stored as the unparsed string from the <published> element.
	Published string `xml:"published"`

	// Information about rights, for example copyrights (optional).
	Rights AtomText `xml:"rights"`
}

AtomEntry represents an atom entry.

type AtomFeed

// AtomFeed mirrors the root <feed> element of an atom document.
type AtomFeed struct {
	// XMLName pins the expected root element name to "feed".
	XMLName xml.Name `xml:"feed"`

	// Universally unique feed ID (required).
	ID string `xml:"id"`

	// Human readable title for the feed (required).
	Title AtomText `xml:"title"`

	// Last time the feed was significantly modified (required).
	// Stored as the unparsed string from the <updated> element.
	Updated string `xml:"updated"`

	// Entries for the feed (required).
	// One element per <entry> child.
	Entries []AtomEntry `xml:"entry"`

	// Authors of the feed (recommended).
	// One element per <author> child.
	Authors []AtomPerson `xml:"author"`

	// Links which identify related web pages (recommended).
	// One element per <link> child.
	Links []AtomLink `xml:"link"`

	// Categories the feed belongs to (optional).
	// One element per <category> child.
	Categories []AtomCategory `xml:"category"`

	// Contributors to the feed (optional).
	// One element per <contributor> child.
	Contributors []AtomPerson `xml:"contributor"`

	// Software used to generate the feed (optional).
	Generator AtomGenerator `xml:"generator"`

	// Small icon used for visual identification (optional).
	Icon string `xml:"icon"`

	// Content of the <logo> element; per RFC 4287 an image that
	// identifies the feed (optional).
	Logo string `xml:"logo"`

	// Information about rights, for example copyrights (optional).
	Rights AtomText `xml:"rights"`

	// Human readable description or subtitle (optional).
	Subtitle AtomText `xml:"subtitle"`
}

AtomFeed represents an atom web feed.

type AtomGenerator

// AtomGenerator mirrors the atom <generator> element: the element text
// is the name, while URI and version come from attributes.
type AtomGenerator struct {
	// Generator name (required).
	// Taken from the character data of the element.
	Name string `xml:",chardata"`

	// URI for this generator (optional).
	// Read from the "uri" attribute.
	URI string `xml:"uri,attr"`

	// Version for this generator (optional).
	// Read from the "version" attribute.
	Version string `xml:"version,attr"`
}

AtomGenerator identifies the generator.

// AtomLink mirrors the atom <link> element.
// Every field is decoded from an XML attribute and kept as a string.
type AtomLink struct {
	// Hypertext reference (required).
	// Read from the "href" attribute.
	Href string `xml:"href,attr"`

	// Single Link relation type (optional).
	// Read from the "rel" attribute.
	Rel string `xml:"rel,attr"`

	// Media type of the resource (optional).
	// Read from the "type" attribute.
	Type string `xml:"type,attr"`

	// Language of referenced resource (optional).
	// Read from the "hreflang" attribute.
	HrefLang string `xml:"hreflang,attr"`

	// Human readable information about the link (optional).
	// Read from the "title" attribute.
	Title string `xml:"title,attr"`

	// Length of the resource in bytes (optional).
	// Kept as the raw attribute string, not converted to a number.
	Length string `xml:"length,attr"`
}

AtomLink represents the atom link tag.

type AtomPerson

// AtomPerson mirrors an atom person construct.
// Each field is decoded from a child element of the same name.
type AtomPerson struct {
	// Human readable name for the person (required).
	Name string `xml:"name"`

	// Home page for the person (optional).
	URI string `xml:"uri"`

	// Email address for the person (optional).
	Email string `xml:"email"`
}

AtomPerson represents a person, corporation, et cetera.

type AtomText

// AtomText mirrors an atom text construct such as <title>, <summary> or
// <content>. The element body is captured twice: once as plain character
// data and once as raw inner XML, so callers can pick whichever fits the
// declared Type.
type AtomText struct {
	// Text body (required).
	// Taken from the character data of the element.
	Body string `xml:",chardata"`

	// InnerXML data (optional).
	// Raw, unparsed XML nested inside the element.
	InnerXML string `xml:",innerxml"`

	// Text type (optional).
	// Read from the "type" attribute.
	Type string `xml:"type,attr"`

	// URI where the content can be found (optional for <content>).
	// Read from the "uri" attribute. The flag must be spelled "attr":
	// encoding/xml ignores unknown flags, so a misspelling silently turns
	// the field into a child-element match and the attribute is never read.
	// NOTE(review): RFC 4287 names the out-of-line content attribute "src";
	// confirm whether "uri" is really the attribute intended here.
	URI string `xml:"uri,attr"`
}

AtomText identifies human readable text.

type Feed

// Feed is the format-independent view of a parsed feed.
// It carries no XML tags; it is the value returned by Parse.
type Feed struct {
	// Title for the feed.
	Title string

	// Feed type (either "atom" or "rss").
	Type string

	// URL to the website.
	Link string

	// Description or subtitle for the feed.
	Description string

	// Categories the feed belongs to.
	Categories []string

	// Email address of the feed author.
	Author string

	// Last time the feed was updated.
	Updated time.Time

	// URL to image for the feed.
	Image string

	// Software used to generate the feed.
	Generator string

	// Information about rights, for example copyrights.
	Rights string

	// Items contained in the feed.
	Items []Item
}

Feed represents a generic feed.

func Parse

func Parse(r io.Reader) (f Feed, err error)

Parse tries to parse the content of the given reader. It also sorts all items by their publication date, meaning that the first item is guaranteed to be the most recent one.

type Item

// Item is the format-independent view of a single feed entry.
// It carries no XML tags and is stored in Feed.Items.
type Item struct {
	// Universally unique item ID.
	ID string

	// Title of the item.
	Title string

	// URL for the item.
	Link string

	// Content of the item.
	Content string

	// Email address of the item author.
	Author string

	// Categories the item belongs to.
	Categories []string

	// Time the item was published.
	PubDate time.Time

	// URL to media attachment.
	Attachment string
}

Item represents a generic feed item.

type RssCategory

// RssCategory mirrors the rss <category> element: the element text is
// the name, the taxonomy domain comes from an attribute.
type RssCategory struct {
	// Human readable category name (required).
	// Taken from the character data of the element.
	Name string `xml:",chardata"`

	// Domain that identifies categorization taxonomy (optional).
	// Read from the "domain" attribute.
	Domain string `xml:"domain,attr"`
}

RssCategory represents the rss category tag.

type RssCloud

// RssCloud mirrors the rss <cloud> element.
// Every field is decoded from an XML attribute.
type RssCloud struct {
	// Domain cloud service is running on (required).
	// Read from the "domain" attribute.
	Domain string `xml:"domain,attr"`

	// Port to use for TCP socket connection (required).
	// Read from the "port" attribute and decoded as an integer.
	Port int `xml:"port,attr"`

	// Path to use for the request (required).
	// Read from the "path" attribute.
	Path string `xml:"path,attr"`

	// Register procedure which should be used (required).
	// Read from the "registerProcedure" attribute.
	RegisterProcedure string `xml:"registerProcedure,attr"`

	// Protocol used for registration et cetera (required).
	// Read from the "protocol" attribute.
	Protocol string `xml:"protocol,attr"`
}

RssCloud represents the rss cloud tag.

type RssDay

// RssDay holds the value of a <day> child element.
type RssDay struct {
	// Weekday (e.g. Monday) (required).
	// Decoded from a child element named "day".
	Day string `xml:"day"`
}

RssDay represents the day tag, a subelement of the skipDays tag.

type RssEnclosure

// RssEnclosure mirrors the rss <enclosure> element.
// Every field is decoded from an XML attribute and kept as a string.
type RssEnclosure struct {
	// Where the enclosure is located (required).
	// Read from the "url" attribute.
	URL string `xml:"url,attr"`

	// Size of the enclosure in bytes (required).
	// Kept as the raw attribute string, not converted to a number.
	Length string `xml:"length,attr"`

	// MIME type of the enclosure (required).
	// Read from the "type" attribute.
	Type string `xml:"type,attr"`
}

RssEnclosure represents an rss enclosure.

type RssFeed

// RssFeed mirrors the root <rss> element of an rss document.
// Apart from XMLName, every field is read from inside the nested
// <channel> element, expressed with the "channel>name" tag path.
type RssFeed struct {
	// XMLName pins the expected root element name to "rss".
	XMLName xml.Name `xml:"rss"`

	// Name of the channel (required).
	Title string `xml:"channel>title"`

	// URL to the website (required).
	Link string `xml:"channel>link"`

	// Description for the channel (required).
	Description string `xml:"channel>description"`

	// Items for the feed (required).
	// One element per <item> child of the channel.
	Items []RssItem `xml:"channel>item"`

	// Language the channel is written in (optional).
	Language string `xml:"channel>language"`

	// Copyright notice for the content (optional).
	Copyright string `xml:"channel>copyright"`

	// Email address of the editor (optional).
	// Read from the <managingEditor> element.
	Editor string `xml:"channel>managingEditor"`

	// Email address of the web master (optional).
	WebMaster string `xml:"channel>webMaster"`

	// Publication date for the content (optional).
	// Stored as the unparsed string from the document.
	PubDate string `xml:"channel>pubDate"`

	// Last time the content was updated (optional).
	// Stored as the unparsed string from the document.
	LastBuildDate string `xml:"channel>lastBuildDate"`

	// Categories the feed belongs to (optional).
	// One element per <category> child of the channel.
	Categories []RssCategory `xml:"channel>category"`

	// Program used to generate the channel (optional).
	Generator string `xml:"channel>generator"`

	// URL that points to documentation for the used format (optional).
	Docs string `xml:"channel>docs"`

	// Cloud for update notifications (optional).
	Cloud RssCloud `xml:"channel>cloud"`

	// How long the channel can be cached (optional).
	// Decoded as an integer; the unit is not stated in this file.
	TTL int `xml:"channel>ttl"`

	// Image that can be displayed with the channel (optional).
	Image RssImage `xml:"channel>image"`

	// PICS rating for the channel (optional).
	Rating string `xml:"channel>rating"`

	// Text input box related to the channel (optional).
	TextInput RssTextInput `xml:"channel>textInput"`

	// Hint for aggregators telling them which hours can be skipped (optional).
	// NOTE(review): the slice gets one RssHour per <skipHours> element and
	// RssHour holds a single int, so a <skipHours> with several <hour>
	// children presumably keeps only one of them — confirm.
	SkipHours []RssHour `xml:"channel>skipHours"`

	// Hint for aggregators telling them which days can be skipped (optional).
	// NOTE(review): same shape as SkipHours; several <day> children inside
	// one <skipDays> presumably collapse to a single value — confirm.
	SkipDays []RssDay `xml:"channel>skipDays"`
}

RssFeed represents an rss web feed.

type RssHour

// RssHour holds the value of an <hour> child element.
type RssHour struct {
	// Number between 0 and 23 representing time in GMT (required).
	// Decoded as an integer from a child element named "hour".
	Hour int `xml:"hour"`
}

RssHour represents the hour tag, a subelement of the skipHours tag.

type RssImage

// RssImage mirrors the rss <image> element.
// Each field is decoded from a child element of the same name.
type RssImage struct {
	// URL to image that represents the channel (required).
	URL string `xml:"url"`

	// Title which describes the image (required).
	Title string `xml:"title"`

	// URL of the site itself (required).
	Link string `xml:"link"`

	// Width of the image (optional).
	// Decoded as an integer.
	Width int `xml:"width"`

	// Height of the image (optional).
	// Decoded as an integer.
	Height int `xml:"height"`

	// Additional description of the image (optional).
	Description string `xml:"description"`
}

RssImage represents an rss image.

type RssItem

// RssItem mirrors a single rss <item> element.
// Dates are kept as the raw strings found in the document.
type RssItem struct {
	// Title of the item (required if description isn't present).
	Title string `xml:"title"`

	// The item synopsis (required if title isn't present).
	Description string `xml:"description"`

	// The URL of the item (optional).
	Link string `xml:"link"`

	// Email address of the author of the item (optional).
	Author string `xml:"author"`

	// Includes item in one or more categories (optional).
	// One element per <category> child.
	Categories []RssCategory `xml:"category"`

	// URL to a page for comments (optional).
	Comments string `xml:"comments"`

	// Media object that is attached to the item (optional).
	Enclosure RssEnclosure `xml:"enclosure"`

	// String that uniquely identifies the item (optional).
	// Read from the <guid> element.
	GUID string `xml:"guid"`

	// Time the item was published (optional).
	// Stored as the unparsed string from the <pubDate> element.
	PubDate string `xml:"pubDate"`

	// The RSS channel the item came from (optional).
	Source RssSource `xml:"source"`
}

RssItem represents an rss item.

type RssSource

// RssSource mirrors the rss <source> element: the element text is the
// source name, the link comes from an attribute.
type RssSource struct {
	// URL which links to the XMLization source (required).
	// Read from the "url" attribute.
	URL string `xml:"url,attr"`

	// Source name (required).
	// Taken from the character data of the element.
	Name string `xml:",chardata"`
}

RssSource represents the rss source tag.

type RssTextInput

// RssTextInput mirrors the rss <textInput> element.
// Each field is decoded from a child element of the same name.
type RssTextInput struct {
	// The label of the Submit button in the text input area (required).
	Title string `xml:"title"`

	// Explains the text input area (required).
	Description string `xml:"description"`

	// The name of the text object in the text input area (required).
	Name string `xml:"name"`

	// The URL of the CGI script that processes text input requests (required).
	Link string `xml:"link"`
}

RssTextInput represents the rss textInput tag.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL