fullfeed

package module
v0.0.0-...-2baeb36 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 21, 2023 License: GPL-3.0 Imports: 20 Imported by: 1

README

fullfeed

Go Reference

Convert a partial feed into a full-text feed with Go.

Usage

import "github.com/n0madic/fullfeed"

// Build the full-text version of the feed located at URL.
// Method selects the extraction method; MethodRequest is the request passed
// to that method (here the string ".article" together with QueryMethod).
// NOTE(review): the local name "errors" shadows the standard library package
// of the same name inside this scope, if that package is imported.
feed, errors := fullfeed.GetFullFeed(fullfeed.Config{
    Method:        fullfeed.QueryMethod,
    MethodRequest: ".article",
    URL:           "https://blog.golang.org/feed.atom",
})

// GetFullFeed returns a slice of errors rather than a single error value,
// so test its length instead of comparing against nil.
if len(errors) > 0 {
    fmt.Println(errors)
    return
}

// Print whatever ToRss returns for the resulting feed.
fmt.Println(feed.ToRss())

Documentation

Overview

Package fullfeed converts a partial feed into a full-text feed.

Index

Constants

This section is empty.

Variables

View Source
// UserAgent is the User-Agent header value; it defaults to "fullfeed/1.0".
// It is an exported variable, so callers can assign a different value.
var UserAgent string = "fullfeed/1.0"

UserAgent is the value of the User-Agent header.

Functions

func ContentCacheLength

func ContentCacheLength() int

ContentCacheLength returns the number of cached entries

func GetFullContent

func GetFullContent(config Config, link string) (fullContent string, err error)

GetFullContent returns the full content for the specified link.

func GetFullFeed

func GetFullFeed(config Config) (feed *feeds.Feed, errors []error)

GetFullFeed returns the feed with full-text content, together with any errors.

func GetURL

func GetURL(url string) (io.Reader, error)

GetURL returns content from the internet or from the cache.

func InitContentCache

func InitContentCache(n int) (err error)

InitContentCache sets up the optional download cache.

func LoadSourceFeed

func LoadSourceFeed(config Config) (feed *feeds.Feed, err error)

LoadSourceFeed returns the source feed.

Types

type Config

// Config holds the settings for a single feed.
// Every field carries json and yaml struct tags with the same snake_case key.
type Config struct {
	// Base URL for all relative URLs.
	// Must be specified if different from the feed domain.
	BaseHref string `json:"base_href" yaml:"base_href"`

	// Feed description.
	Description string `json:"description" yaml:"description"`

	// Feed cleaning filters.
	Filters struct {
		// Skip articles with the following words in the description.
		Descriptions []string `json:"descriptions" yaml:"descriptions"`

		// Remove the following selectors from content.
		Selectors []string `json:"selectors" yaml:"selectors"`

		// Remove blocks of text that contain the following words.
		Text []string `json:"text" yaml:"text"`

		// Skip articles with the following words in the title.
		Titles []string `json:"titles" yaml:"titles"`
	} `json:"filters" yaml:"filters"`

	// Maximum number of processing workers (default 10).
	MaxWorkers uint `json:"max_workers" yaml:"max_workers"`

	// Full text extract method.
	// Supported methods: query (like jquery), xpath, readability (default).
	// The matching ExtractMethod values are QueryMethod, XPathMethod and
	// ReadabilityMethod.
	Method ExtractMethod `json:"method" yaml:"method"`

	// Full text extract request.
	// NOTE(review): presumably the selector or expression interpreted by
	// Method (the README example passes ".article" with QueryMethod) — confirm.
	MethodRequest string `json:"method_request" yaml:"method_request"`

	// Link to the original feed.
	URL string `json:"url" yaml:"url"`
}

Config holds the configuration for a feed.

type ExtractMethod

type ExtractMethod string

ExtractMethod identifies the method used to extract the full text.

// Extraction methods of type ExtractMethod, the type of Config.Method.
// They are declared as variables, not constants.
var (
	// QueryMethod extracts with goquery; its string value is "query".
	QueryMethod ExtractMethod = "query"
	// ReadabilityMethod is the default method; its string value is "readability".
	ReadabilityMethod ExtractMethod = "readability"
	// XPathMethod extracts with XML Path Language; its string value is "xpath".
	XPathMethod ExtractMethod = "xpath"
)

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL