rewrite

package module
v0.0.0-...-2186fc4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 14, 2018 License: GPL-3.0 Imports: 11 Imported by: 3

README

rewrite

GitHub Slack GoDoc License

rewrite is a package for modifying the contents of HTML & other web-related content types. It's primarily used as a tool to maintain the functionality of a web resource within the context of an archive.

Copyright (C) 2017 Data Together. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3.0.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the LICENSE file for details.

Getting Involved

We would love involvement from more people! If you notice any errors or would like to submit changes, please see our Contributing Guidelines.

We use GitHub issues for tracking bugs and feature requests, and Pull Requests (PRs) for submitting changes.

Installation

Use in any golang package with:

import "github.com/datatogether/rewrite"

Documentation

Overview

rewrite is a package for modifying the contents of HTML & other web-related content types. It's primarily used as a tool to maintain the functionality of a web resource within the context of an archive.

rewrite is a package for modifying the contents of HTML & other web-related content types. It's primarily used as a tool to maintain the functionality of a web resource within the context of an archive.

Index

Constants

This section is empty.

Variables

View Source
// Package-level regular expressions, compiled once at init time.
var (
	// CharsetRegex finds a charset declaration inside a <meta ...> tag and
	// captures the charset value in group 1 (surrounding quotes and
	// whitespace are not part of the capture).
	CharsetRegex = regexp.MustCompile(`<meta[^>]*?[\s;"\']charset\s*=[\s"\']*([^\s"\'/>]*)`)
	// CssUrlRegex matches a CSS url(...) expression and captures its argument
	// in group 1. The argument may be preceded or followed by quote
	// characters or short HTML entities ("&", 1-4 characters, ";").
	CssUrlRegex  = regexp.MustCompile(`(?m)url\s*\(\s*(?:[\"']|(?:&.{1,4};))*\s*([^)'\"]+)\s*(?:["']|(?:&.{1,4};))*\s*\)`)
	// The commented-out pattern below relies on (?!...) negative lookahead,
	// which Go's regexp package (RE2 syntax) does not support.
	// NOTE(review): presumably that is why CssImportNoUrlRegex is compiled
	// from an empty pattern instead; an empty pattern matches the empty
	// string at every position, so it is only a placeholder.
	// CssImportNoUrlRegex = regexp.MustCompile(`@import\\s+(?!url)\\(?\\s*['\"]?(?!url[\\s\\(])([\w.:/\\\\-]+)`)
	CssImportNoUrlRegex = regexp.MustCompile(``)
	// HttpxMatchString matches an http:// or https:// scheme followed by a
	// run of host/authority characters. Each slash may optionally be preceded
	// by a backslash, so escaped forms such as http:\/\/ also match.
	HttpxMatchString    = regexp.MustCompile(`https?:\\?/\\?/[A-Za-z0-9:_@.-]+`)

	// JsHttpx is compiled from an empty pattern (matches the empty string
	// everywhere). NOTE(review): looks like an unimplemented placeholder.
	JsHttpx = regexp.MustCompile(``)
)
View Source
// DefaultHeaderRewriters maps an HTTP header name to the RewriteRule that
// should be applied to that header.
//
// Keys are spelled in Go's canonical header form (e.g. "Content-Md5",
// "Www-Authenticate", "Etag") — presumably so they can be looked up directly
// against http.Header keys; confirm against HeaderRewriter.RewriteHeaders.
var DefaultHeaderRewriters = map[string]RewriteRule{
	"Access-Control-Allow-Origin":      PrefixIfUrlRewrite,
	"Access-Control-Allow-Credentials": PrefixIfUrlRewrite,
	"Access-Control-Expose-Headers":    PrefixIfUrlRewrite,
	"Access-Control-Max-Age":           PrefixIfUrlRewrite,
	"Access-Control-Allow-Methods":     PrefixIfUrlRewrite,
	"Access-Control-Allow-Headers":     PrefixIfUrlRewrite,

	"Accept-Patch":  Keep,
	"Accept-Ranges": Keep,

	"Age": Prefix,

	"Allow": Keep,

	"Alt-Svc":       Prefix,
	"Cache-Control": Prefix,

	"Connection": Prefix,

	"Content-Base":                        UrlRewrite,
	"Content-Disposition":                 Keep,
	"Content-Encoding":                    PrefixIfContentRewrite,
	"Content-Language":                    Keep,
	"Content-Length":                      ContentLength,
	"Content-Location":                    UrlRewrite,
	"Content-Md5":                         Prefix,
	"Content-Range":                       Keep,
	"Content-Security-Policy":             Prefix,
	"Content-Security-Policy-Report-Only": Prefix,
	"Content-Type":                        Keep,

	"Date": Keep,

	"Etag":    Prefix,
	"Expires": Prefix,

	"Last-Modified": Prefix,
	"Link":          Keep,
	"Location":      UrlRewrite,

	"P3p":    Prefix,
	"Pragma": Prefix,

	"Proxy-Authenticate": Keep,

	"Public-Key-Pins": Prefix,
	"Retry-After":     Prefix,
	"Server":          Prefix,

	"Set-Cookie": Cookie,

	"Strict-Transport-Security": Prefix,

	"Trailer":           Prefix,
	"Transfer-Encoding": Prefix,
	"Tk":                Prefix,

	"Upgrade":                   Prefix,
	"Upgrade-Insecure-Requests": Prefix,

	"Vary": Prefix,

	"Via": Prefix,

	"Warning": Prefix,

	"Www-Authenticate": Keep,

	"X-Frame-Options":  Prefix,
	"X-Xss-Protection": Prefix,
}
View Source
// DefaultWarcRecordRewriters maps a rewriter kind name ("html", "css", "js",
// ...) to the Rewriter used for that kind. Every kind listed here is currently
// bound to NoopRewriter.
// NOTE(review): the kind names overlap with the values of RewriteTypes, so
// this looks like the second step of a content-type -> kind -> Rewriter
// lookup — confirm against WarcRecordRewriter.
var DefaultWarcRecordRewriters = map[string]Rewriter{

	"header": NoopRewriter,

	"cookie": NoopRewriter,

	"html": NoopRewriter,

	"html-banner-only": NoopRewriter,

	"css": NoopRewriter,

	"js": NoopRewriter,

	"js-proxy": NoopRewriter,

	"json": NoopRewriter,

	"xml": NoopRewriter,

	"dash": NoopRewriter,

	"hls": NoopRewriter,

	"amf": NoopRewriter,
}
View Source
// ErrNotFinished is a sentinel error carrying the message "not finished".
// NOTE(review): the code paths that return it are not visible here —
// presumably it marks functionality that is not implemented yet.
var ErrNotFinished = errors.New("not finished")
View Source
// NoopRewriter is a zero-value PrefixRewriter, i.e. one whose Prefix is nil.
// NOTE(review): presumably rewriting with an empty prefix returns the input
// unchanged — confirm in PrefixRewriter.Rewrite.
var NoopRewriter = PrefixRewriter{}
View Source
// RewriteTypes maps a MIME content type to the name of the rewriter kind that
// handles it. The empty content type and "text/plain" map to "guess-text";
// "application/octet-stream" maps to "guess-bin".
// NOTE(review): "guess-text" and "guess-bin" have no entry in
// DefaultWarcRecordRewriters — presumably they are resolved by inspecting the
// content; confirm against the caller.
var RewriteTypes = map[string]string{

	"text/html":             "html",
	"application/xhtml":     "html",
	"application/xhtml+xml": "html",

	"text/css": "css",

	"text/javascript":          "js",
	"application/javascript":   "js",
	"application/x-javascript": "js",

	"application/json": "json",

	"application/x-mpegURL":         "hls",
	"application/vnd.apple.mpegurl": "hls",

	"application/dash+xml": "dash",

	"application/x-amf": "amf",

	"text/plain": "guess-text",

	"":                         "guess-text",
	"application/octet-stream": "guess-bin",
}

Functions

func ReplaceAllSubmatchFunc

func ReplaceAllSubmatchFunc(re *regexp.Regexp, b []byte, f func(s []byte) []byte) []byte

Shameless copy-pasta from Stack Overflow: https://stackoverflow.com/questions/28000832/how-to-access-a-capturing-group-from-regexp-replaceallfunc

Types

type Buffer

type Buffer struct {
	bytes.Buffer
	// contains filtered or unexported fields
}

Buffer behaves just like a bytes.Buffer, but uses a Rewriter to adjust any bytes written with Buffer.Write.

func NewBuffer

func NewBuffer(data []byte, rw Rewriter) *Buffer

NewBuffer allocates a new rewriting buffer. Unlike bytes.Buffer, users should always use NewBuffer, even if passing nil for data.

func (*Buffer) Write

func (rwb *Buffer) Write(p []byte) (int, error)

type Config

// Config collects the options used when constructing rewriters: the
// constructors in this package accept variadic func(*Config) arguments, and
// DefaultConfig returns a *Config.
// NOTE(review): the field notes below are inferred from names and types only —
// confirm against the constructors that consume them.
type Config struct {
	// DestUrl is presumably the url that rewritten references should target.
	DestUrl      string
	// Defmod is presumably the default Rewriter to fall back on.
	Defmod       Rewriter
	// Rewriters lists rewriter types; presumably the ones to enable.
	Rewriters    []RewriterType
	// HeaderPrefix and HeaderRules have the same types as
	// HeaderRewriter.Prefix and HeaderRewriter.Rules and presumably seed them.
	HeaderPrefix string
	HeaderRules  map[string]RewriteRule
}

func DefaultConfig

func DefaultConfig() *Config

type CookieRewriter

// CookieRewriter is a field-less type; its Rewrite method has the signature
// required by the Rewriter interface. Construct one with NewCookieRewriter.
type CookieRewriter struct {
}

func NewCookieRewriter

func NewCookieRewriter(configs ...func(*Config)) *CookieRewriter

func (*CookieRewriter) Rewrite

func (crw *CookieRewriter) Rewrite(p []byte) []byte

type CssRewriter

// CssRewriter rewrites CSS content; its Rewrite method has the signature
// required by the Rewriter interface.
type CssRewriter struct {
	// Rw is the url rewriter this CssRewriter holds.
	// NOTE(review): presumably applied to each url found in the CSS and set
	// from NewCssRewriter's argument — confirm.
	Rw *UrlRewriter
}

func NewCssRewriter

func NewCssRewriter(urlrw *UrlRewriter) *CssRewriter

func (*CssRewriter) Rewrite

func (rerw *CssRewriter) Rewrite(p []byte) []byte

type HeaderRewriter

// HeaderRewriter rewrites a set of HTTP headers via RewriteHeaders.
// NOTE(review): the field notes below are inferred from names and types only —
// confirm against RewriteHeaders.
type HeaderRewriter struct {
	// Prefix is presumably the string prepended to header names by the
	// Prefix* rewrite rules.
	Prefix           string
	// Rules maps header name to RewriteRule (same type as
	// DefaultHeaderRewriters).
	Rules            map[string]RewriteRule
	// Urlrw and Cookierw are presumably the rewriters used for the UrlRewrite
	// and Cookie rules respectively.
	Urlrw            Rewriter
	Cookierw         Rewriter
	// RewritingContent presumably reports whether the body is also being
	// rewritten (relevant to PrefixIfContentRewrite / ContentLength).
	RewritingContent bool
}

func NewHeaderRewriter

func NewHeaderRewriter(configs ...func(cfg *Config)) *HeaderRewriter

func (HeaderRewriter) RewriteHeaders

func (hrw HeaderRewriter) RewriteHeaders(headers http.Header) http.Header

type HtmlRewriter

type HtmlRewriter struct {
	// contains filtered or unexported fields
}

func NewHtmlRewriter

func NewHtmlRewriter(urlrw Rewriter, configs ...func(*Config)) *HtmlRewriter

func (*HtmlRewriter) Rewrite

func (hrw *HtmlRewriter) Rewrite(p []byte) []byte

type PrefixRewriter

type PrefixRewriter struct {
	Prefix []byte
}

PrefixRewriter adds a prefix if not present

func (PrefixRewriter) Rewrite

func (prw PrefixRewriter) Rewrite(p []byte) []byte

type RegexRewriter

// RegexRewriter pairs a compiled regular expression with a Rewriter; its
// Rewrite method has the signature required by the Rewriter interface.
// NOTE(review): presumably Rw is applied to each match of Re — confirm in
// Rewrite.
type RegexRewriter struct {
	// Re is the pattern to search for.
	Re *regexp.Regexp
	// Rw is the rewriter associated with the pattern.
	Rw Rewriter
	// TODO - implement counts
	Count int
}

func (*RegexRewriter) Rewrite

func (rerw *RegexRewriter) Rewrite(p []byte) []byte

type RewriteRule

// RewriteRule identifies how a single HTTP header should be treated; it is
// the value type of DefaultHeaderRewriters and HeaderRewriter.Rules.
type RewriteRule int
// The rules are numbered consecutively from Keep = 0 to Cookie = 6.
// NOTE(review): the meanings below are inferred from the names — confirm
// against HeaderRewriter.RewriteHeaders.
const (
	// Keep presumably leaves the header untouched.
	Keep RewriteRule = iota
	// PrefixIfUrlRewrite presumably prefixes the header name only when urls
	// are being rewritten.
	PrefixIfUrlRewrite
	// Prefix presumably always prefixes the header name.
	Prefix
	// UrlRewrite presumably passes the header value through the url rewriter.
	UrlRewrite
	// PrefixIfContentRewrite presumably prefixes the header name only when
	// the content is being rewritten.
	PrefixIfContentRewrite
	// ContentLength presumably applies special handling for Content-Length.
	ContentLength
	// Cookie presumably passes the header value through the cookie rewriter.
	Cookie
)

type Rewriter

type Rewriter interface {
	Rewrite(i []byte) (o []byte)
}

Rewriter takes an input byte slice and returns an output slice of rewritten bytes. The lengths of input & output will not necessarily match, and implementations *may* alter the input bytes.

type RewriterType

type RewriterType int

RewriterType enumerates rewriters that operate on different types of content

// Enumerated RewriterType values, numbered consecutively from
// RwTypeUnknown = 0 (the zero value) to RwTypeCss = 7.
const (
	// RwTypeUnknown is the zero value of RewriterType.
	RwTypeUnknown RewriterType = iota
	RwTypeUrl
	RwTypeHeader
	RwTypeContent
	RwTypeCookie
	RwTypeHtml
	RwTypeJavascript
	RwTypeCss
)

func (RewriterType) String

func (rwt RewriterType) String() string

type UrlRewriter

type UrlRewriter struct {
	// contains filtered or unexported fields
}

func NewHostRelativeUrlRewriter

func NewHostRelativeUrlRewriter(from string) *UrlRewriter

func NewRelativeUrlRewriter

func NewRelativeUrlRewriter(from string) *UrlRewriter

NewRelativeUrlRewriter turns URLs that match from's hostname into relative URLs.

func NewUrlRewriter

func NewUrlRewriter(from, to string) *UrlRewriter

func (*UrlRewriter) Rewrite

func (urw *UrlRewriter) Rewrite(p []byte) []byte

func (*UrlRewriter) RewriteString

func (urw *UrlRewriter) RewriteString(p string) string

type WarcRecordRewriter

type WarcRecordRewriter struct {
	Index    cdxj.Writer
	Urlrw    *UrlRewriter
	Cookierw *CookieRewriter
	// contains filtered or unexported fields
}

func NewWarcRecordRewriter

func NewWarcRecordRewriter(urlstr string, config ...func(o *Config)) *WarcRecordRewriter

NewWarcRecordRewriter allocates a Rewriter; config funcs are optional. NewWarcRecordRewriter(urlstr) will return a default rewriter that rewrites content URLs that match the domain of urlstr to relative URLs.

func (*WarcRecordRewriter) Rewrite

func (wrr *WarcRecordRewriter) Rewrite(in []byte) (out []byte)

Rewrite exists to conform WarcRecordRewriter to the Rewriter interface, but doesn't handle malformed data very well. If you're confident that the supplied bytes represent a valid WARC record, this will work just fine; for better error reporting, use RewriteRecord.

func (*WarcRecordRewriter) RewriteRecord

func (wrr *WarcRecordRewriter) RewriteRecord(rec *warc.Record) (*warc.Record, error)

RewriteRecord takes a record and rewrites it according to rules defined on the Rewriter.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL