pipeline

package module
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 19, 2021 License: MIT Imports: 9 Imported by: 0

README

HTML Pipeline for Go

This is a Go version of html-pipeline.

Other versions

Usage

package main

import (
	"fmt"

	"github.com/PuerkitoBio/goquery"
	pipeline "github.com/longbridgeapp/html-pipeline"
)

// ImageMaxWidthFilter is an example of a custom filter.
type ImageMaxWidthFilter struct{}

// Call sets the inline style `max-width: 100%` on every <img> element found
// in doc. It always returns a nil error.
func (f ImageMaxWidthFilter) Call(doc *goquery.Document) (err error) {
	images := doc.Find("img")
	images.Each(func(_ int, img *goquery.Selection) {
		img.SetAttr("style", `max-width: 100%`)
	})

	return nil
}

// main builds an HTML-mode pipeline (Markdown rendering, sanitization, the
// custom ImageMaxWidthFilter, then "#" and "@" mention filters), runs a small
// Markdown document through it and prints the rendered HTML.
func main() {
	pipe := pipeline.NewPipeline([]pipeline.Filter{
		pipeline.MarkdownFilter{},
		pipeline.SanitizationFilter{},
		ImageMaxWidthFilter{},
		pipeline.MentionFilter{
			Prefix: "#",
			Format: func(name string) string {
				return fmt.Sprintf(`<a href="https://github.com/topic/%s">#%s</a>`, name, name)
			},
		},
		pipeline.MentionFilter{
			Prefix: "@",
			Format: func(name string) string {
				return fmt.Sprintf(`<a href="https://github.com/%s">@%s</a>`, name, name)
			},
		},
	})

	markdown := `# Hello world

![](javascript:alert) [Click me](javascript:alert)

This is #html-pipeline example, @huacnlee created.`
	out, err := pipe.Call(markdown)
	if err != nil {
		// Report the failure rather than discarding the error.
		fmt.Println("render failed:", err)
		return
	}
	fmt.Println(out)

	/*
		<h1>Hello world</h1>

		<p><img alt="" style="max-width: 100%"/> Click me</p>

		<p>This is <a href="https://github.com/topic/html-pipeline">#html-pipeline</a> example, <a href="https://github.com/huacnlee">@huacnlee</a> created.</p>
	*/
}

https://play.golang.org/p/zB0T7KczdB4

Use for Plain Text case

Sometimes you may want to use html-pipeline to process plain text.

For example:

  • Match mentions, and then send notifications.
  • Convert Mention / HashTag or other text into other format.

But in HTML mode, it will escape some characters (", ', &), which we don't want in this case.

For this case there is a NewPlainPipeline function, which creates a plain-mode pipeline that does no escaping.

NOTE: For security, this pipeline will remove all HTML tags (<.+?>).

package main

import (
	"fmt"
	"github.com/longbridgeapp/html-pipeline"
)

// main builds a plain-mode pipeline with "#" and "@" mention filters, runs a
// plain-text sample containing quotes, an ampersand and a <script> tag through
// it, and prints the result.
func main() {
	pipe := pipeline.NewPlainPipeline([]pipeline.Filter{
		pipeline.MentionFilter{
			Prefix: "#",
			Format: func(name string) string {
				return fmt.Sprintf(`[hashtag name="%s"]%s[/hashtag]`, name, name)
			},
		},
		pipeline.MentionFilter{
			Prefix: "@",
			Format: func(name string) string {
				return fmt.Sprintf(`[mention name="%s"]@%s[/mention]`, name, name)
			},
		},
	})

	text := `"Hello" & 'world' this <script>danger</script> is #html-pipeline created by @huacnlee.`
	out, err := pipe.Call(text)
	if err != nil {
		// Report the failure rather than discarding the error.
		fmt.Println("render failed:", err)
		return
	}
	fmt.Println(out)
	// "Hello" & 'world' this danger is [hashtag name="html-pipeline"]html-pipeline[/hashtag] created by [mention name="huacnlee"]@huacnlee[/mention].
}

https://play.golang.org/p/vxKZU9jJi3u

Built-in filters

License

MIT License

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func TraverseTextNodes

func TraverseTextNodes(node *html.Node, fn func(*html.Node))

TraverseTextNodes walks the nested nodes to find all text nodes.

Types

type AutoCorrectFilter

type AutoCorrectFilter struct{}

AutoCorrectFilter automatically adds whitespace between CJK and half-width characters (alphabetical letters, numerical digits and symbols).

Example
raw := "<p>演示html-pipeline实现自动修正空格</p><p>这是第2个段落</p>"

pipe := NewPipeline([]Filter{
	AutoCorrectFilter{},
})

out, _ := pipe.Call(raw)
fmt.Println(out)
Output:

<p>演示 html-pipeline 实现自动修正空格</p><p>这是第 2 个段落</p>

func (AutoCorrectFilter) Call

func (f AutoCorrectFilter) Call(doc *goquery.Document) (err error)

Call AutoCorrectFilter

type ExternalLinkFilter

// ExternalLinkFilter is a filter that matches external links and adds
// rel="nofollow" target="_blank" to them.
type ExternalLinkFilter struct {
	// IgnoreHosts lists the hosts that the filter will ignore.
	IgnoreHosts []string
}

ExternalLinkFilter is a filter that matches external links and adds rel="nofollow" target="_blank" to them.

func (ExternalLinkFilter) Call

func (f ExternalLinkFilter) Call(doc *goquery.Document) (err error)

Call render

type Filter

// Filter is the base filter interface; NewPipeline and NewPlainPipeline
// accept a slice of Filter values.
type Filter interface {
	// Call receives the document being processed and returns an error, if any.
	Call(doc *goquery.Document) (err error)
}

Filter is the base filter interface.

type HTMLEscapeFilter

type HTMLEscapeFilter struct{}

HTMLEscapeFilter escapes HTML for plain text.

Example
// Input containing markup and both kinds of quotes, all of which should come
// out escaped.
raw := `<div>Hello "Foo's Bar"</div>`

pipe := NewPipeline([]Filter{
	HTMLEscapeFilter{},
})

out, _ := pipe.Call(raw)
// Use Print rather than Printf: out is data, not a format string, so a '%'
// in the rendered text must be written verbatim instead of parsed as a verb.
fmt.Print(out)
Output:

&lt;div&gt;Hello &#34;Foo&#39;s Bar&#34;&lt;/div&gt;

func (HTMLEscapeFilter) Call

func (f HTMLEscapeFilter) Call(doc *goquery.Document) (err error)

Call HTMLEscapeFilter

type ImageProxyFilter

// ImageProxyFilter replaces img src values for use with an image proxy.
//
// Deprecated: use ImageURLFilter instead.
type ImageProxyFilter struct {
	// IgnoreHosts is the list of hosts that will be ignored,
	// e.g. ["your-host.com", "your-assets.com"].
	IgnoreHosts []string
	// Formatter receives an image src and returns a string
	// (presumably the proxied URL that replaces it — confirm against Call).
	Formatter func(src string) string
}

ImageProxyFilter replaces img src for use with an image proxy. DEPRECATED: use ImageURLFilter instead.

func (ImageProxyFilter) Call

func (f ImageProxyFilter) Call(doc *goquery.Document) (err error)

Call render

func (ImageProxyFilter) IsIgnoreHost

func (f ImageProxyFilter) IsIgnoreHost(src string) bool

type ImageURLFilter

// ImageURLFilter matches image src values and replaces them using a custom
// format function.
type ImageURLFilter struct {
	// IgnoreHosts is the list of hosts that will be ignored,
	// e.g. ["your-host.com", "your-assets.com"]. If empty, all hosts match.
	IgnoreHosts []string
	// MatchHosts is the list of hosts whose images will be formatted,
	// e.g. ["some-host.com", "some-assets.com"].
	// If present, the IgnoreHosts rules are ignored;
	// otherwise the IgnoreHosts rules are used.
	MatchHosts []string
	// Format receives the image src and returns the custom-formatted replacement.
	Format func(src string) string
}

ImageURLFilter will match image src and replace with custom format.

func (ImageURLFilter) Call

func (f ImageURLFilter) Call(doc *goquery.Document) (err error)

Call render

type MarkdownFilter

// MarkdownFilter renders Markdown with blackfriday.
type MarkdownFilter struct {
	// Opts holds blackfriday options, e.g. blackfriday.WithRenderer and
	// blackfriday.WithExtensions (presumably passed to blackfriday when
	// rendering — confirm against Call).
	Opts []blackfriday.Option
}

MarkdownFilter renders Markdown with blackfriday.

Example
// Custom blackfriday HTML render options
renderer := blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{
	Flags: blackfriday.UseXHTML |
		blackfriday.NofollowLinks |
		blackfriday.CompletePage,
})

// Custom blackfriday extensions
extensions := blackfriday.Tables |
	blackfriday.FencedCode |
	blackfriday.Autolink |
	blackfriday.Strikethrough |
	blackfriday.SpaceHeadings |
	blackfriday.HardLineBreak |
	blackfriday.NoEmptyLineBeforeBlock

pipe := NewPipeline([]Filter{
	MarkdownFilter{
		Opts: []blackfriday.Option{
			blackfriday.WithRenderer(renderer),
			blackfriday.WithExtensions(extensions),
		},
	},
	SanitizationFilter{},
})

raw := `# Hello world
<script>alert;</script>
<style>body {}</style>

| Name | Location |
| ---- | --- |
| Jason | Chengdu |

This is [html-pipeline](https://github.com/longbridgeapp/html-pipeline) Markdown filter.`

out, _ := pipe.Call(raw)
fmt.Println(out)
Output:

<h1>Hello world</h1>

<p>alert;<br/>
body {}</p>

<table>
<thead>
<tr>
<th>Name</th>
<th>Location</th>
</tr>
</thead>

<tbody>
<tr>
<td>Jason</td>
<td>Chengdu</td>
</tr>
</tbody>
</table>
<p>This is <a href="https://github.com/longbridgeapp/html-pipeline" rel="nofollow">html-pipeline</a> Markdown filter.</p>

func (MarkdownFilter) Call

func (f MarkdownFilter) Call(doc *goquery.Document) (err error)

Call render

type MentionFilter

// MentionFilter matches mentions introduced by @, # or another prefix.
type MentionFilter struct {
	// Prefix is the mention prefix character; the default is "@".
	Prefix string
	// Format converts a matched name into HTML or another output format.
	Format func(name string) string
	// NamesCallback is called with the matched names.
	NamesCallback func(names []string)
	// contains filtered or unexported fields
}

MentionFilter matches mentions with @, #, or another prefix.

Example
text := `This is a @test_huacn-lee of some cool @中文名称 features that @mi_asd be
@use-ful but @don't. look at this email@address.com. @bla! I like #nylas but I don't
like to go to this apple.com?a#url. I also don't like the ### comment blocks.
But #msft is cool.`

pipe := NewPipeline([]Filter{
	MentionFilter{
		Format: func(name string) string {
			return fmt.Sprintf(`<mention>%s</mention>`, name)
		},
	},
})

out, _ := pipe.Call(text)
fmt.Println(out)
Output:

This is a <mention>test_huacn-lee</mention> of some cool <mention>中文名称</mention> features that <mention>mi_asd</mention> be
<mention>use-ful</mention> but <mention>don</mention>'t. look at this email@address.com. <mention>bla</mention>! I like #nylas but I don't
like to go to this apple.com?a#url. I also don't like the ### comment blocks.
But #msft is cool.
Example (Complex)
text := `This is a @test_huacn-lee of some cool @中文名称 features that @mi_asd be
@use-ful but @don't. look at this email@address.com. @bla! I like #nylas but I don't
like to go to this apple.com?a#url. I also don't like the ### comment blocks.
But #msft is cool.`

pipe := NewPipeline([]Filter{
	MentionFilter{
		Format: func(name string) string {
			return fmt.Sprintf(`<mention>@%s</mention>`, name)
		},
	},
	MentionFilter{
		Prefix: "#",
		Format: func(name string) string {
			return fmt.Sprintf(`<hashtag>#%s</hashtag>`, name)
		},
	},
})

out, _ := pipe.Call(text)
fmt.Println(out)
Output:

This is a <mention>@test_huacn-lee</mention> of some cool <mention>@中文名称</mention> features that <mention>@mi_asd</mention> be
<mention>@use-ful</mention> but <mention>@don</mention>'t. look at this email@address.com. <mention>@bla</mention>! I like <hashtag>#nylas</hashtag> but I don't
like to go to this apple.com?a#url. I also don't like the ### comment blocks.
But <hashtag>#msft</hashtag> is cool.

func (MentionFilter) Call

func (f MentionFilter) Call(doc *goquery.Document) (err error)

func (MentionFilter) ExtractMentionNames

func (f MentionFilter) ExtractMentionNames(text string) (names []string)

ExtractMentionNames extracts the mentioned user names from a piece of plain text.

Example
text := `@huacnlee This is a @test_huacn-lee of some cool @中文名称 features that @mi_asd be
	@use-ful but @don't. look at this email@address.com. @bla! I like #nylas but I don't
	like to go to this apple.com?a#url. I also don't like the ### comment blocks.
	But #msft is cool.`

mentionFilter := MentionFilter{}

names := mentionFilter.ExtractMentionNames(text)
fmt.Println(names)
Output:

[huacnlee test_huacn-lee 中文名称 mi_asd use-ful don bla]

type Pipeline

// Pipeline holds a list of filters; it is created with NewPipeline (HTML mode)
// or NewPlainPipeline (plain mode).
type Pipeline struct {
	// Filters is the list of filters in this pipeline
	// (presumably applied in order by Call — confirm).
	Filters []Filter
	// contains filtered or unexported fields
}

Pipeline struct

Example
// Build an HTML-mode pipeline: render Markdown, sanitize the result, then
// link "#" hashtags and "@" mentions.
pipe := NewPipeline([]Filter{
	MarkdownFilter{},
	SanitizationFilter{},
	MentionFilter{
		Prefix: "#",
		Format: func(name string) string {
			return fmt.Sprintf(`<a href="https://github.com/topic/%s">#%s</a>`, name, name)
		},
	},
	MentionFilter{
		Prefix: "@",
		Format: func(name string) string {
			return fmt.Sprintf(`<a href="https://github.com/%s">@%s</a>`, name, name)
		},
	},
})

markdown := `# Hello world

![](javascript:alert) [Click me](javascript:alert)

This is #html-pipeline example, @huacnlee created.`
out, _ := pipe.Call(markdown)
// Use Print rather than Printf: out is rendered HTML, not a format string, so
// a '%' in it (e.g. "max-width: 100%") must not be parsed as a verb.
fmt.Print(out)
Output:

<h1>Hello world</h1>

<p><img alt=""/> Click me</p>

<p>This is <a href="https://github.com/topic/html-pipeline">#html-pipeline</a> example, <a href="https://github.com/huacnlee">@huacnlee</a> created.</p>
Example (CustomFilter)
package main

import (
	"fmt"

	"github.com/PuerkitoBio/goquery"
)

// TestFilter is a custom filter used by this example.
type TestFilter struct{}

// Call sets the inline style "max-width: 100%" on every <img> element found
// in doc. It always returns a nil error.
func (f TestFilter) Call(doc *goquery.Document) (err error) {
	images := doc.Find("img")
	images.Each(func(_ int, img *goquery.Selection) {
		img.SetAttr("style", "max-width: 100%")
	})

	return nil
}

func main() {
	/*
		type TestFilter struct{}

		func (f TestFilter) Call(doc *goquery.Document) (err error) {
			doc.Find("img").Each(func(i int, node *goquery.Selection) {
				node.SetAttr("style", "max-width: 100%")
			})

			return
		}
	*/
	pipe := NewPipeline([]Filter{
		SanitizationFilter{},
		TestFilter{},
	})

	html := `<img onclick="javascript:alert" src="https://google.com/foo.jpg"/>`

	out, _ := pipe.Call(html)
	fmt.Println(out)
}
Output:

<img src="https://google.com/foo.jpg" style="max-width: 100%"/>

func NewPipeline

func NewPipeline(filters []Filter) Pipeline

NewPipeline creates a pipeline in HTML mode.

func NewPlainPipeline

func NewPlainPipeline(filters []Filter) Pipeline

NewPlainPipeline creates a pipeline in plain mode (HTML tags will be removed).

func (Pipeline) Call

func (p Pipeline) Call(raw string) (out string, err error)

Call renders raw with the pipeline.

type SanitizationFilter

// SanitizationFilter sanitizes HTML with a bluemonday policy.
type SanitizationFilter struct {
	// Policy is the bluemonday policy to use (presumably bluemonday's default
	// UGCPolicy is used when it is nil, see PolicyWithDefault — confirm).
	Policy *bluemonday.Policy
}

SanitizationFilter uses bluemonday's default UGCPolicy to sanitize HTML.

Example
pipe := NewPipeline([]Filter{
	SanitizationFilter{},
})

html := `<p style="margin: 0pt;"><img alt="" src="https://helloworld.com/images/a4c7e5612772b2429791790c7e54eeba.jpg" width="100px" style="width: 600px; height: 484px;"></p>
<p style="margin: 0pt;"><span style="font-family: 宋体; font-size: 10.5pt; mso-spacerun: &quot;yes&quot;; mso-ascii-font-family: Calibri; mso-hansi-font-family: Calibri; mso-bidi-font-family: &quot;Times New Roman&quot;; mso-font-kerning: 1.0000pt;"><font color="#000000" face="宋体">美股研究社</font><font color="#000000">1</font><font color="#000000"><font face="宋体">月</font><font face="Calibri">8</font><font face="宋体">日消息,知名投资机构</font></span></p>
<blockquote>This is blockquote</blockquote>
<table><tr><th>Foo</th><tr><td width="100">Bar</td></tr></table>
<ul><li>First line</li><li><strong>S<b>e</b><i>c</i>ond</strong> line</li></ul>
<ol><li>First line</li><li>Second line</li></ol>`

out, _ := pipe.Call(html)
fmt.Println(out)
Output:

<p><img alt="" src="https://helloworld.com/images/a4c7e5612772b2429791790c7e54eeba.jpg"/></p>
<p><span>美股研究社1月8日消息,知名投资机构</span></p>
<blockquote>This is blockquote</blockquote>
<table><tbody><tr><th>Foo</th></tr><tr><td width="100">Bar</td></tr></tbody></table>
<ul><li>First line</li><li><strong>S<b>e</b><i>c</i>ond</strong> line</li></ul>
<ol><li>First line</li><li>Second line</li></ol>

func (SanitizationFilter) Call

func (f SanitizationFilter) Call(doc *goquery.Document) (err error)

func (SanitizationFilter) PolicyWithDefault

func (f SanitizationFilter) PolicyWithDefault() *bluemonday.Policy

func (SanitizationFilter) Type

func (f SanitizationFilter) Type() string

type SimpleFormatFilter

type SimpleFormatFilter struct {
}

SimpleFormatFilter converts simple plain text into breakable HTML.

Example
pipe := NewPipeline([]Filter{
	SimpleFormatFilter{},
})

raw := `Guided tours of Go programs.

First-Class Functions in Go
Generating arbitrary text: a Markov chain algorithm`

out, _ := pipe.Call(raw)
fmt.Println(out)
Output:

<p>Guided tours of Go programs.</p><p>First-Class Functions in Go<br/>Generating arbitrary text: a Markov chain algorithm</p>

func (SimpleFormatFilter) Call

func (f SimpleFormatFilter) Call(doc *goquery.Document) (err error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL