emailmessage

package module
v0.1.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 22, 2021 License: MIT Imports: 11 Imported by: 0

README

emailmessage

Remove the reply chain to extract the latest message from an email

Documentation

Index

Constants

View Source
// Values of DocType.
const (
	// HTMLDoc is the DocType with value 0.
	// NOTE(review): presumably selects the HTML version of a message — confirm.
	HTMLDoc DocType = iota
	// TextDoc is the DocType with value 1.
	// NOTE(review): presumably selects the plain-text version of a message — confirm.
	TextDoc
)

Variables

View Source
// DefaultRegexpDelimiters is the package-level list of keyed regular
// expressions. No initializer is visible at this declaration.
// NOTE(review): presumably populated during package initialisation — confirm.
var DefaultRegexpDelimiters []RegexpPair

// DefaultTextDelimiters is the package-level list of keyed literal strings.
// No initializer is visible at this declaration.
// NOTE(review): presumably populated during package initialisation — confirm.
var DefaultTextDelimiters []TextPair
View Source
// DefaultHTMLFilters is the built-in list of HTML filters. Each entry pairs a
// Key naming the rule with a Filter that inspects a single node and, when the
// node matches a known reply-quote or signature pattern, detaches it from the
// tree via RemoveChild or RemoveThisAndAllNextSiblings.
//
// Every Filter returns (n, false) when the node does not match. On a match it
// returns true together with nil or, for the Mozilla signature rule, the node
// that followed the removed one.
// NOTE(review): how the caller interprets the returned node and flag is not
// visible here — confirm against the code that walks the tree.
var DefaultHTMLFilters = []HTMLFilterPair{

	// <div name="quote"> with exactly two child elements, the first a <div>
	// and the second carrying name="quoted-content".
	{
		Key: `reply_message_removed`,
		Filter: func(n *html.Node) (*html.Node, bool) {
			if n.Type != html.ElementNode || n.Data != `div` {
				return n, false
			}
			if GetAttributeValue(n, `name`) != `quote` || CountChildElements(n) != 2 {
				return n, false
			}
			firstChild := NextSiblingElement(n.FirstChild)
			if firstChild == nil || firstChild.Data != `div` {
				return n, false
			}
			secondChild := NextSiblingElement(firstChild)
			if secondChild == nil || GetAttributeValue(secondChild, `name`) != `quoted-content` {
				return n, false
			}
			RemoveThisAndAllNextSiblings(n)
			return nil, true
		},
	},

	// <div class="gmail_quote">. The node is removed regardless of what its
	// children look like; only the node itself is detached, not its siblings.
	{
		Key: `gmail_quote_removed`,
		Filter: func(n *html.Node) (*html.Node, bool) {
			if n.Type != html.ElementNode || n.Data != `div` || GetAttributeValue(n, `class`) != `gmail_quote` {
				return n, false
			}
			// A node without a parent cannot be detached; report no match
			// instead of dereferencing a nil parent.
			if n.Parent == nil {
				return n, false
			}
			n.Parent.RemoveChild(n)
			return nil, true
		},
	},

	// <div> whose style attribute matches microsoftQuotationMark.
	{
		Key: `microsoft_outlook_node_removed`,
		Filter: func(n *html.Node) (*html.Node, bool) {
			if n.Type != html.ElementNode || n.Data != `div` {
				return n, false
			}
			if !microsoftQuotationMark.MatchString(GetAttributeValue(n, `style`)) {
				return n, false
			}
			// When the marker is the only child element of a wrapping <div>
			// that itself has a parent, cut from the wrapper instead so the
			// empty wrapper does not stay behind. The nil check keeps a
			// parentless node from panicking here.
			if p := n.Parent; p != nil && p.Data == `div` && CountChildElements(p) == 1 && p.Parent != nil {
				RemoveThisAndAllNextSiblings(p)
				return nil, true
			}

			RemoveThisAndAllNextSiblings(n)
			return nil, true
		},
	},

	// <div class="moz-signature">. Only the signature node is removed and the
	// node that followed it is handed back.
	{
		Key: `mozilla_signature_removed`,
		Filter: func(n *html.Node) (*html.Node, bool) {
			if n.Type != html.ElementNode || n.Data != `div` || GetAttributeValue(n, `class`) != `moz-signature` {
				return n, false
			}
			// A node without a parent cannot be detached; report no match
			// instead of dereferencing a nil parent.
			if n.Parent == nil {
				return n, false
			}
			// Capture the sibling first: RemoveChild clears n.NextSibling.
			next := n.NextSibling
			n.Parent.RemoveChild(n)
			return next, true
		},
	},

	// <div id="divRplyFwdMsg">.
	{
		Key: `outlook_div_removed`,
		Filter: func(n *html.Node) (*html.Node, bool) {
			if n.Type != html.ElementNode || n.Data != `div` || GetAttributeValue(n, `id`) != `divRplyFwdMsg` {
				return n, false
			}
			RemoveThisAndAllNextSiblings(n)
			return nil, true
		},
	},

	// Any element with id="bw_signature" and the exact inline style below.
	{
		Key: `bluewin_node_removed`,
		Filter: func(n *html.Node) (*html.Node, bool) {
			if n.Type != html.ElementNode || GetAttributeValue(n, `id`) != `bw_signature` {
				return n, false
			}
			if GetAttributeValue(n, `style`) != `font-family: TheSansB-W5Plain, Arial, serif; font-size : 14px; color: rgb(153, 153, 153);` {
				return n, false
			}
			RemoveThisAndAllNextSiblings(n)
			return nil, true
		},
	},
}

Functions

func Compare

func Compare(left []byte, right []byte) bool

func CountChildElements

func CountChildElements(parent *html.Node) (count int)

func GetAttributeValue

func GetAttributeValue(node *html.Node, attrName string) string

GetAttributeValue returns the value of the attribute named attrName, or "" if the attribute is absent; it therefore cannot distinguish a missing attribute from one with an empty value.

func GetParent

func GetParent(n *html.Node, path ...string) *html.Node

func NextSiblingElement

func NextSiblingElement(n *html.Node) (next *html.Node)

func RemoveThisAndAllNextSiblings

func RemoveThisAndAllNextSiblings(n *html.Node)

func SanitizeText

func SanitizeText(text []byte) (_ []byte, lastQuote []byte)

Types

type DocType

// DocType is an integer enumeration; the package declares the values HTMLDoc
// and TextDoc for it.
type DocType int

type Filter

// Filter is the result type returned by Extract.
type Filter struct {
	// HTML and Text hold the HTML and plain-text versions of the message.
	// NOTE(review): presumably the extracted (quote-free) content — confirm.
	HTML []byte
	Text []byte

	// TextDelimiterKeys and HTMLDelimiterKeys are lists of rule keys.
	// NOTE(review): presumably the Key of every delimiter/filter that matched
	// during extraction — confirm.
	TextDelimiterKeys []string
	HTMLDelimiterKeys []string

	ContentIDs       []string // content-ids found in the current text of the email
	QuotedContentIDs []string // all content-ids that are not part of the current text, i.e. the ones to ignore

	// NOTE(review): presumably reports whether the HTML and Text results were
	// found to be equal — confirm.
	Equal bool
	// NOTE(review): presumably the error raised while extracting the HTML
	// version, nil on success — confirm.
	ErrExtractHTML error
}

func Extract

func Extract(req Request) *Filter

Extract reads both the HTML and the text version of an email and returns the plain message, without the full quote of previous messages, as text. The HTML message is converted to a sensible text representation. The byte slices for HTML and Text are reused.

func (Filter) PeekHTMLDelimiterKey

func (e Filter) PeekHTMLDelimiterKey() (string, bool)

func (Filter) PeekTextDelimiterKey

func (e Filter) PeekTextDelimiterKey() (string, bool)

func (Filter) SelectBestMessage

func (e Filter) SelectBestMessage() []byte

SelectBestMessage automatically chooses whether the output should be based on the HTML or the Text version; if one of them is empty, the other one is chosen.

type HTMLFilter

// HTMLFilter is a function applied to a single HTML node n. It returns a node
// (next) and a flag (useNext).
// NOTE(review): presumably useNext tells the caller to continue from next
// instead of n because the tree was modified — confirm against the caller.
type HTMLFilter func(n *html.Node) (next *html.Node, useNext bool)

type HTMLFilterPair

// HTMLFilterPair associates an HTMLFilter with a string key naming it.
type HTMLFilterPair struct {
	// Key names the rule.
	Key string
	// Filter is the function associated with Key.
	Filter HTMLFilter
}

type RegexpPair

// RegexpPair associates a compiled regular expression with a string key.
type RegexpPair struct {
	// Key names the rule.
	Key string
	// Regexp is the compiled pattern associated with Key.
	// NOTE(review): presumably marks where quoted text begins — confirm.
	Regexp *regexp.Regexp
}

type Request

// Request is the input accepted by Extract.
type Request struct {
	// HTML and Text are the HTML and plain-text versions of the email.
	HTML []byte
	Text []byte

	// TextDelimiters, RegexpDelimiters and HTMLFilters are the rule lists
	// supplied with the request.
	// NOTE(review): whether empty lists fall back to DefaultTextDelimiters,
	// DefaultRegexpDelimiters and DefaultHTMLFilters is not visible here —
	// confirm.
	TextDelimiters   []TextPair
	RegexpDelimiters []RegexpPair
	HTMLFilters      []HTMLFilterPair
}

type TextPair

// TextPair associates a literal string with a string key.
type TextPair struct {
	// Key names the rule.
	Key string
	// Text is the literal string associated with Key.
	// NOTE(review): presumably marks where quoted text begins — confirm.
	Text string
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL