Documentation ¶
Index ¶
- Constants
- Variables
- func Compare(left []byte, right []byte) bool
- func CountChildElements(parent *html.Node) (count int)
- func GetAttributeValue(node *html.Node, attrName string) string
- func GetParent(n *html.Node, path ...string) *html.Node
- func NextSiblingElement(n *html.Node) (next *html.Node)
- func RemoveThisAndAllNextSiblings(n *html.Node)
- func SanitizeText(text []byte) (_ []byte, lastQuote []byte)
- type DocType
- type Filter
- type HTMLFilter
- type HTMLFilterPair
- type RegexpPair
- type Request
- type TextPair
Constants ¶
View Source
const ( HTMLDoc = DocType(iota) TextDoc )
Variables ¶
View Source
// Package-level default delimiter lists. Their contents are not visible in
// this excerpt.
var (
	// DefaultRegexpDelimiters is the default list of RegexpPair values.
	// NOTE(review): presumably used when Request.RegexpDelimiters is unset — confirm.
	DefaultRegexpDelimiters []RegexpPair
	// DefaultTextDelimiters is the default list of TextPair values.
	// NOTE(review): presumably used when Request.TextDelimiters is unset — confirm.
	DefaultTextDelimiters []TextPair
)
View Source
var DefaultHTMLFilters = []HTMLFilterPair{ HTMLFilterPair{ Key: `reply_message_removed`, Filter: func(n *html.Node) (*html.Node, bool) { if n.Type == html.ElementNode && n.Data == `div` { if attrName := GetAttributeValue(n, `name`); attrName == `quote` && CountChildElements(n) == 2 { if firstChild := NextSiblingElement(n.FirstChild); firstChild != nil && firstChild.Data == `div` { if secondChild := NextSiblingElement(firstChild); secondChild != nil { if attrName := GetAttributeValue(secondChild, `name`); attrName == `quoted-content` { RemoveThisAndAllNextSiblings(n) return nil, true } } } } } return n, false }, }, HTMLFilterPair{ Key: `gmail_quote_removed`, Filter: func(n *html.Node) (*html.Node, bool) { if n.Type == html.ElementNode && n.Data == `div` && GetAttributeValue(n, `class`) == `gmail_quote` { var foundAttr, foundBlockquote bool for n2 := n.FirstChild; n2 != nil; n2 = n2.NextSibling { foundAttr = foundAttr || (n2.Data == `div` && GetAttributeValue(n2, `class`) == `gmail_attr`) foundBlockquote = foundBlockquote || (n2.Data == `blockquote` && GetAttributeValue(n2, `class`) == `gmail_quote`) } n.Parent.RemoveChild(n) return nil, true } return n, false }, }, HTMLFilterPair{ Key: `microsoft_outlook_node_removed`, Filter: func(n *html.Node) (*html.Node, bool) { if n.Type == html.ElementNode && n.Data == `div` { if attrStyle := GetAttributeValue(n, `style`); microsoftQuotationMark.MatchString(attrStyle) { if n.Parent.Data == `div` && CountChildElements(n.Parent) == 1 && n.Parent.Parent != nil { RemoveThisAndAllNextSiblings(n.Parent) return nil, true } RemoveThisAndAllNextSiblings(n) return nil, true } } return n, false }, }, HTMLFilterPair{ Key: `mozilla_signature_removed`, Filter: func(n *html.Node) (*html.Node, bool) { if n.Type == html.ElementNode && n.Data == `div` { if attrID := GetAttributeValue(n, `class`); attrID == `moz-signature` { next := n.NextSibling n.Parent.RemoveChild(n) return next, true } } return n, false }, }, HTMLFilterPair{ Key: 
`outlook_div_removed`, Filter: func(n *html.Node) (*html.Node, bool) { if n.Type == html.ElementNode && n.Data == `div` { if attrID := GetAttributeValue(n, `id`); attrID == `divRplyFwdMsg` { RemoveThisAndAllNextSiblings(n) return nil, true } } return n, false }, }, HTMLFilterPair{ Key: `bluewin_node_removed`, Filter: func(n *html.Node) (*html.Node, bool) { if attrID := GetAttributeValue(n, `id`); n.Type == html.ElementNode && attrID == `bw_signature` { if attrStyle := GetAttributeValue(n, `style`); attrStyle == `font-family: TheSansB-W5Plain, Arial, serif; font-size : 14px; color: rgb(153, 153, 153);` { RemoveThisAndAllNextSiblings(n) return nil, true } } return n, false }, }, }
Functions ¶
func CountChildElements ¶
func GetAttributeValue ¶
GetAttributeValue returns the value of the attribute named 'attrName', or "" if the node has no such attribute; hence it cannot distinguish between a missing attribute and an empty value.
func SanitizeText ¶
Types ¶
type Filter ¶
// Filter carries the HTML and text forms of a message together with
// bookkeeping about which delimiters matched and which content-ids were seen.
// NOTE(review): presumably this is the result returned by Extract — confirm.
type Filter struct {
	// HTML is the HTML version of the message.
	HTML []byte
	// Text is the plain-text version of the message.
	Text []byte
	// TextDelimiterKeys lists delimiter keys for the text version.
	// NOTE(review): presumably the keys of the text delimiters that matched — confirm.
	TextDelimiterKeys []string
	// HTMLDelimiterKeys lists delimiter keys for the HTML version.
	// NOTE(review): presumably the keys of the HTML filters that matched — confirm.
	HTMLDelimiterKeys []string
	// ContentIDs are the content-ids found in the current text of the email.
	ContentIDs []string
	// QuotedContentIDs are all content-ids that are not part of the current
	// text; in other words, the ones to ignore.
	QuotedContentIDs []string
	// Equal is a boolean flag whose meaning is not visible in this excerpt.
	// NOTE(review): presumably reports that the HTML and text results agree — confirm.
	Equal bool
	// ErrExtractHTML holds an error related to HTML extraction, if any.
	ErrExtractHTML error
}
func Extract ¶
Extract reads both the HTML and the text version of an email and returns, as text, the plain message without the full quote of previous messages. The HTML message is converted to a sensible text representation. The byte slices for HTML and Text are reused.
func (Filter) PeekHTMLDelimiterKey ¶
func (Filter) PeekTextDelimiterKey ¶
func (Filter) SelectBestMessage ¶
SelectBestMessage automatically chooses whether the output should be based on the HTML or the Text version; if one of them is empty, the other one is chosen.
type HTMLFilterPair ¶
// HTMLFilterPair associates an HTML filter with a key that names it.
type HTMLFilterPair struct {
	// Key is the name of the filter, e.g. `gmail_quote_removed`.
	Key string
	// Filter is the HTMLFilter registered under Key.
	Filter HTMLFilter
}
type RegexpPair ¶
type Request ¶
// Request bundles the HTML and text forms of a message with the delimiters
// and filters to use on them.
// NOTE(review): presumably this is the input to Extract — confirm.
type Request struct {
	// HTML is the HTML version of the message.
	HTML []byte
	// Text is the plain-text version of the message.
	Text []byte
	// TextDelimiters is the list of TextPair delimiters.
	// NOTE(review): presumably falls back to DefaultTextDelimiters when empty — confirm.
	TextDelimiters []TextPair
	// RegexpDelimiters is the list of RegexpPair delimiters.
	// NOTE(review): presumably falls back to DefaultRegexpDelimiters when empty — confirm.
	RegexpDelimiters []RegexpPair
	// HTMLFilters is the list of HTML filters.
	// NOTE(review): presumably falls back to DefaultHTMLFilters when empty — confirm.
	HTMLFilters []HTMLFilterPair
}
Click to show internal directories.
Click to hide internal directories.