converter

package
v0.0.0-...-56eae22 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 28, 2015 License: Apache-2.0 Imports: 42 Imported by: 0

Documentation

Overview

Package converter implements functions for converting files to PDF.

Index

Constants

View Source
const ErrTextFn = "ZZZ-errors.txt"

name of errors list in resulting archive

View Source
const LofficeLockPort = 27999

port for LibreOffice locking (only one instance should be running)

View Source
const SaveOriginalHTML = false

save original html (do not delete it)

Variables

View Source
var (
	// ConfPdftk is the path for PdfTk
	ConfPdftk = config.String("pdftk", "pdftk")

	// ConfPdfseparate is the path for pdfseparate (member of poppler-utils)
	ConfPdfseparate = config.String("pdfseparate", "pdfseparate")

	// ConfLoffice is the path for LibreOffice
	ConfLoffice = config.String("loffice", "loffice")

	// ConfGm is the path for GraphicsMagick
	ConfGm = config.String("gm", "gm")

	// ConfGs is the path for GhostScript
	ConfGs = config.String("gs", "gs")

	// ConfPdfClean is the path for pdfclean
	ConfPdfClean = config.String("pdfclean", "pdfclean")

	// ConfMutool is the path for mutool
	ConfMutool = config.String("mutool", "mutool")

	// ConfSortBeforeMerge should be true if generally we should sort files by filename before merge
	ConfSortBeforeMerge = config.Bool("sortBeforeMerge", false)

	// ConfChildTimeout is the time before the child gets killed
	ConfChildTimeout = config.Duration("childTimeout", 1*time.Hour)

	// ConcLimit limits the concurrently running child processes
	ConcLimit = NewRateLimiter(Concurrency)

	// ConfWorkdir is the working directory (will be os.TempDir() if empty)
	ConfWorkdir = config.String("workdir", "")

	// ConfListenAddr is a listen address for HTTP requests
	ConfListenAddr = config.String("listen", ":9500")

	// ConfDefaultIsService decides whether to start as a service when run without args
	ConfDefaultIsService = config.Bool("defaultIsService", false)
)
View Source
var Concurrency = int(8)

Concurrency is the default number of concurrent goroutines

View Source
var ExtContentType = map[string]string{
	"doc":  "application/vnd.ms-word",
	"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	"dotx": "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
	"xls":  "application/vnd.ms-excel",
	"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	"xltx": "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
	"ppt":  "application/vnd.ms-powerpoint",
	"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
	"ppsx": "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
	"potx": "application/vnd.openxmlformats-officedocument.presentationml.template",

	"odg": "application/vnd.oasis.opendocument.graphics",
	"otg": "application/vnd.oasis.opendocument.graphics-template",
	"otp": "application/vnd.oasis.opendocument.presentation-template",
	"odp": "application/vnd.oasis.opendocument.presentation",
	"odm": "application/vnd.oasis.opendocument.text-master",
	"odt": "application/vnd.oasis.opendocument.text",
	"oth": "application/vnd.oasis.opendocument.text-web",
	"ott": "application/vnd.oasis.opendocument.text-template",
	"ods": "application/vnd.oasis.spreadsheet",
	"ots": "application/vnd.oasis.spreadsheet-template",
	"odc": "application/vnd.oasis.chart",
	"odf": "application/vnd.oasis.formula",
	"odb": "application/vnd.oasis.database",
	"odi": "application/vnd.oasis.image",

	"txt": "text/plain",
	"msg": "application/x-ole-storage",

	"jpg":  "image/jpeg",
	"jpeg": "image/jpeg",
	"gif":  "image/gif",
	"png":  "image/png",
}

ExtContentType maps file extensions to content types

View Source
var Filters = make([]FilterFunc, 0, 4)

Filters is the filter pipeline - order is application order

View Source
var LeaveTempFiles = false

LeaveTempFiles should be true only for debugging purposes (leaves temp files)

View Source
var Logger = new(log.SwapLogger)

Logger is the swappable logger.

View Source
var OtherToPdf = OfficeToPdf

OtherToPdf is the default converter

View Source
var PrependHeaders = []string{"From", "To", "Cc", "Subject", "Date"}

PrependHeaders are the headers which should be prepended to the printed mail

View Source
var Workdir = os.TempDir()

Workdir is the main working directory

Functions

func ExtractingFilter

func ExtractingFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart,
	files chan<- ArchFileItem, errch chan<- error, ctx *Context)

ExtractingFilter is a filter for the mail pipeline which extracts archives

func FixContentType

func FixContentType(body []byte, contentType, fileName string) string

FixContentType ensures proper content-type (uses mimemagic for "" and application/octet-stream)

func GetCidMap

func GetCidMap(text []byte, subDir string) ([]byte, map[string]string, error)

GetCidMap returns the cid-filename mapping

func HTMLPartFilter

func HTMLPartFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart,
	files chan<- ArchFileItem, errch chan<- error, ctx *Context)

HTMLPartFilter reads multipart/alternative (text/plain + text/html), preferring the html part + groups the multipart/related images which are referred in the html.

multipart/related encapsulates multipart/alternative, which contains text/plain and text/html, the related part contains images, too - at least usually.

func HTMLToPdf

func HTMLToPdf(destfn string, r io.Reader, contentType string, ctx *Context) error

HTMLToPdf converts HTML (text/html) to PDF

func ImageToPdf

func ImageToPdf(destfn string, r io.Reader, contentType string, ctx *Context) error

ImageToPdf converts an image (image/...) to PDF

func ImageToPdfGm

func ImageToPdfGm(w io.Writer, r io.Reader, contentType string) error

ImageToPdfGm converts image to PDF using GraphicsMagick

func LoadConfig

func LoadConfig(fn string) error

LoadConfig loads TOML config file

func MPRelatedToPdf

func MPRelatedToPdf(destfn string, r io.Reader, contentType string, ctx *Context) error

MPRelatedToPdf converts multipart/related to PDF

func MailToPdfZip

func MailToPdfZip(destfn string, body io.Reader, contentType string, ctx *Context) error

MailToPdfZip converts mail to ZIP of PDFs

func MailToSplittedPdfZip

func MailToSplittedPdfZip(destfn string, body io.Reader,
	contentType string, split bool, imgmime, imgsize string,
	ctx *Context) error

MailToSplittedPdfZip converts mail to ZIP of PDFs and images

func MailToTree

func MailToTree(outdir string, r io.Reader) error

MailToTree writes mail parts as files starting at outdir as root, trying to reimplement the mime hierarchy in the directory hierarchy

func MailToZip

func MailToZip(destfn string, body io.Reader, contentType string, ctx *Context) error

MailToZip dumps mail and all parts into ZIP

func NewB64QuoPriDecoder

func NewB64QuoPriDecoder(r io.Reader) io.Reader

NewB64QuoPriDecoder replaces bork encoding (+base64-)

func NewCidMapper

func NewCidMapper(cids map[string]string, subDir string, r io.Reader) io.Reader

NewCidMapper returns a reader which remaps Content-Id URLs to ContentDir/filename

func NewEqsignStripper

func NewEqsignStripper(r io.Reader) io.Reader

NewEqsignStripper returns a reader which strips equal signs from line endings

func NewOLEStorageReader

func NewOLEStorageReader(r io.Reader) (io.ReadCloser, error)

NewOLEStorageReader converts Outlook .msg files to .eml RFC822 email files. For this it uses perl Email::Outlook::Message (thanks, @matijs), and returns an io.ReadCloser with the converted data.

This calls out to perl, and needs Email::Outlook::Message (can be installed with `cpan -i Email::Outlook::Message`).

See http://www.matijs.net/software/msgconv

func NewQuoPriDecoder

func NewQuoPriDecoder(r io.Reader) io.Reader

NewQuoPriDecoder replaces =A0= with \n

func NewScannerReader

func NewScannerReader(s *bufio.Scanner) io.Reader

NewScannerReader turns a bufio.Scanner to an io.Reader

func NewTextReader

func NewTextReader(r io.Reader, charset string) io.Reader

NewTextReader wraps a reader with a proper charset converter

func OfficeToPdf

func OfficeToPdf(destfn string, r io.Reader, contentType string, ctx *Context) error

OfficeToPdf converts other to PDF with LibreOffice

func PdfClean

func PdfClean(fn string) (err error)

PdfClean cleans PDF from restrictions

func PdfDumpFdf

func PdfDumpFdf(destfn, inpfn string) error

PdfDumpFdf dumps the FDF from the given PDF.

func PdfDumpFields

func PdfDumpFields(inpfn string) ([]string, error)

PdfDumpFields dumps the field names from the given PDF.

func PdfFillFdf

func PdfFillFdf(destfn, inpfn string, values map[string]string) error

PdfFillFdf fills the FDF and generates PDF.

func PdfMerge

func PdfMerge(ctx context.Context, destfn string, filenames ...string) error

PdfMerge merges pdf files into destfn

func PdfPageNum

func PdfPageNum(srcfn string) (numberofpages int, err error)

PdfPageNum returns the number of pages

func PdfRewrite

func PdfRewrite(destfn, srcfn string) error

PdfRewrite converts PDF to PDF (rewrites as PDF->PS->PDF)

func PdfSplit

func PdfSplit(srcfn string) (filenames []string, err error)

PdfSplit splits pdf to pages, returns those filenames

func PdfToImage

func PdfToImage(w io.Writer, r io.Reader, contentType, size string) error

PdfToImage converts PDF to image using PdfToImageGm if available and the result is OK, then PdfToImageCairo.

func PdfToImageCairo

func PdfToImageCairo(w io.Writer, r io.Reader, contentType, size string) error

PdfToImageCairo converts PDF to image using pdftocairo from poppler-utils.

func PdfToImageGm

func PdfToImageGm(w io.Writer, r io.Reader, contentType, size string) error

PdfToImageGm converts PDF to image using GraphicsMagick.

func PdfToImageMulti

func PdfToImageMulti(sfiles []string, imgmime, imgsize string) (imgfilenames []string, err error)

PdfToImageMulti converts PDF pages to images, using parallel threads

func PdfToPdf

func PdfToPdf(destfn string, r io.Reader, _ string, ctx *Context) error

PdfToPdf "converts" PDF (application/pdf) to PDF (just copies)

func PdfToPs

func PdfToPs(destfn, srcfn string) error

PdfToPs converts PDF to postscript

func PrependHeaderFilter

func PrependHeaderFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart,
	files chan<- ArchFileItem, errch chan<- error, ctx *Context,
)

PrependHeaderFilter writes Subject, From... headers at the beginning of the html/plain parts.

func PsToPdf

func PsToPdf(destfn, srcfn string) error

PsToPdf converts postscript to PDF

func ScanLines

func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error)

ScanLines is a split function for a Scanner that returns each line of text, unmodified. The returned line may be empty. The end-of-line marker is one optional carriage return followed by one mandatory newline. In regular expression notation, it is `\r?\n`. The last non-empty line of input will be returned even if it has no newline.

func SetupFilters

func SetupFilters(inch <-chan i18nmail.MailPart, resultch chan<- ArchFileItem,
	errch chan<- error, ctx *Context,
) <-chan i18nmail.MailPart

SetupFilters applies the filters to the parts received on inch, and returns the channel on which the filtered parts are delivered

func SlurpMail

func SlurpMail(partch chan<- i18nmail.MailPart, errch chan<- error, body io.Reader)

SlurpMail splits mail to parts, returns parts and/or error on the given channels

func TextDecodeFilter

func TextDecodeFilter(inch <-chan i18nmail.MailPart, outch chan<- i18nmail.MailPart,
	files chan<- ArchFileItem, errch chan<- error, ctx *Context)

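TextDecodeFilter is a filter for the mail pipeline which, as its name suggests, decodes the text parts (the previous description was a verbatim copy of PrependHeaderFilter's - TODO: confirm against the implementation).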

func TextToPdf

func TextToPdf(destfn string, r io.Reader, contentType string, ctx *Context) error

TextToPdf converts text (text/plain) to PDF

func ZipFiles

func ZipFiles(dest io.Writer, skipOnError, unsafeArchFn bool, files ...ArchFileItem) (err error)

ZipFiles adds files (by handle) to zip (writer)

func ZipTree

func ZipTree(dest io.Writer, root string, skipOnError, unsafeArchFn bool) (err error)

ZipTree adds all files in the tree originating at the given path to zip (writer)

Types

type ArchFileItem

type ArchFileItem struct {
	File     FileLike //opened file handle
	Filename string   //name of the file
	Archive  string   //name in the archive
	Error    error    //error
}

ArchFileItem groups an archive item

func MailToPdfFiles

func MailToPdfFiles(r io.Reader, ctx *Context) (files []ArchFileItem, err error)

MailToPdfFiles converts email to PDF files. Every mail part goes through all the filters in Filters, in reverse order (last first).

func (ArchFileItem) ArchiveName

func (a ArchFileItem) ArchiveName() string

ArchiveName returns the archive name - Archive, Filename if set, otherwise File's name

type ArchItems

type ArchItems []ArchFileItem

ArchItems is a wrapper for []ArchFileItem for sort.Sort

func (ArchItems) Len

func (a ArchItems) Len() int

Len returns the length of ArchItems

func (ArchItems) Less

func (a ArchItems) Less(i, j int) bool

Less returns whether a[i] < a[j]

func (ArchItems) Sort

func (a ArchItems) Sort() ArchItems

Sort sorts ArchItems by ArchiveName

func (ArchItems) Swap

func (a ArchItems) Swap(i, j int)

Swap swaps items i and j for sort.Sort

type Context

type Context struct {
	Dir string //base workdir
}

Context contains the execution context (workdir, atm)

type Converter

type Converter func(string, io.Reader, string, *Context) error

Converter converts to PDF (destination filename, source reader, source content-type and execution context)

func GetConverter

func GetConverter(contentType string, mediaType map[string]string) (converter Converter)

GetConverter gets converter for the content-type

func NewTextConverter

func NewTextConverter(charset string) Converter

NewTextConverter converts encoded text to pdf - by decoding it

type FieldSetter

type FieldSetter interface {
	Set(key, value string) error
}

type FileLike

type FileLike interface {
	io.Reader
	io.Closer
	Stat() (os.FileInfo, error)
}

FileLike is a minimal needed interface for ArchFileItem.File

type FilterFunc

type FilterFunc func(<-chan i18nmail.MailPart, chan<- i18nmail.MailPart, chan<- ArchFileItem, chan<- error, *Context)

FilterFunc is the type for the pipeline filter function. It must close the out channel on finish!

type PortLock

type PortLock struct {
	// contains filtered or unexported fields
}

PortLock is a locker which locks by binding to a port on the loopback IPv4 interface

func NewPortLock

func NewPortLock(port int) *PortLock

NewPortLock returns a lock for port

func (*PortLock) Lock

func (p *PortLock) Lock()

Lock locks on port

func (*PortLock) Unlock

func (p *PortLock) Unlock()

Unlock unlocks the port lock

type RateLimiter

type RateLimiter interface {
	//Acquire acquires a token (blocks if none accessible)
	Acquire() Token
	//Release releases the token
	Release(Token)
}

RateLimiter is the interface for rate limiting

func NewRateLimiter

func NewRateLimiter(n int) RateLimiter

NewRateLimiter returns a RateLimiter

type ScannerReader

type ScannerReader struct {
	// contains filtered or unexported fields
}

ScannerReader uses a bufio.Scanner as an io.Reader

func (*ScannerReader) Read

func (sr *ScannerReader) Read(p []byte) (n int, err error)

Read implements io.Reader: reads at most len(p) bytes into p, returns the number of bytes read and/or the error encountered

type Token

type Token struct{}

Token is a token

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL