parser

package
v0.0.0-...-75434e3 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 10, 2024 License: MIT Imports: 9 Imported by: 0

Documentation

Overview

Package parser provides the parser for the telegram channel page.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func FixEmoji

func FixEmoji(s *goquery.Selection) *goquery.Selection

FixEmoji removes all emoji tags

func FixLinks(s *goquery.Selection) *goquery.Selection

FixLinks removes all attributes from links

func GetChannelWebURL

func GetChannelWebURL(chName string) string

GetChannelWebURL returns the channel web URL based on the channel name.

func GetImages

func GetImages(s *goquery.Selection) []string

GetImages returns all images from the post

func GetPageDescriptionHTML

func GetPageDescriptionHTML(doc *goquery.Document) string

GetPageDescriptionHTML returns the page description HTML.

func GetPageImageURL

func GetPageImageURL(doc *goquery.Document) string

GetPageImageURL returns the page image URL.

func GetPageLink(doc *goquery.Document) string

GetPageLink returns the page link

func GetPageTitle

func GetPageTitle(doc *goquery.Document) string

GetPageTitle returns the page title

func GetPostCreated

func GetPostCreated(s *goquery.Selection) time.Time

GetPostCreated returns the post created datetime

func GetPostLink(s *goquery.Selection) string

GetPostLink returns the post link

func GetPostTextHTML

func GetPostTextHTML(s *goquery.Selection) string

GetPostTextHTML returns the post text as HTML

func GetPostTitle

func GetPostTitle(text string) string

GetPostTitle returns the post title

func GetSafeHTML

func GetSafeHTML(s *goquery.Selection) string

GetSafeHTML returns the HTML string without unsafe tags

func GetVideos

func GetVideos(s *goquery.Selection) []string

GetVideos returns all videos from the post

func ParseDateTime

func ParseDateTime(dt string) (time.Time, error)

ParseDateTime parses the datetime string and returns the time.Time object

func RemoveUnsafeTags

func RemoveUnsafeTags(s *goquery.Selection) *goquery.Selection

RemoveUnsafeTags removes all tags except <a>, <i>, <b>, and <br>.

func ShortenText

func ShortenText(text string, maxLength int) string

ShortenText shortens the text to the specified length.

Types

type Page

type Page struct {
	Title       string
	Link        string
	Description string
	ImageURL    string
	Posts       []*Post
}

Page represents a page from the telegram channel

func GetPage

func GetPage(doc *goquery.Document) *Page

GetPage returns the page object

func Parse

func Parse(chName string) (*Page, error)

Parse returns the page object

type Post

type Post struct {
	Title   string
	Text    string
	Link    string
	ID      string
	Created time.Time
	Videos  []string
	Images  []string
}

Post represents a post from the telegram channel

func GetPosts

func GetPosts(doc *goquery.Document) []*Post

GetPosts returns all posts from the page

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL