parser

package
v0.3.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 7, 2023 License: Apache-2.0 Imports: 3 Imported by: 0

Documentation

Index

Constants

View Source
// Segment type labels. Segment.Type is documented as holding one of these
// categories (IPv4, IPv6, Domain, or Text), and TypeWeight takes such a label
// as its argument.
const (
	// TextTypeIPv4 is the type label for a segment that holds an IPv4 address.
	TextTypeIPv4 = "ipv4"

	// TextTypeIPv6 is the type label for a segment that holds an IPv6 address.
	TextTypeIPv6 = "ipv6"

	// TextTypeDomain is the type label for a segment that holds a domain name.
	TextTypeDomain = "domain"

	// TextTypeText is the type label for any segment that is none of the above.
	TextTypeText = "text"
)

Variables

View Source
// Package-level patterns, compiled once. None of the compiled patterns is
// anchored with ^ or $, so each can match a substring inside larger text.
var (
	// IPv4Regexp is the IPv4 regular expression.
	// It matches four dot-separated decimal octets, each in the range 0-255
	// (25[0-5] | 2[0-4]\d | 1\d\d | [1-9]\d | \d).
	IPv4Regexp = regexp.MustCompile(`(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)(\.(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)){3}`)

	// IPv6Regexp is the IPv6 regular expression.
	// The annotated alternatives below document how the pattern was assembled.
	// NOTE: the first one (link-local with an optional %zone suffix) is NOT part
	// of the compiled pattern; only the remaining three are.
	// [fF][eE]80:(:[0-9a-fA-F]{1,4}){0,4}(%\w+)?| # IPv6 Link-local (`net.ParseIP` does not support this format)
	// ([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}| # IPv6
	// ::([fF]{4}){1}:(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)(\.(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)){3}| # IPv4-mapped IPv6 address
	// (([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})? # IPv6 with two colons
	// Both sides of the "::" alternative are optional, so a bare "::" also matches.
	IPv6Regexp = regexp.MustCompile(`([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}|::([fF]{4}){1}:(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)(\.(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)){3}|(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?::(([0-9a-fA-F]{1,4}:){0,6}[0-9a-fA-F]{1,4})?`)

	// DomainRegexp is the domain name regular expression.
	// The first two commented patterns are alternatives that are not used here
	// (presumably earlier candidates kept for reference):
	// [a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,}
	// ([a-zA-Z0-9][-a-zA-Z0-9]{0,62}\.)+([a-zA-Z][-a-zA-Z]{0,62})
	// The third is the anchored form of the compiled pattern; the compiled
	// pattern is the same expression without the leading ^ and trailing $:
	// ^(xn--|_)?[a-zA-Z0-9]([a-zA-Z0-9-_]{0,61}[a-zA-Z0-9])?(\.(xn--|_)?[a-zA-Z0-9]([a-zA-Z0-9-_]{0,61}[a-zA-Z0-9])?)*(\.[a-zA-Z]{2,})$
	// Each label may start with "xn--" or "_", is alphanumeric at both ends, and
	// the match must end in a dot followed by at least two ASCII letters.
	DomainRegexp = regexp.MustCompile(`(xn--|_)?[a-zA-Z0-9]([a-zA-Z0-9-_]{0,61}[a-zA-Z0-9])?(\.(xn--|_)?[a-zA-Z0-9]([a-zA-Z0-9-_]{0,61}[a-zA-Z0-9])?)*(\.[a-zA-Z]{2,})`)
)

Functions

func TypeWeight

func TypeWeight(t string) int

TypeWeight provides a weight for segment types to help in sorting. IPv4 segments have the highest weight, followed by IPv6, then Domain, and then any other type.

Types

type Segment

// Segment represents a section or segment of the text.
//
// NOTE(review): Start and End are presumably offsets into the parsed text.
// Whether they count bytes or runes, and whether End is exclusive, is not
// visible here; confirm against the parser implementation.
type Segment struct {
	Start   int    // Start position of the segment
	End     int    // End position of the segment
	Type    string // Type of the segment (IPv4, IPv6, Domain, or Text); presumably one of the TextType* constants
	Content string // Actual content of the segment
}

Segment represents a section or segment of the text.

type SorterSegment

type SorterSegment []Segment

SorterSegment is a type to help in sorting a slice of Segments based on their Start position.

func (SorterSegment) Len

func (s SorterSegment) Len() int

func (SorterSegment) Less

func (s SorterSegment) Less(i, j int) bool

func (SorterSegment) Swap

func (s SorterSegment) Swap(i, j int)

type TextParser

// TextParser holds an input text together with the segments parsed from it.
// Its Parse, ParseIPv4, ParseIPv6, ParseDomain and Distinct methods all return
// *TextParser, so calls can be chained.
type TextParser struct {
	Text     string    // Text to be parsed
	Segments []Segment // Parsed segments from the text
}

TextParser is a parser designed to extract IPv4, IPv6, domain, and other text segments from a given text.

func NewTextParser

func NewTextParser(text string) *TextParser

NewTextParser initializes and returns a new TextParser.

func (*TextParser) Distinct

func (t *TextParser) Distinct() *TextParser

Distinct removes duplicate segments and completes the text slices.

func (*TextParser) Parse

func (t *TextParser) Parse() *TextParser

Parse parses the text to extract IPv4, IPv6, and other segments.

func (*TextParser) ParseDomain added in v0.2.8

func (t *TextParser) ParseDomain() *TextParser

ParseDomain extracts domain segments from the text.

func (*TextParser) ParseIPv4

func (t *TextParser) ParseIPv4() *TextParser

ParseIPv4 extracts IPv4 segments from the text.

func (*TextParser) ParseIPv6

func (t *TextParser) ParseIPv6() *TextParser

ParseIPv6 extracts IPv6 segments from the text.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL