mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2024-12-21 07:59:03 +00:00
12a1f914f4
* update github.com/alecthomas/chroma v0.8.0 -> v0.8.1 * github.com/blevesearch/bleve v1.0.10 -> v1.0.12 * editorconfig-core-go v2.1.1 -> v2.3.7 * github.com/gliderlabs/ssh v0.2.2 -> v0.3.1 * migrate editorconfig.ParseBytes to Parse * github.com/shurcooL/vfsgen to 0d455de96546 * github.com/go-git/go-git/v5 v5.1.0 -> v5.2.0 * github.com/google/uuid v1.1.1 -> v1.1.2 * github.com/huandu/xstrings v1.3.0 -> v1.3.2 * github.com/klauspost/compress v1.10.11 -> v1.11.1 * github.com/markbates/goth v1.61.2 -> v1.65.0 * github.com/mattn/go-sqlite3 v1.14.0 -> v1.14.4 * github.com/mholt/archiver v3.3.0 -> v3.3.2 * github.com/microcosm-cc/bluemonday 4f7140c49acb -> v1.0.4 * github.com/minio/minio-go v7.0.4 -> v7.0.5 * github.com/olivere/elastic v7.0.9 -> v7.0.20 * github.com/urfave/cli v1.20.0 -> v1.22.4 * github.com/prometheus/client_golang v1.1.0 -> v1.8.0 * github.com/xanzy/go-gitlab v0.37.0 -> v0.38.1 * mvdan.cc/xurls v2.1.0 -> v2.2.0 Co-authored-by: Lauris BH <lauris@nix.lv>
110 lines
3.6 KiB
Go
Vendored
110 lines
3.6 KiB
Go
Vendored
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
|
|
// See LICENSE for licensing information
|
|
|
|
// Package xurls extracts urls from plain text using regular expressions.
|
|
package xurls
|
|
|
|
import (
|
|
"bytes"
|
|
"regexp"
|
|
)
|
|
|
|
//go:generate go run generate/tldsgen/main.go
|
|
//go:generate go run generate/schemesgen/main.go
|
|
|
|
const (
	// Unicode general categories used to build the character sets below:
	// letters, marks, numbers, currency symbols, other symbols and other
	// punctuation.
	letter    = `\p{L}`
	mark      = `\p{M}`
	number    = `\p{N}`
	currency  = `\p{Sc}`
	otherSymb = `\p{So}`
	otherPunc = `\p{Po}`

	// Character sets, written without the surrounding brackets so they can
	// be spliced into character classes. iriChar is what host name labels
	// are made of; endChar is what a path may end with; midChar extends
	// endChar with "*" and other punctuation for use in the middle of a path.
	iriChar = letter + mark + number
	endChar = iriChar + `/\-_+&~%=#` + currency + otherSymb
	midChar = endChar + `_*` + otherPunc

	// Balanced bracket groups with at most one level of nesting, and the
	// path continuation built from them: runs of midChar, each of which must
	// be closed by a balanced group or by an endChar.
	wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)`
	wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]`
	wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}`
	wellAll   = wellParen + `|` + wellBrack + `|` + wellBrace
	pathCont  = `([` + midChar + `]*(` + wellAll + `|[` + endChar + `])+)+`

	// Host pieces. iri is one label that starts and ends with an iriChar and
	// may contain hyphens in between; domain is one or more labels, each
	// followed by a dot. octet matches 0 to 255, and port is an optional ":"
	// followed by any number of digits.
	iri      = `[` + iriChar + `]([` + iriChar + `\-]*[` + iriChar + `])?`
	domain   = `(` + iri + `\.)+`
	octet    = `(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])`
	ipv4Addr = `\b` + octet + `\.` + octet + `\.` + octet + `\.` + octet + `\b`
	ipv6Addr = `([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:[0-9a-fA-F]{0,4}|:[0-9a-fA-F]{1,4})?|(:[0-9a-fA-F]{1,4}){0,2})|(:[0-9a-fA-F]{1,4}){0,3})|(:[0-9a-fA-F]{1,4}){0,4})|:(:[0-9a-fA-F]{1,4}){0,5})((:[0-9a-fA-F]{1,4}){2}|:(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])(\.(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])){3})|(([0-9a-fA-F]{1,4}:){1,6}|:):[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){7}:`
	ipAddr   = `(` + ipv4Addr + `|` + ipv6Addr + `)`
	port     = `(:[0-9]*)?`
)
|
|
|
|
// AnyScheme can be passed to StrictMatchingScheme to match any possibly valid
|
|
// scheme, and not just the known ones.
|
|
var AnyScheme = `([a-zA-Z][a-zA-Z.\-+]*://|` + anyOf(SchemesNoAuthority...) + `:)`
|
|
|
|
// SchemesNoAuthority lists, in sorted order, some well-known URL schemes that
// are written with a bare ":" after the scheme name rather than "://".
var SchemesNoAuthority = []string{
	"bitcoin", // Bitcoin
	"file",    // files
	"magnet",  // torrent magnet links
	"mailto",  // mail
	"sms",     // SMS
	"tel",     // telephone
	"xmpp",    // XMPP
}
|
|
|
|
// anyOf returns a parenthesized regexp alternation that matches any one of
// strs literally. Each string is escaped with regexp.QuoteMeta and the results
// are separated by "|". With no arguments the result is "()".
func anyOf(strs ...string) string {
	alt := bytes.NewBufferString("(")
	// sep is empty before the first alternative and "|" before every later one.
	sep := ""
	for _, s := range strs {
		alt.WriteString(sep)
		alt.WriteString(regexp.QuoteMeta(s))
		sep = "|"
	}
	alt.WriteString(")")
	return alt.String()
}
|
|
|
|
func strictExp() string {
|
|
schemes := `(` + anyOf(Schemes...) + `://|` + anyOf(SchemesNoAuthority...) + `:)`
|
|
return `(?i)` + schemes + `(?-i)` + pathCont
|
|
}
|
|
|
|
func relaxedExp() string {
|
|
punycode := `xn--[a-z0-9-]+`
|
|
knownTLDs := anyOf(append(TLDs, PseudoTLDs...)...)
|
|
site := domain + `(?i)(` + punycode + `|` + knownTLDs + `)(?-i)`
|
|
hostName := `(` + site + `|` + ipAddr + `)`
|
|
webURL := hostName + port + `(/|/` + pathCont + `)?`
|
|
return strictExp() + `|` + webURL
|
|
}
|
|
|
|
// Strict produces a regexp that matches any URL with a scheme in either the
|
|
// Schemes or SchemesNoAuthority lists.
|
|
func Strict() *regexp.Regexp {
|
|
re := regexp.MustCompile(strictExp())
|
|
re.Longest()
|
|
return re
|
|
}
|
|
|
|
// Relaxed produces a regexp that matches any URL matched by Strict, plus any
|
|
// URL with no scheme.
|
|
func Relaxed() *regexp.Regexp {
|
|
re := regexp.MustCompile(relaxedExp())
|
|
re.Longest()
|
|
return re
|
|
}
|
|
|
|
// StrictMatchingScheme produces a regexp similar to Strict, but requiring that
|
|
// the scheme match the given regular expression. See AnyScheme too.
|
|
func StrictMatchingScheme(exp string) (*regexp.Regexp, error) {
|
|
strictMatching := `(?i)(` + exp + `)(?-i)` + pathCont
|
|
re, err := regexp.Compile(strictMatching)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
re.Longest()
|
|
return re, nil
|
|
}
|