forgejo/build/generate-disposable-email.go
James Hatfield 16d06705b3 feat: add setting to block disposable emails
- Add a new setting `EMAIL_DOMAIN_BLOCK_DISPOSABLE` that will append a list of
  domains that are known for being used by temporary or disposable email
  services.

- Add a utility to automatically download and format the list of domains from
  the disposable-email-domains project on github.

  (https://github.com/disposable-email-domains/disposable-email-domains)
  license: CC0 1.0 Universal (CC0 1.0) [Public Domain]

  from README:
  """
  This repo contains a list of disposable and temporary email address domains often used to register dummy users in order to spam or abuse some services.

  We cannot guarantee all of these can still be considered disposable but we do basic checking so chances are they were disposable at one point in time.
  """
2024-11-20 23:17:37 -06:00

204 lines
4.1 KiB
Go

// Copyright 2024 James Hatfield
// SPDX-License-Identifier: MIT
//go:build ignore
package main
import (
"bufio"
"bytes"
"crypto"
"flag"
"fmt"
"go/format"
"io"
"log"
"net/http"
"os"
"regexp"
"strings"
)
const disposableEmailListURL string = "https://raw.githubusercontent.com/disposable-email-domains/disposable-email-domains/%s/disposable_email_blocklist.conf"
var (
gitRef *string = flag.String("r", "master", "Git reference of the domain list version")
outPat *string = flag.String("o", "modules/setting/disposable_email_domain_data.go", "Output path")
check *bool = flag.Bool("check", false, "Check if the current output file matches the current upstream list")
)
func main() {
flag.Parse()
if *check {
// read in the local copy of the domain list
local, err := get_local_file()
if err != nil {
log.Fatalf("File Read Error: %v", err)
}
// generate the remote copy of the domain list
remote, err := generate()
if err != nil {
log.Fatalf("Generation Error: %v", err)
}
// strip the comments from both (so we dont fail simply due to git ref difference)
local = strip_comments(local)
remote = strip_comments(remote)
// generate the hash of the local copy
local_sha, err := hash(local)
if err != nil {
log.Fatalf("Local Hash Generation Error: %v", err)
}
// generate the hash of the remote copy
remote_sha, err := hash(remote)
if err != nil {
log.Fatalf("Remote Hash Generation Error: %v", err)
}
// if the hashes dont match then the local copy needs to be updated
if local_sha != remote_sha {
log.Fatalf("Disposable email domain list needs to be updated!! \"make lint-disposable-emails-fix\"")
}
} else {
// generate the source code (array of domains)
res, err := generate()
if err != nil {
log.Fatalf("Generation Error: %v", err)
}
// write result to a file
err = os.WriteFile(*outPat, res, 0o644)
if err != nil {
log.Fatalf("File Write Error: %v", err)
}
}
}
func strip_comments(data []byte) []byte {
result := make([]byte, 0, len(data))
re := regexp.MustCompile(`^\W*//.*$`)
for _, line := range bytes.Split(data, []byte("\n")) {
if !re.Match(line) {
result = append(result, line...)
}
}
return result
}
func hash(data []byte) (string, error) {
var err error
hash := crypto.SHA3_256.New()
_, err = hash.Write(data)
if err != nil {
return "", err
}
return fmt.Sprintf("%x", hash.Sum(nil)), err
}
func get_local_file() ([]byte, error) {
var err error
f, err := os.Open(*outPat)
if err != nil {
return nil, err
}
defer f.Close()
data, err := io.ReadAll(f)
if err != nil {
return nil, err
}
return data, err
}
func get_remote() ([]string, error) {
var err error
var url string = fmt.Sprintf(disposableEmailListURL, *gitRef)
// download the domain list
res, err := http.Get(url)
if err != nil {
return nil, err
}
defer res.Body.Close()
body, err := io.ReadAll(res.Body)
if err != nil {
return nil, err
}
// go through all entries (1 domain per line)
scanner := bufio.NewScanner(bytes.NewReader(body))
var arrDomains []string
for scanner.Scan() {
line := scanner.Text()
arrDomains = append(arrDomains, line)
}
return arrDomains, err
}
func generate() ([]byte, error) {
var err error
var url string = fmt.Sprintf(disposableEmailListURL, *gitRef)
// download the domains list
arrDomains, err := get_remote()
if err != nil {
return nil, err
}
// build the string in a readable way
var sb strings.Builder
_, err = sb.WriteString("[]string{\n")
if err != nil {
return nil, err
}
for _, item := range arrDomains {
_, err = sb.WriteString(fmt.Sprintf("\t%q,\n", item))
if err != nil {
return nil, err
}
}
_, err = sb.WriteString("}")
if err != nil {
return nil, err
}
// insert the values into file
final := fmt.Sprintf(hdr, url, sb.String())
return format.Source([]byte(final))
}
const hdr = `
// Copyright 2024 James Hatfield
// SPDX-License-Identifier: MIT
//
// Code generated by build/generate-disposable-email.go. DO NOT EDIT
// Sourced from %s
package setting
import "sync"
var DisposableEmailDomains = sync.OnceValue(func() []string {
return %s
})
`