aboutsummaryrefslogtreecommitdiff
path: root/internal/sanitize/sanitize.go
blob: 6fcdf091ea193b903b1abf0b4aae7743b59c5e88 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// Copyright (C) 2016 Opsmate, Inc.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License, v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
//
// This software is distributed WITHOUT A WARRANTY OF ANY KIND.
// See the Mozilla Public License for details.
//
// From: https://github.com/SSLMate/certspotter/blob/master/identifiers.go
// Minor tweaks to get a SanitizeDNSName function for ASCII names only.
package sanitize

import (
	"fmt"
	"strings"
)

// Try to canonicalize/sanitize the DNS name:
//
//  1. Trim leading and trailing whitespace
//  2. Trim trailing dots
//  3. Trim http:// and https:// prefix
//  4. Convert to lower case
//  5. Error if the DNS labels are not composed of ASCII characters 32-126 or "\t"
//
// Please note that the above is not necessarily a good filter for real CT
// monitoring (this is why we're not applying it in the collect stage).  It is
// also not a good filter for getting rid of non-domain names like "funny str".
// It is however simple to understand ("printable ascii chars"), and should be
// good enough for the purpose of assembling a SANs data set from CT logs.
func SanitizeDNSName(value string) (string, error) {
	value = trimHttpPrefixString(strings.ToLower(trimTrailingDots(strings.TrimSpace(value))))
	if !isASCIIString([]byte(value)) {
		return "", fmt.Errorf("not an ascii string: %x", []byte(value))
	}

	labels := strings.Split(value, ".")
	for _, label := range labels {
		if !isSaneDNSLabel(label) {
			return "", fmt.Errorf("process label %x", []byte(label))
		}
	}
	return strings.Join(labels, "."), nil
}

// trimTrailingDots returns value with every trailing '.' removed.  Dots that
// are not at the end are left alone; a value made up only of dots becomes
// the empty string.
func trimTrailingDots(value string) string {
	return strings.TrimRight(value, ".")
}

// trimHttpPrefixString removes a single leading "http://" or "https://" from
// value.  The match is case-sensitive and at most one prefix is removed; any
// other value is returned unchanged.
func trimHttpPrefixString(value string) string {
	for _, prefix := range [...]string{"http://", "https://"} {
		if strings.HasPrefix(value, prefix) {
			// Slice by the prefix's own length rather than a magic offset.
			return value[len(prefix):]
		}
	}
	return value
}

// isASCIIString reports whether every byte of value lies in the 7-bit ASCII
// range (0-127).  An empty or nil slice counts as ASCII.
func isASCIIString(value []byte) bool {
	const highBit = 0x80 // set on every byte above 127

	for i := 0; i < len(value); i++ {
		if value[i]&highBit != 0 {
			return false
		}
	}
	return true
}

// isSaneDNSLabel reports whether label consists solely of characters that
// isSaneDNSLabelChar accepts.  The empty label is considered sane.
func isSaneDNSLabel(label string) bool {
	isBad := func(r rune) bool { return !isSaneDNSLabelChar(r) }

	// IndexFunc yields -1 when no rune in label is rejected.
	return strings.IndexFunc(label, isBad) < 0
}

// isSaneDNSLabelChar reports whether ch is a horizontal tab or falls within
// the ASCII range 32 (space) through 126 ('~').
func isSaneDNSLabelChar(ch rune) bool {
	switch {
	case ch == '\t':
		return true
	case ch < ' ', ch > '~':
		return false
	default:
		return true
	}
}