aboutsummaryrefslogtreecommitdiff
path: root/internal/sanitize
diff options
context:
space:
mode:
authorRasmus Dahlberg <rasmus@rgdd.se>2023-03-20 14:57:25 +0100
committerRasmus Dahlberg <rasmus@rgdd.se>2023-03-20 14:57:25 +0100
commit8e8cd8214d579e26e05dcb44fcd53d909e23879c (patch)
treebbb4fc1e129c5fe867399178802abfdafceed8b9 /internal/sanitize
parent86e3a2a1ec6a7acdaf14a3d11ca47964ddc80d74 (diff)
Fork code snippets to sanitize DNS names (ascii)
Diffstat (limited to 'internal/sanitize')
-rw-r--r--internal/sanitize/sanitize.go85
1 files changed, 85 insertions, 0 deletions
diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go
new file mode 100644
index 0000000..6fcdf09
--- /dev/null
+++ b/internal/sanitize/sanitize.go
@@ -0,0 +1,85 @@
+// Copyright (C) 2016 Opsmate, Inc.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License, v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+//
+// This software is distributed WITHOUT A WARRANTY OF ANY KIND.
+// See the Mozilla Public License for details.
+//
+// From: https://github.com/SSLMate/certspotter/blob/master/identifiers.go
+// Minor tweaks to get a SanitizeDNSName function for ASCII names only.
+package sanitize
+
+import (
+ "fmt"
+ "strings"
+)
+
+// Try to canonicalize/sanitize the DNS name:
+//
+// 1. Trim leading and trailing whitespace
+// 2. Trim trailing dots
+// 3. Trim http:// and https:// prefix
+// 4. Convert to lower case
+// 5. Error if the DNS labels are not composed of ASCII characters 32-126 or "\t"
+//
+// Please note that the above is not necessarily a good filter for real CT
+// monitoring (this is why we're not applying it in the collect stage). It is
+// also not a good filter for getting rid of non-domain names like "funny str".
+// It is however simple to understand ("printable ascii chars"), and should be
+// good enough for the purpose of assembling a SANs data set from CT logs.
+func SanitizeDNSName(value string) (string, error) {
+ value = trimHttpPrefixString(strings.ToLower(trimTrailingDots(strings.TrimSpace(value))))
+ if !isASCIIString([]byte(value)) {
+ return "", fmt.Errorf("not an ascii string: %x", []byte(value))
+ }
+
+ labels := strings.Split(value, ".")
+ for _, label := range labels {
+ if !isSaneDNSLabel(label) {
+ return "", fmt.Errorf("process label %x", []byte(label))
+ }
+ }
+ return strings.Join(labels, "."), nil
+}
+
// trimTrailingDots returns value with all trailing "." characters removed.
// A value that consists only of dots results in the empty string; dots
// elsewhere in the value are left untouched.
func trimTrailingDots(value string) string {
	return strings.TrimRight(value, ".")
}
+
// trimHttpPrefixString removes a leading "http://" or "https://" from value.
// At most one prefix is removed, and the match is case-sensitive.  A value
// without either prefix is returned unchanged.
func trimHttpPrefixString(value string) string {
	for _, prefix := range []string{"http://", "https://"} {
		if strings.HasPrefix(value, prefix) {
			// Slice by the prefix length rather than a hard-coded offset
			// so the two can never get out of sync.
			return value[len(prefix):]
		}
	}
	return value
}
+
// isASCIIString reports whether every byte in value is in the 7-bit ASCII
// range, i.e., has its high bit cleared.  An empty or nil slice is ASCII.
func isASCIIString(value []byte) bool {
	for i := 0; i < len(value); i++ {
		if value[i] >= 0x80 {
			return false
		}
	}
	return true
}
+
+func isSaneDNSLabel(label string) bool {
+ for _, ch := range label {
+ if !isSaneDNSLabelChar(ch) {
+ return false
+ }
+ }
+ return true
+}
+
// isSaneDNSLabelChar reports whether ch is a horizontal tab or one of the
// ASCII characters from space (32) through tilde (126).
func isSaneDNSLabelChar(ch rune) bool {
	if ch == '\t' {
		return true
	}
	return ' ' <= ch && ch <= '~'
}