From 8e8cd8214d579e26e05dcb44fcd53d909e23879c Mon Sep 17 00:00:00 2001 From: Rasmus Dahlberg Date: Mon, 20 Mar 2023 14:57:25 +0100 Subject: Fork code snippets to sanitize DNS names (ascii) --- internal/sanitize/sanitize.go | 85 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 internal/sanitize/sanitize.go (limited to 'internal') diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go new file mode 100644 index 0000000..6fcdf09 --- /dev/null +++ b/internal/sanitize/sanitize.go @@ -0,0 +1,85 @@ +// Copyright (C) 2016 Opsmate, Inc. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License, v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// +// This software is distributed WITHOUT A WARRANTY OF ANY KIND. +// See the Mozilla Public License for details. +// +// From: https://github.com/SSLMate/certspotter/blob/master/identifiers.go +// Minor tweaks to get a SanitizeDNSName function for ASCII names only. +package sanitize + +import ( + "fmt" + "strings" +) + +// Try to canonicalize/sanitize the DNS name: +// +// 1. Trim leading and trailing whitespace +// 2. Trim trailing dots +// 3. Convert to lower case +// 4. Trim http:// and https:// prefix +// 5. Error if the DNS labels are not composed of ASCII characters 32-126 or "\t" +// +// Please note that the above is not necessarily a good filter for real CT +// monitoring (this is why we're not applying it in the collect stage). It is +// also not a good filter for getting rid of non-domain names like "funny str". +// It is however simple to understand ("printable ascii chars"), and should be +// good enough for the purpose of assembling a SANs data set from CT logs. 
// SanitizeDNSName canonicalizes value and validates that the result consists
// of printable ASCII only.
//
// Processing order:
//
//  1. Leading and trailing whitespace is trimmed.
//  2. Trailing dots are trimmed.
//  3. The remainder must be pure ASCII (every byte <= 127), else an error
//     is returned.
//  4. The name is converted to lower case.
//  5. A single leading "http://" or "https://" is removed.
//  6. Every dot-separated label must consist of the characters 32-126 or
//     "\t", else an error is returned.
//
// On success the sanitized name is returned with a nil error. On failure the
// returned string is empty and the error carries a hex dump of the offending
// bytes. The empty string is accepted and returned unchanged.
func SanitizeDNSName(value string) (string, error) {
	value = trimTrailingDots(strings.TrimSpace(value))

	// Reject non-ASCII input *before* lowercasing. strings.ToLower applies
	// Unicode case mappings, and some non-ASCII code points lowercase to
	// plain ASCII letters (U+212A KELVIN SIGN -> "k", U+0130 -> "i"), so
	// checking afterwards would let such names through. Checking first also
	// keeps the hex dump below faithful to the input bytes.
	if !isASCIIString([]byte(value)) {
		return "", fmt.Errorf("not an ascii string: %x", []byte(value))
	}

	// Lowercase before stripping the scheme so "HTTP://" is recognized too.
	value = trimHttpPrefixString(strings.ToLower(value))

	// The split is only needed to name the offending label in the error;
	// the validated name is value itself, so no re-join is required.
	for _, label := range strings.Split(value, ".") {
		if !isSaneDNSLabel(label) {
			return "", fmt.Errorf("process label %x", []byte(label))
		}
	}
	return value, nil
}

// trimTrailingDots returns value with all trailing '.' characters removed.
func trimTrailingDots(value string) string {
	return strings.TrimRight(value, ".")
}

// trimHttpPrefixString removes one leading "http://" or "https://" from
// value. The match is case-sensitive and at most one prefix is removed.
func trimHttpPrefixString(value string) string {
	const (
		httpPrefix  = "http://"
		httpsPrefix = "https://"
	)
	switch {
	case strings.HasPrefix(value, httpPrefix):
		return value[len(httpPrefix):]
	case strings.HasPrefix(value, httpsPrefix):
		return value[len(httpsPrefix):]
	default:
		return value
	}
}

// isASCIIString reports whether every byte in value is in the ASCII range
// (0-127). Control characters count as ASCII here; they are filtered
// separately by isSaneDNSLabel.
func isASCIIString(value []byte) bool {
	for _, b := range value {
		if b > 127 {
			return false
		}
	}
	return true
}

// isSaneDNSLabel reports whether every character of label is accepted by
// isSaneDNSLabelChar. The empty label is considered sane.
func isSaneDNSLabel(label string) bool {
	for _, ch := range label {
		if !isSaneDNSLabelChar(ch) {
			return false
		}
	}
	return true
}

// isSaneDNSLabelChar reports whether ch is a tab or a printable ASCII
// character (32-126 inclusive).
func isSaneDNSLabelChar(ch rune) bool {
	return ch == '\t' || (ch >= 32 && ch <= 126)
}