From f7628adee5986e5d418c7c4ca82ca5535d4e3d30 Mon Sep 17 00:00:00 2001 From: Andrea Luzzardi Date: Fri, 18 Mar 2022 12:29:44 -0700 Subject: [PATCH] telemetry: Normalize git URLs Normalize `https://github.com/dagger/dagger` and `git@github.com:dagger/dagger.git` to `github.com/dagger/dagger` Signed-off-by: Andrea Luzzardi --- telemetry/git.go | 65 ++++++++++++++++++++++++++++++++++++++++++ telemetry/git_test.go | 22 ++++++++++++++ telemetry/telemetry.go | 19 ++++++++---- 3 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 telemetry/git.go create mode 100644 telemetry/git_test.go diff --git a/telemetry/git.go b/telemetry/git.go new file mode 100644 index 00000000..1c0a8fc7 --- /dev/null +++ b/telemetry/git.go @@ -0,0 +1,65 @@ +package telemetry + +import ( + "fmt" + "net/url" + "regexp" + "strings" +) + +var ( + urlSchemeRegExp = regexp.MustCompile(`^[^:]+://`) + scpLikeURLRegExp = regexp.MustCompile(`^(?:(?P<user>[^@]+)@)?(?P<host>[^:\s]+):(?:(?P<port>[0-9]{1,5})(?:\/|:))?(?P<path>[^\\].*\/[^\\].*)$`) +) + +func parseGitURL(endpoint string) (string, error) { + if e, ok := parseSCPLike(endpoint); ok { + return e, nil + } + + return parseURL(endpoint) +} + +func parseURL(endpoint string) (string, error) { + u, err := url.Parse(endpoint) + if err != nil { + return "", err + } + + if !u.IsAbs() { + return "", fmt.Errorf( + "invalid endpoint: %s", endpoint, + ) + } + + return fmt.Sprintf("%s%s", u.Hostname(), u.Path), nil +} + +func parseSCPLike(endpoint string) (string, bool) { + if matchesURLScheme(endpoint) || !matchesScpLike(endpoint) { + return "", false + } + + _, host, _, path := findScpLikeComponents(endpoint) + + return fmt.Sprintf("%s/%s", host, strings.TrimSuffix(path, ".git")), true +} + +// matchesURLScheme returns true if the given string matches a URL-like +// format scheme. +func matchesURLScheme(url string) bool { + return urlSchemeRegExp.MatchString(url) +} + +// matchesScpLike returns true if the given string matches an SCP-like +// format scheme. 
+func matchesScpLike(url string) bool { + return scpLikeURLRegExp.MatchString(url) +} + +// findScpLikeComponents returns the user, host, port and path of the +// given SCP-like URL. +func findScpLikeComponents(url string) (user, host, port, path string) { + m := scpLikeURLRegExp.FindStringSubmatch(url) + return m[1], m[2], m[3], m[4] +} diff --git a/telemetry/git_test.go b/telemetry/git_test.go new file mode 100644 index 00000000..e38e82c9 --- /dev/null +++ b/telemetry/git_test.go @@ -0,0 +1,22 @@ +package telemetry + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestParseGit(t *testing.T) { + var ( + endpoint string + err error + ) + + endpoint, err = parseGitURL("https://github.com/dagger/dagger") + require.NoError(t, err) + require.Equal(t, endpoint, "github.com/dagger/dagger") + + endpoint, err = parseGitURL("git@github.com:dagger/dagger.git") + require.NoError(t, err) + require.Equal(t, endpoint, "github.com/dagger/dagger") +} diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index 6177e4b1..547e212f 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -50,7 +50,7 @@ func Track(ctx context.Context, eventName string, properties ...*Property) { return } - repo := gitRepoURL(".") + repo := gitRepoURL(ctx, ".") // Base properties props := map[string]interface{}{ @@ -194,8 +194,8 @@ func hash(s string) string { return fmt.Sprintf("%x", sha256.Sum256([]byte(s))) } -// // gitRepoURL returns the git repository remote, if any. -func gitRepoURL(path string) string { +// gitRepoURL returns the git repository remote, if any. 
+func gitRepoURL(ctx context.Context, path string) string { repo, err := git.PlainOpenWithOptions(path, &git.PlainOpenOptions{ DetectDotGit: true, }) @@ -208,9 +208,16 @@ func gitRepoURL(path string) string { return "" } - if urls := origin.Config().URLs; len(urls) > 0 { - return urls[0] + urls := origin.Config().URLs + if len(urls) == 0 { + return "" } - return "" + endpoint, err := parseGitURL(urls[0]) + if err != nil { + log.Ctx(ctx).Debug().Err(err).Str("url", urls[0]).Msg("failed to parse git URL") + return "" + } + + return endpoint }