Skip to content

Commit

Permalink
Merge pull request #362 from systemli/Include-Fragment-to-ExtractURL
Browse files Browse the repository at this point in the history
  • Loading branch information
0x46616c6b authored Feb 5, 2025
2 parents 0a5dbc8 + 12fcec0 commit 3e998ad
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 30 deletions.
2 changes: 1 addition & 1 deletion internal/util/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ import "regexp"

// urlRegex matches an http:// or https:// scheme followed by one or more
// permitted URL characters. It is compiled once at package initialisation
// instead of on every ExtractURLs call.
//
// Inside the character class, `$-_` is a range (ASCII 0x24 through 0x5F), not
// three literals; that range is what admits '/', ':', '?', '=' and the other
// punctuation between '$' and '_'. '#' (0x23) sits just below that range, so it
// is listed as its own alternative to keep URL fragments in the match.
//
// NOTE(review): '~' (0x7E) is not covered by any alternative, so a match stops
// at the first '~' — confirm whether that is intended.
var urlRegex = regexp.MustCompile(`(https?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(),]|%[0-9a-fA-F][0-9a-fA-F]|#)+)`)

// ExtractURLs extracts URLs from a text.
//
// It returns every non-overlapping match of urlRegex in text, in order of
// appearance. The result is nil when text contains no URL.
func ExtractURLs(text string) []string {
	return urlRegex.FindAllString(text, -1)
}
66 changes: 37 additions & 29 deletions internal/util/url_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,41 @@ import (
)

// TestExtractURL checks ExtractURLs against a table of input texts and the
// URLs expected from each, in order of appearance. The cases cover a single
// URL, two URLs, no URL, a path, a query string, and a query string followed
// by a fragment.
func TestExtractURL(t *testing.T) {
	testCases := []struct {
		text     string
		expected []string
	}{
		{
			"This is a text with a URL https://example.com",
			[]string{"https://example.com"},
		},
		{
			"This is a text with a URL https://example.com and another URL http://example.org",
			[]string{"https://example.com", "http://example.org"},
		},
		{
			"This is a text without a URL",
			[]string{},
		},
		{
			"This is a text with a URL https://www.systemli.org/en/contact/",
			[]string{"https://www.systemli.org/en/contact/"},
		},
		{
			"This is a text with a URL https://www.systemli.org/en/contact/?key=value",
			[]string{"https://www.systemli.org/en/contact/?key=value"},
		},
		{
			"This is a text with a URL https://www.systemli.org/en/contact/?key=value#fragment",
			[]string{"https://www.systemli.org/en/contact/?key=value#fragment"},
		},
	}

	for _, tc := range testCases {
		// One subtest per case so a failure names the input that caused it.
		t.Run(tc.text, func(t *testing.T) {
			urls := ExtractURLs(tc.text)

			// assert.Equal records a failure but does not stop the test, so
			// return on a length mismatch before indexing into urls; otherwise
			// a short result would panic instead of failing cleanly.
			if !assert.Equal(t, len(tc.expected), len(urls)) {
				return
			}
			for i, url := range tc.expected {
				assert.Equal(t, url, urls[i])
			}
		})
	}
}

0 comments on commit 3e998ad

Please sign in to comment.