Skip to content

Commit

Permalink
add file to url utility
Browse files Browse the repository at this point in the history
Signed-off-by: Pablo Chacin <[email protected]>
  • Loading branch information
pablochacin committed Oct 1, 2024
1 parent b500cff commit e832919
Show file tree
Hide file tree
Showing 2 changed files with 314 additions and 0 deletions.
138 changes: 138 additions & 0 deletions pkg/util/url.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Package util implements utility functions, such as conversions between
// file paths and file-scheme URLs.
package util

import (
"errors"
"fmt"
"net/url"
"path/filepath"
"runtime"
"strings"
)

// Adapted from https://go-review.googlesource.com/c/vuln/+/438175/7/internal/web/url.go

// URLFromFilePath builds a file-scheme URL for the given absolute path.
//
// It returns an error when path is not absolute according to
// filepath.IsAbs. Paths that carry a Windows volume name are mapped per
// https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/:
//
//	\\host.example.com\Share\path\to\file -> file://host.example.com/Share/path/to/file
//	\\host.example.com                     -> file://host.example.com/
//	C:\path\to\file                        -> file:///C:/path/to/file
//
// Paths without a volume name are mapped directly:
//
//	/path/to/file -> file:///path/to/file
func URLFromFilePath(path string) (*url.URL, error) {
	if !filepath.IsAbs(path) {
		return nil, fmt.Errorf("path is not absolute %q", path)
	}

	result := &url.URL{Scheme: "file"}
	volume := filepath.VolumeName(path)

	switch {
	case volume == "":
		// No volume: the slash-separated path is the URL path.
		result.Path = filepath.ToSlash(path)

	case !strings.HasPrefix(volume, `\\`):
		// Drive-letter volume: the drive becomes the first path segment,
		// so a leading slash has to be added.
		result.Path = "/" + filepath.ToSlash(path)

	default:
		// UNC volume: the text after the leading `\\` up to the first
		// separator is the host; the remainder is the URL path.
		unc := filepath.ToSlash(path[2:])
		sep := strings.IndexByte(unc, '/')
		if sep < 0 {
			// Degenerate case: a host without a share name.
			result.Host = unc
			result.Path = "/"
		} else {
			result.Host = unc[:sep]
			result.Path = unc[sep:]
		}
	}

	return result, nil
}


// URLToFilePath translates a file-scheme URL into a file path.
//
// It returns an error when:
//   - the URL scheme is not "file";
//   - the URL has neither a path nor a usable opaque part (an opaque part is
//     only accepted when the host is empty);
//   - convertFileURLPath rejects the host/path combination;
//   - the resulting path is not absolute according to filepath.IsAbs.
func URLToFilePath(u *url.URL) (string, error) {
	if u.Scheme != "file" {
		return "", errors.New("non-file URL")
	}

	var candidate string
	switch {
	case u.Path != "":
		converted, err := convertFileURLPath(u.Host, u.Path)
		if err != nil {
			return converted, err
		}
		candidate = converted

	case u.Host != "" || u.Opaque == "":
		return "", errors.New("file URL missing path")

	default:
		// No path and no host: fall back to the opaque part of the URL
		// (for example the "c:/path" in "file:c:/path").
		candidate = filepath.FromSlash(u.Opaque)
	}

	if !filepath.IsAbs(candidate) {
		return "", fmt.Errorf("path is not absolute %q", candidate)
	}
	return candidate, nil
}

// convertFileURLPath turns the host and path components of a file URL into
// a file path. On Windows the work is delegated to convertFileURLPathWindows.
// On every other OS only an empty host or "localhost" is accepted, and the
// path is returned after filepath.FromSlash.
func convertFileURLPath(host, path string) (string, error) {
	if runtime.GOOS == "windows" {
		return convertFileURLPathWindows(host, path)
	}

	if host != "" && host != "localhost" {
		return "", errors.New("file URL specifies non-local host")
	}
	return filepath.FromSlash(path), nil
}

// convertFileURLPathWindows turns the host and path components of a file
// URL into a Windows-style file path, following the interpretation in
// https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/.
//
// The path must start with a slash. A host other than "" or "localhost" is
// treated as a UNC volume name and yields `\\host` followed by the path.
// With a local host, the path (minus its leading slash) must begin with a
// drive-letter volume.
func convertFileURLPathWindows(host, path string) (string, error) {
	if !strings.HasPrefix(path, "/") {
		return "", fmt.Errorf("path is not absolute %q", path)
	}

	native := filepath.FromSlash(path)

	// RFC 8089 reserves the authority "localhost" for the local machine, so
	// it is handled exactly like an empty host.
	if host == "" || host == "localhost" {
		// Drop the leading separator; what remains has to start with a
		// drive letter rather than nothing or a UNC prefix.
		local := native[1:]
		volume := filepath.VolumeName(local)
		if volume == "" || strings.HasPrefix(volume, `\\`) {
			return "", errors.New("file URL missing drive letter")
		}
		return local, nil
	}

	// A common "legacy" format omits the leading slash before a drive
	// letter, which puts the drive letter in the host field. (See
	// https://blogs.msdn.microsoft.com/freeassociations/2005/05/19/the-bizarre-and-unhappy-story-of-file-urls/.)
	// That format is not supported, but it gets a more helpful error.
	if filepath.VolumeName(host) != "" {
		return "", errors.New("file URL encodes volume in host field: too few slashes?")
	}

	// Any other host is the UNC volume name.
	return `\\` + host + native, nil
}
176 changes: 176 additions & 0 deletions pkg/util/url_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@

package util
import (
"net/url"
"runtime"
"testing"
)
// TestURLToFilePath runs URLToFilePath over every case returned by urlTests
// that has a non-empty url. Each URL is parsed with url.Parse and the result
// is compared with the case's filePath; an error is accepted only if its
// text equals the case's wantErr.
func TestURLToFilePath(t *testing.T) {
	for _, tc := range urlTests() {
		if tc.url == "" {
			continue
		}
		// Shadow the loop variable so the closure below keeps this
		// iteration's case on Go versions before 1.22.
		tc := tc
		t.Run(tc.url, func(t *testing.T) {
			u, err := url.Parse(tc.url)
			if err != nil {
				t.Fatalf("url.Parse(%q): %v", tc.url, err)
			}

			path, err := URLToFilePath(u)
			if err != nil {
				switch {
				case err.Error() == tc.wantErr:
					return
				case tc.wantErr == "":
					t.Fatalf("URLToFilePath(%v): %v; want <nil>", u, err)
				default:
					t.Fatalf("URLToFilePath(%v): %v; want %s", u, err, tc.wantErr)
				}
			}

			// Reaching this point means no error was returned, which is
			// itself a failure when the case expected one.
			if path != tc.filePath || tc.wantErr != "" {
				t.Fatalf("URLToFilePath(%v) = %q, <nil>; want %q, %s", u, path, tc.filePath, tc.wantErr)
			}
		})
	}
}
// TestURLFromFilePath runs URLFromFilePath over every case returned by
// urlTests that has a non-empty filePath. The resulting URL string is
// compared with the case's canonicalURL, or with its url when canonicalURL
// is empty; an error is accepted only if its text equals the case's wantErr.
func TestURLFromFilePath(t *testing.T) {
	for _, tc := range urlTests() {
		if tc.filePath == "" {
			continue
		}
		// Shadow the loop variable so the closure below keeps this
		// iteration's case on Go versions before 1.22.
		tc := tc
		t.Run(tc.filePath, func(t *testing.T) {
			u, err := URLFromFilePath(tc.filePath)
			if err != nil {
				switch {
				case err.Error() == tc.wantErr:
					return
				case tc.wantErr == "":
					t.Fatalf("URLFromFilePath(%v): %v; want <nil>", tc.filePath, err)
				default:
					t.Fatalf("URLFromFilePath(%v): %v; want %s", tc.filePath, err, tc.wantErr)
				}
			}

			if tc.wantErr != "" {
				t.Fatalf("URLFromFilePath(%v) = <nil>; want error: %s", tc.filePath, tc.wantErr)
			}

			wantURL := tc.url
			if tc.canonicalURL != "" {
				wantURL = tc.canonicalURL
			}
			if u.String() != wantURL {
				t.Errorf("URLFromFilePath(%v) = %v; want %s", tc.filePath, u, wantURL)
			}
		})
	}
}
// urlTests selects the test table for the current operating system:
// urlTestsWindows when runtime.GOOS is "windows", urlTestsOthers otherwise.
func urlTests() []urlTest {
	cases := urlTestsOthers
	if runtime.GOOS == "windows" {
		cases = urlTestsWindows
	}
	return cases
}
// urlTest is one table entry shared by TestURLToFilePath and
// TestURLFromFilePath.
type urlTest struct {
	// url is the URL text handed to url.Parse. TestURLToFilePath skips
	// entries where it is empty.
	url string

	// filePath is the file path paired with url. TestURLFromFilePath skips
	// entries where it is empty.
	filePath string

	// canonicalURL is the URL expected back from URLFromFilePath.
	// If empty, assume equal to url.
	canonicalURL string

	// wantErr is the exact error text expected from the conversion; empty
	// means no error is expected.
	wantErr string
}
// urlTestsOthers holds the conversion cases that urlTests returns on every
// operating system except Windows.
var urlTestsOthers = []urlTest{
	// Examples taken from RFC 8089.
	{
		url:      `file:///path/to/file`,
		filePath: `/path/to/file`,
	},
	{
		url:          `file:/path/to/file`,
		filePath:     `/path/to/file`,
		canonicalURL: `file:///path/to/file`,
	},
	{
		url:          `file://localhost/path/to/file`,
		filePath:     `/path/to/file`,
		canonicalURL: `file:///path/to/file`,
	},

	// Files on a non-local host are rejected.
	{
		url:     `file://host.example.com/path/to/file`,
		wantErr: "file URL specifies non-local host",
	},
}
// urlTestsWindows holds the conversion cases that urlTests returns when
// runtime.GOOS is "windows".
var urlTestsWindows = []urlTest{
	// Examples taken from
	// https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/:
	{
		url:      `file://laptop/My%20Documents/FileSchemeURIs.doc`,
		filePath: `\\laptop\My Documents\FileSchemeURIs.doc`,
	},
	{
		url:      `file:///C:/Documents%20and%20Settings/davris/FileSchemeURIs.doc`,
		filePath: `C:\Documents and Settings\davris\FileSchemeURIs.doc`,
	},
	{
		url:      `file:///D:/Program%20Files/Viewer/startup.htm`,
		filePath: `D:\Program Files\Viewer\startup.htm`,
	},
	{
		url:          `file:///C:/Program%20Files/Music/Web%20Sys/main.html?REQUEST=RADIO`,
		filePath:     `C:\Program Files\Music\Web Sys\main.html`,
		canonicalURL: `file:///C:/Program%20Files/Music/Web%20Sys/main.html`,
	},
	{
		url:      `file://applib/products/a-b/abc_9/4148.920a/media/start.swf`,
		filePath: `\\applib\products\a-b\abc_9\4148.920a\media\start.swf`,
	},
	{
		url:     `file:////applib/products/a%2Db/abc%5F9/4148.920a/media/start.swf`,
		wantErr: "file URL missing drive letter",
	},
	{
		url:     `C:\Program Files\Music\Web Sys\main.html?REQUEST=RADIO`,
		wantErr: "non-file URL",
	},

	// The blog's example "file://D:\Program Files\Viewer\startup.htm" fails
	// in url.Parse, so a slash-based path stands in for it here.
	{
		url:     `file://D:/Program Files/Viewer/startup.htm`,
		wantErr: "file URL encodes volume in host field: too few slashes?",
	},

	// The blog post discourages non-ASCII characters because they depend on
	// the user's current codepage. Go strings are assumed to be UTF-8,
	// however, and the url package refuses to encode URLs to non-ASCII
	// strings.
	{
		url:          `file:///C:/exampleㄓ.txt`,
		filePath:     `C:\exampleㄓ.txt`,
		canonicalURL: `file:///C:/example%E3%84%93.txt`,
	},
	{
		url:      `file:///C:/example%E3%84%93.txt`,
		filePath: `C:\exampleㄓ.txt`,
	},

	// Examples taken from RFC 8089.

	// The drive-letter variation from section E.2 is accepted because
	// supporting it is simpler than rejecting it. The shorter form is not
	// generated in the reverse direction.
	{
		url:          `file:c:/path/to/file`,
		filePath:     `c:\path\to\file`,
		canonicalURL: `file:///c:/path/to/file`,
	},

	// The UNC share name is encoded as the authority, following section
	// E.3.1, because the Microsoft blog post explicitly recommends it.
	{
		url:      `file://host.example.com/Share/path/to/file.txt`,
		filePath: `\\host.example.com\Share\path\to\file.txt`,
	},

	// The four- and five-slash variations from section E.3.2 are declined:
	// the paths in these URLs would change meaning under path.Clean.
	{
		url:     `file:////host.example.com/path/to/file`,
		wantErr: "file URL missing drive letter",
	},
	{
		url:     `file://///host.example.com/path/to/file`,
		wantErr: "file URL missing drive letter",
	},
}

0 comments on commit e832919

Please sign in to comment.