-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgocomply.go
603 lines (496 loc) · 15.4 KB
/
gocomply.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
// Give open source Golang developers the credit they deserve, follow your
// legal obligations, and save time with `gocomply`.
//
// This little program scans the Go module in the current
// directory for all direct and indirect dependencies, and attempts to download
// and write all of their license files to stdout. Progress or warnings are
// written to stderr.
//
// See https://www.tawesoft.co.uk/gopkg/gocomply
//
package main
import (
"bytes"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"os/exec"
"os/user"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/jdxcode/netrc"
)
// divider is a rule of 80 dashes printed after each module's license text.
var divider = strings.Repeat("-", 80)
// httpTimeout is the overall timeout applied to every request made by httpGet.
const httpTimeout = 10 * time.Second
// httpLicenseFiles lists the license file names to request over plain HTTP,
// in the order they are tried. For GitHub repos we have a more efficient way
// of detecting licenses. These names are case sensitive if the remote server
// is case sensitive. Keep this list as small as possible: tryGetLicense
// pauses for a second before trying each entry.
var httpLicenseFiles = []string{
"NOTICE", // apache, must come first
"LICENSE",
"LICENSE.txt",
"LICENSE.md",
"COPYING",
"COPYING.txt",
"COPYING.md",
}
// repoLicenseFiles lists license file names, in order of precedence, for
// checking against the file listing of a remote repository. Unlike
// httpLicenseFiles, these can be compared case insensitively.
//
// This sorting is informed by the go-license-detector dataset.zip:
// `find | xargs -L1 -I{} basename "{}" | sort | uniq -c > all.txt`
// and https://pkg.go.dev/license-policy - but we want the actual copyright
// notice and to exclude anything that's just a full copy of the GPL verbatim.
//
var repoLicenseFiles = []string{
"NOTICE", // apache, must come first
"NOTICE.txt", // apache, rarely
"LICENSE",
"LICENSE.txt",
"LICENSE.md",
"LICENSE.markdown",
"LICENSE.rst",
"LICENCE", // uncommon
"LICENCE.txt", // uncommon
"LICENCE.md", // uncommon
"LICENCE.markdown", // uncommon
"LICENCE.rst", // uncommon
"COPYING",
"COPYING.txt",
"COPYRIGHT",
"COPYRIGHT.txt",
"MIT-LICENSE",
"MIT-LICENSE.txt",
"MIT-LICENCE", // uncommon
"MIT-LICENCE.txt", // uncommon
}
// BasicAuth is a username/token pair that httpGet sends as HTTP Basic
// credentials.
type BasicAuth struct {
	Username string
	Token    string
}

// githubAuth holds the credentials used for GitHub API requests. It starts
// out empty and is replaced by parseNetrc when the netrc file has an entry
// for github.com.
var githubAuth = &BasicAuth{}

// IsSet reports whether both the username and the token are non-empty.
func (a BasicAuth) IsSet() bool {
	return len(a.Username) > 0 && len(a.Token) > 0
}
// httpGet performs an HTTP GET on rsc and returns the response body as a
// string.
//
// When auth is non-nil and has both fields set, the request carries HTTP
// Basic credentials. Any status code other than 200 is reported as an error,
// as are request construction, transport and body read failures. The whole
// exchange is bounded by httpTimeout.
func httpGet(rsc string, auth *BasicAuth) (string, error) {
	req, err := http.NewRequest("GET", rsc, nil)
	if err != nil {
		return "", err
	}

	if auth != nil && auth.IsSet() {
		// NOTE(review): SetBasicAuth encodes its arguments itself, so
		// query-escaping them first changes any credential that contains
		// reserved characters - confirm this is intended.
		username := url.QueryEscape(auth.Username)
		token := url.QueryEscape(auth.Token)
		req.SetBasicAuth(username, token)
	}

	client := http.Client{Timeout: httpTimeout}
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("http status code %d when downloading %q", resp.StatusCode, rsc)
	}

	var body bytes.Buffer
	if _, err := io.Copy(&body, resp.Body); err != nil {
		return "", err
	}
	return body.String(), nil
}
// GoImport holds the three whitespace-separated fields of a
// <meta name="go-import"> tag's content attribute.
type GoImport struct {
	ImportPrefix string // first field: the import path prefix
	Vcs          string // second field: version control system, e.g. "git"
	RepoRoot     string // third field: repository root URL
}

// GoSource holds the four whitespace-separated fields of a
// <meta name="go-source"> tag's content attribute.
type GoSource struct {
	ImportPrefix string // first field: the import path prefix
	Home         string // second field: project home page
	Directory    string // third field: URL template for a directory listing
	File         string // fourth field: URL template for a file
}

// parsing HTML with regex is wrong, but this works well enough to do it anyway
var regexpGoImport = []*regexp.Regexp{
	regexp.MustCompile(`(?i)<\s*meta\s*name\s*=\s*"go-import"\s*content\s*=\s*"(?P<import_prefix>\S+)\s+(?P<vcs>\S+)\s+(?P<repo_root>\S+)"\s*/?>`),
	// source hut has the arguments the other way round
	regexp.MustCompile(`(?i)<\s*meta\s*content\s*=\s*"(?P<import_prefix>\S+)\s+(?P<vcs>\S+)\s+(?P<repo_root>\S+)"\s*name\s*=\s*"go-import"\s*/?>`),
}

// parseGoImport extracts the first go-import meta tag found in data, trying
// each pattern of regexpGoImport in turn. The boolean result is false, and
// the GoImport is the zero value, when no pattern matches.
func parseGoImport(data string) (GoImport, bool) {
	for _, r := range regexpGoImport {
		// A single FindStringSubmatch both tests for and captures the match.
		matches := r.FindStringSubmatch(data)
		if matches == nil {
			continue
		}
		return GoImport{
			ImportPrefix: matches[r.SubexpIndex("import_prefix")],
			Vcs:          matches[r.SubexpIndex("vcs")],
			RepoRoot:     matches[r.SubexpIndex("repo_root")],
		}, true
	}
	return GoImport{}, false
}

// regexpGoSource matches a go-source meta tag. It tolerates the same
// whitespace as the go-import patterns: optional space either side of "="
// and one or more whitespace characters between the content fields.
var regexpGoSource = regexp.MustCompile(`(?i)<\s*meta\s*name\s*=\s*"go-source"\s*content\s*=\s*"(?P<import_prefix>\S+)\s+(?P<home>\S+)\s+(?P<directory>\S+)\s+(?P<file>\S+)"\s*/?>`)

// parseGoSource extracts the first go-source meta tag found in data. The
// boolean result is false, and the GoSource is the zero value, when data
// contains no such tag.
func parseGoSource(data string) (GoSource, bool) {
	r := regexpGoSource
	matches := r.FindStringSubmatch(data)
	if matches == nil {
		return GoSource{}, false
	}
	return GoSource{
		ImportPrefix: matches[r.SubexpIndex("import_prefix")],
		Home:         matches[r.SubexpIndex("home")],
		Directory:    matches[r.SubexpIndex("directory")],
		File:         matches[r.SubexpIndex("file")],
	}, true
}
// listModules returns the names of the modules reported by `go list -m all`
// that the main module actually requires, as judged by isRequiredModule.
//
// The first line of the go list output is skipped. An error is returned when
// the go tool cannot be run or exits unsuccessfully, when its output is empty
// or malformed, or when isRequiredModule fails for any module.
func listModules() ([]string, error) {
	stdout, err := exec.Command("go", "list", "-m", "all").Output()
	if err != nil {
		// Only an *exec.ExitError carries captured stderr. Other failures,
		// such as the go tool missing from PATH, have a different type, so
		// an unchecked type assertion here would panic.
		var stderr []byte
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			stderr = exitErr.Stderr
		}
		return nil, fmt.Errorf("go list error: %+v: %s", err, stderr)
	}

	stdout = bytes.TrimSpace(stdout)
	// bytes.Split never returns an empty slice, so emptiness has to be
	// detected on the raw output.
	if len(stdout) == 0 {
		return nil, fmt.Errorf("empty go list output")
	}
	lines := bytes.Split(stdout, []byte{'\n'})

	// discard first line
	lines = lines[1:]

	names := make([]string, 0, len(lines))
	for _, line := range lines {
		// e.g. golang.org/x/text v0.3.3
		words := bytes.SplitN(line, []byte{' '}, 2)
		if len(words) != 2 {
			return nil, fmt.Errorf("invalid go list output format (line %q)", line)
		}

		name := string(words[0])
		required, err := isRequiredModule(name)
		if err != nil {
			return nil, err
		}
		if !required {
			continue
		}
		names = append(names, name)
	}

	return names, nil
}
// isRequiredModule reports whether the main module needs the module called
// name, according to `go mod why -m -vendor`.
//
// It returns false when the second output line is a parenthesized note, and
// true for any other output. An error is returned when the go tool cannot be
// run or exits unsuccessfully, or when the output does not start with the
// expected "# <name>" header line.
func isRequiredModule(name string) (bool, error) {
	// "download is split into two parts: downloading the go.mod and
	// downloading the actual code. If you have dependencies only needed for
	// tests, then they will show up in your go.mod, and go get will download
	// their go.mods, but it will not download their code."
	//
	// "This applies not just to test-only dependencies but also os-specific
	// dependencies."
	//
	// -- https://github.com/golang/go/issues/26913#issuecomment-411976222
	//
	// "The -vendor flag causes why to exclude tests of dependencies.
	//
	// "If the package or module is not
	// referenced from the main module, the stanza will display a single
	// parenthesized note indicating that fact."
	stdout, err := exec.Command("go", "mod", "why", "-m", "-vendor", name).Output()
	if err != nil {
		// Only an *exec.ExitError carries captured stderr. Other failures,
		// such as the go tool missing from PATH, have a different type, so
		// an unchecked type assertion here would panic.
		var stderr []byte
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			stderr = exitErr.Stderr
		}
		return false, fmt.Errorf("go why error: %+v: %s", err, stderr)
	}

	lines := bytes.Split(stdout, []byte{'\n'})
	if len(lines) < 2 {
		return false, fmt.Errorf("unexpected go why output format")
	}

	// "# golang.org/x/text/encoding"
	if !bytes.Equal(bytes.TrimSpace(lines[0]), []byte("# "+name)) {
		return false, fmt.Errorf("unexpected go why output format")
	}

	// "(main module does not need package golang.org/x/text/encoding)"
	line := bytes.TrimSpace(lines[1])
	if (len(line) > 2) && line[0] == '(' && line[len(line)-1] == ')' {
		return false, nil
	}

	// any other result means its used
	return true, nil
}
// stringDecoderIdentity is the no-op decoder: it hands str back unchanged
// and never fails. It is used for hosts that serve raw file contents.
func stringDecoderIdentity(str string) (string, error) {
	decoded := str
	return decoded, nil
}
// stringDecoderBase64 decodes str as standard (padded) base64 and returns
// the decoded bytes as a string, or the decoding error.
func stringDecoderBase64(str string) (string, error) {
	raw, err := base64.StdEncoding.DecodeString(str)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// resolveFileURL maps a file name at the root of the repository described by
// gi onto the URLs it may be downloaded from.
//
// It returns the candidate URLs in the order they should be tried, together
// with the decoder that turns a downloaded body into the file's text. Only
// git repositories on a fixed set of hosts are understood; anything else
// yields an error. gs is consulted only for gopkg.in, where the branch name
// is taken from the go-source directory template.
func resolveFileURL(gi GoImport, gs GoSource, file string) ([]string, func(string) (string, error), error) {
	if gi.Vcs != "git" {
		return nil, nil, fmt.Errorf("vcs %q not implemented", gi.Vcs)
	}

	root := gi.RepoRoot
	switch {
	case strings.HasPrefix(root, "https://go.googlesource.com/"):
		// This host answers ?format=text with base64-encoded content.
		urls := []string{fmt.Sprintf("%s/+/refs/heads/master/%s?format=text", root, file)}
		return urls, stringDecoderBase64, nil

	case strings.HasPrefix(root, "https://git.sr.ht/"):
		base := strings.TrimSuffix(root, ".git")
		urls := []string{fmt.Sprintf("%s/blob/master/%s", base, file)}
		return urls, stringDecoderIdentity, nil

	case strings.HasPrefix(root, "https://gopkg.in/"):
		// Find correct branch including minor version.
		// The go-source meta tag for gopkg.in is the simplest place where
		// this info is exposed over HTTP, to avoid speaking git protocol.
		// e.g. gs.Directory
		// https://github.com/natefinch/lumberjack/tree/v2.1{/dir}
		trimmed := strings.TrimPrefix(gs.Directory, "https://github.com/")
		segments := strings.SplitN(trimmed, "/", 4)
		if len(segments) != 4 {
			return nil, nil, fmt.Errorf("gopkg.in parse error")
		}
		owner, repo, tail := segments[0], segments[1], segments[3]
		brace := strings.IndexByte(tail, '{')
		if brace < 0 {
			return nil, nil, fmt.Errorf("gopkg.in parse error")
		}
		branch := tail[:brace]
		urls := []string{
			fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", owner, repo, branch, file),
		}
		return urls, stringDecoderIdentity, nil

	case strings.HasPrefix(root, "https://github.com/"):
		slug := strings.TrimSuffix(strings.TrimPrefix(root, "https://github.com/"), ".git")
		urls := []string{
			fmt.Sprintf("https://raw.githubusercontent.com/%s/main/%s", slug, file),
			fmt.Sprintf("https://raw.githubusercontent.com/%s/master/%s", slug, file), // historical
		}
		return urls, stringDecoderIdentity, nil

	case strings.HasPrefix(root, "https://gitlab.com/"):
		base := strings.TrimSuffix(root, ".git")
		urls := []string{
			fmt.Sprintf("%s/-/raw/main/%s", base, file),
			fmt.Sprintf("%s/-/raw/master/%s", base, file), // historical
		}
		return urls, stringDecoderIdentity, nil
	}

	return nil, nil, fmt.Errorf("repo %q not supported (please open an issue)", root)
}
// getLicense returns the license text for module.
//
// For git repositories hosted on github.com, and only when GitHub credentials
// are set, the repository's file listing is queried through the GitHub API
// first. If the API answers but no recognised license file is listed, that is
// returned as an error without any fallback. Any other API failure is printed
// to stderr and the plain-HTTP lookup (tryGetLicense with httpLicenseFiles)
// is used instead, as it is for every other kind of repository.
func getLicense(module string, gi GoImport, gs GoSource) (string, error) {
	// try API
	if gi.Vcs == "git" && strings.HasPrefix(gi.RepoRoot, "https://github.com/") && githubAuth.IsSet() {
		// TODO check rate limits
		license, missing, err := githubAPILicense(gi)
		if err == nil {
			return license, nil
		}
		err = fmt.Errorf("api.github.com error: %s", err)
		if missing {
			return "", err
		}
		fmt.Fprintf(os.Stderr, "%s\n", err)
		// proceed to fallback
	}

	return tryGetLicense(module, gi, gs, httpLicenseFiles)
}

// githubAPILicense looks for a license file in the HEAD tree of the GitHub
// repository gi.RepoRoot and returns its trimmed text.
//
// Candidate names are tried in repoLicenseFiles order, so that the list's
// precedence (e.g. NOTICE before LICENSE) decides which file wins rather than
// the order of entries in the API response. The boolean result is true only
// when the listing was fetched and decoded but held no matching file.
func githubAPILicense(gi GoImport) (string, bool, error) {
	// rate limit is 5000 hour once authenticated - as low as 50/hour when anonymous!
	// TODO we could reduce this timeout when rate is high
	time.Sleep(2 * 1230 * time.Millisecond)

	// TODO if we refactor resolveFileURL to make it more general purpose
	// then this could work for gopkg.in too

	// TODO make this a method on gi to stop repeating this
	dir := strings.TrimPrefix(gi.RepoRoot, "https://github.com/")
	dir = strings.TrimSuffix(dir, ".git")

	data, err := httpGet(fmt.Sprintf("https://api.github.com/repos/%s/git/trees/HEAD", dir), githubAuth)
	if err != nil {
		return "", false, fmt.Errorf("trouble getting listing for %s: %v", gi.RepoRoot, err)
	}

	type apiTree struct {
		Path string
		Type string // we want "blob"
		URL  string
	}
	var response struct {
		Tree []apiTree
	}
	if err := json.Unmarshal([]byte(data), &response); err != nil {
		return "", false, fmt.Errorf("json decode error: %v", err)
	}

	for _, name := range repoLicenseFiles {
		for _, entry := range response.Tree {
			if entry.Type != "blob" || !strings.EqualFold(entry.Path, name) {
				continue
			}
			text, err := githubAPIBlob(gi.RepoRoot, entry.URL)
			return text, false, err
		}
	}

	return "", true, fmt.Errorf("no license found")
}

// githubAPIBlob downloads the blob at blobURL through the GitHub API and
// returns its content as trimmed text. Both the "utf-8" and "base64"
// encodings are understood; any other encoding is an error. repoRoot is used
// only to label download errors.
func githubAPIBlob(repoRoot string, blobURL string) (string, error) {
	data, err := httpGet(blobURL, githubAuth)
	if err != nil {
		return "", fmt.Errorf("trouble getting blob for %s: %v", repoRoot, err)
	}

	var blob struct {
		Content  string
		Encoding string
	}
	if err := json.Unmarshal([]byte(data), &blob); err != nil {
		return "", fmt.Errorf("json decode error: %v", err)
	}

	switch {
	case strings.EqualFold(blob.Encoding, "utf-8"):
		return strings.TrimSpace(blob.Content), nil
	case strings.EqualFold(blob.Encoding, "base64"):
		raw, err := base64.StdEncoding.DecodeString(blob.Content)
		if err != nil {
			return "", fmt.Errorf("base64 decode error: %v", err)
		}
		return strings.TrimSpace(string(raw)), nil
	default:
		return "", fmt.Errorf("unknown encoding type %q", blob.Encoding)
	}
}
// tryGetLicense attempts to download one of the named license files for
// module over plain HTTP and returns the trimmed text of the first one that
// can be fetched.
//
// The names in files are tried in order, with a one second pause before each.
// For every name, the URLs suggested by resolveFileURL are requested in turn;
// a failed download just moves on to the next URL. An error is returned when
// resolveFileURL does not know the repository, when a downloaded body cannot
// be decoded, or when nothing could be downloaded at all.
func tryGetLicense(module string, gi GoImport, gs GoSource, files []string) (string, error) {
	for _, name := range files {
		// be a good citizen
		time.Sleep(1 * time.Second)

		candidates, decode, err := resolveFileURL(gi, gs, name)
		if err != nil {
			return "", fmt.Errorf("no known license URL for module %q: %v", module, err)
		}

		for _, candidate := range candidates {
			body, fetchErr := httpGet(candidate, nil)
			if fetchErr != nil {
				// Not at this URL; try the next candidate.
				continue
			}

			text, decodeErr := decode(body)
			if decodeErr != nil {
				return "", fmt.Errorf("error decoding %q: %v", candidate, decodeErr)
			}
			return strings.TrimSpace(text), nil
		}
	}

	return "", fmt.Errorf("no license found for module %q", module)
}
// lookup discovers where module is hosted by fetching its ?go-get=1 page and
// reading the go-import (required) and go-source (optional) meta tags.
//
// If the page for the full module path cannot be fetched and the path has
// more than three segments, the first three segments are tried instead. If
// nothing can be fetched, the module is assumed to be a private git
// repository at https://<module>.git and no error is returned. An error is
// returned only when a page was fetched but holds no go-import meta tag.
func lookup(module string) (gi GoImport, gs GoSource, err error) {
	const goGetURL = "https://%s?go-get=1"

	page, fetchErr := httpGet(fmt.Sprintf(goGetURL, module), nil)
	if fetchErr != nil {
		// Attempt module root, for example:
		// https://github.com/go-gl/glfw/v3.3/glfw -> https://github.com/go-gl/glfw
		// https://github.com/russross/blackfriday/v2 -> https://github.com/russross/blackfriday
		if segments := strings.Split(module, "/"); len(segments) > 3 {
			root := strings.Join(segments[:3], "/")
			page, fetchErr = httpGet(fmt.Sprintf(goGetURL, root), nil)
		}
	}

	if fetchErr != nil {
		// Assume its a private repo
		// TODO should check this against go env GOPRIVATE
		// and should do that before attempting module root
		private := GoImport{
			ImportPrefix: module,
			Vcs:          "git",
			RepoRoot:     fmt.Sprintf("https://%s.git", module),
		}
		return private, GoSource{}, nil
	}

	imp, found := parseGoImport(page)
	if !found {
		return imp, GoSource{}, fmt.Errorf("unrecognised import %q (no go-import meta tags)", module)
	}

	// The go-source tag is optional, so a missing one is not an error.
	src, _ := parseGoSource(page)
	return imp, src, nil
}
// parseNetrc loads GitHub credentials from the user's netrc file into
// githubAuth.
//
// The file is the one named by the NETRC environment variable or, when that
// is unset or empty, ".netrc" in the current user's home directory. A file
// that does not exist is not an error, and githubAuth is left untouched when
// the file has no entry for github.com. An error is returned when the home
// directory is needed but the current user cannot be looked up, or when the
// file exists but cannot be parsed.
func parseNetrc() error {
	netrcPath := os.Getenv("NETRC")
	if netrcPath == "" {
		// The user lookup is only needed for the default location, so a
		// lookup failure must not get in the way of an explicit $NETRC.
		usr, err := user.Current()
		if err != nil {
			return fmt.Errorf("user lookup error: %v", err)
		}
		netrcPath = filepath.Join(usr.HomeDir, ".netrc")
	}

	n, err := netrc.Parse(netrcPath)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil
		}
		return fmt.Errorf(".netrc parse error: %v", err)
	}

	if github := n.Machine("github.com"); github != nil {
		githubAuth = &BasicAuth{
			Username: github.Get("login"),
			Token:    github.Get("password"),
		}
	}

	return nil
}
// main writes the license text of every requested module to stdout, each
// followed by a divider, and reports progress and problems on stderr.
//
// The modules are the command line arguments or, when none are given, the
// result of listModules for the Go module in the current directory. The Go
// standard library (github.com/golang/go) is always appended. A module whose
// lookup or license download fails is reported and skipped. A fatal error
// (module listing failure or an unrecognised argument) is printed to stderr
// and ends the program with exit status 1.
func main() {
	// A broken netrc file only costs the GitHub API fast path, so report the
	// problem and carry on instead of silently ignoring it.
	if err := parseNetrc(); err != nil {
		fmt.Fprintf(os.Stderr, "warning: %v\n", err)
	}

	if githubAuth == nil || !githubAuth.IsSet() {
		fmt.Fprintf(os.Stderr, "warning: no credentials set for GitHub API\n -- gocomply may be slower and less accurate\n")
	}

	err := func() error {
		var modules []string

		if len(os.Args) > 1 {
			modules = os.Args[1:]
		} else {
			var err error
			modules, err = listModules()
			if err != nil {
				return err
			}
		}

		// the standard library
		modules = append(modules, "github.com/golang/go")

		for _, module := range modules {
			fmt.Fprintf(os.Stderr, "> %s\n", module)

			// future-proof - might take arguments in future
			if strings.HasPrefix(module, "-") {
				return fmt.Errorf("unrecognised argument %q", module)
			}

			// "golang.org is a known non-module"
			// if strings.HasPrefix(module, "golang.org") {
			// continue
			// }

			gi, gs, err := lookup(module)
			if err != nil {
				fmt.Fprintf(os.Stderr, "unable to lookup module %q: %v\n", module, err)
				continue
			}

			license, err := getLicense(module, gi, gs)
			if err != nil {
				fmt.Fprintf(os.Stderr, "unable to find a license for module %q: %v\n", module, err)
				continue
			}

			fmt.Printf("%s\n\n%s\n\n%s\n\n", module, license, divider)
		}

		return nil
	}()

	if err != nil {
		// These are user-facing failures, not programmer bugs, so report
		// them plainly rather than panicking with a stack trace.
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}
}