From 4a1b6faf26ede830e84b9903651ce673327b43ba Mon Sep 17 00:00:00 2001 From: Eduardo Cuducos <4732915+cuducos@users.noreply.github.com> Date: Tue, 27 Feb 2024 15:36:03 -0500 Subject: [PATCH] Adds mirror download option Closes #206 --- cmd/download.go | 5 ++++ docs/servidor.md | 5 ++++ download/download.go | 12 +++++++++ download/mirror.go | 60 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+) create mode 100644 download/mirror.go diff --git a/cmd/download.go b/cmd/download.go index 1c14b40f..5f898547 100644 --- a/cmd/download.go +++ b/cmd/download.go @@ -36,6 +36,7 @@ var ( chunkSize int64 skipExistingFiles bool restart bool + mirror string ) var downloadCmd = &cobra.Command{ @@ -50,6 +51,9 @@ var downloadCmd = &cobra.Command{ if err != nil { return err } + if mirror != "" { + return download.DownloadFromMirror(mirror, dir, dur, skipExistingFiles, restart, parallelDownloads, downloadRetries, chunkSize) + } return download.Download(dir, dur, skipExistingFiles, restart, parallelDownloads, downloadRetries, chunkSize) }, } @@ -93,6 +97,7 @@ func downloadCLI() *cobra.Command { downloadCmd.Flags().IntVarP(&parallelDownloads, "parallel", "p", download.DefaultMaxParallel, "maximum parallel downloads") downloadCmd.Flags().Int64VarP(&chunkSize, "chunk-size", "c", download.DefaultChunkSize, "max length of the bytes range for each HTTP request") downloadCmd.Flags().BoolVarP(&restart, "restart", "e", false, "restart all downloads from the beginning") + downloadCmd.Flags().StringVarP(&mirror, "mirror", "m", "", "download from the mirror, not from the original source (YYYY-MM-DD)") return downloadCmd } diff --git a/docs/servidor.md b/docs/servidor.md index 9fd1d61a..58ef4ee2 100644 --- a/docs/servidor.md +++ b/docs/servidor.md @@ -29,6 +29,10 @@ Caso o download falhe, é recomendado variar as configurações explicadas no `- Em último caso, é possível listar as URLs para download dos arquivos com comando `urls`; e, então, tentar fazer o download de outra 
forma (manualmente, com alguma ferramenta que permite recomeçar downloads interrompidos, etc.). +### Espelho dos dados + +O _Minha Receita_ mantém um [espelho dos dados em um diretório compartilhado](https://mirror.minhareceita.org). Você pode fazer o download dos arquivos de lá (ao invés de utilizar o servidor oficial) com a opção `--mirror YYYY-MM-DD` substituindo a data por alguma das disponíveis no espelho. + ### Exemplos de uso Sem Docker: @@ -36,6 +40,7 @@ Sem Docker: ```console $ minha-receita download --urls-only $ minha-receita download --timeout 1h42m12s +$ minha-receita download --mirror 2022-12-17 ``` Com Docker: diff --git a/download/download.go b/download/download.go index 1971ac76..0a0e1527 100644 --- a/download/download.go +++ b/download/download.go @@ -75,6 +75,18 @@ func Download(dir string, timeout time.Duration, skip, restart bool, parallel in return nil } +// DownloadFromMirror downloads all the files from the project's mirror. +func DownloadFromMirror(mirror string, dir string, timeout time.Duration, skip, restart bool, parallel int, retries uint, chunkSize int64) error { + urls, err := getMirrorURLs(mirror) + if err != nil { + return fmt.Errorf("error getting mirror urls: %w", err) + } + if err := download(dir, urls, parallel, retries, chunkSize, timeout, restart); err != nil { + return fmt.Errorf("error downloading files from the mirror: %w", err) + } + return nil +} + // URLs shows the URLs to be downloaded. func URLs(dir string, skip bool) error { urls := []string{federalRevenueURL, nationalTreasureBaseURL} diff --git a/download/mirror.go b/download/mirror.go new file mode 100644 index 00000000..afa713d5 --- /dev/null +++ b/download/mirror.go @@ -0,0 +1,60 @@ +package download + +import ( + "encoding/json" + "fmt" + "net/http" + "strings" +) + +const mirrorURL = "https://mirror.minhareceita.org" + +// TODO: do we need to export these structs? 
+type MirrorFile struct { + URL string `json:"url"` + Size uint `json:"size"` +} + +type MirrorGroup struct { + Name string `json:"name"` + URLs []MirrorFile `json:"urls"` +} + +type MirrorResponse struct { + Data []MirrorGroup `json:"data"` +} + +func getMirrorURLs(t string) ([]string, error) { + c := &http.Client{} + req, err := http.NewRequest("GET", mirrorURL, nil) + if err != nil { + return []string{}, fmt.Errorf("error creating request for mirror: %w", err) + } + req.Header.Set("Accept", "application/json") + r, err := c.Do(req) + if err != nil { + return []string{}, fmt.Errorf("error sending request to mirror: %w", err) + } + defer r.Body.Close() + var gs MirrorResponse + if err = json.NewDecoder(r.Body).Decode(&gs); err != nil { + return []string{}, fmt.Errorf("error decoding response body: %w", err) + } + var urls []string + var opts []string + for _, g := range gs.Data { + if g.Name == t { + for _, u := range g.URLs { + if u.Size > 0 { + urls = append(urls, u.URL) + } + } + break + } + opts = append(opts, g.Name) + } + if len(urls) == 0 { + return []string{}, fmt.Errorf("unknown mirror identifier `%s`, options are: %s", t, strings.Join(opts, ", ")) + } + return urls, nil +}