From 2bcf8813d392641ea3dd2fb47b7df92bbb3ecdbb Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 11 Nov 2023 19:09:02 -0500 Subject: [PATCH] Add a `--files` option (#1426) * mlr --files * doc mods --- docs/src/keystroke-savers.md | 10 ++++++ docs/src/keystroke-savers.md.in | 10 ++++++ docs/src/manpage.md | 5 ++- docs/src/manpage.txt | 5 ++- docs/src/reference-main-flag-list.md | 1 + man/manpage.txt | 5 ++- man/mlr.1 | 3 ++ pkg/cli/option_parse.go | 47 ++++++++++++++++++++++++++++ 8 files changed, 83 insertions(+), 3 deletions(-) diff --git a/docs/src/keystroke-savers.md b/docs/src/keystroke-savers.md index 1cc2485a12..c62231709e 100644 --- a/docs/src/keystroke-savers.md +++ b/docs/src/keystroke-savers.md @@ -87,6 +87,16 @@ If there's more than one input file, you can use `--mfrom`, then however many fi mlr --c2p --mfrom data/*.csv -- sort -n index +Alternatively, you may place filenames within another file, one per line: + +
+cat data/filenames.txt
+
+ +
+mlr --c2p --files data/filenames.txt cat
+
+ ## Shortest flags for CSV, TSV, and JSON The following have even shorter versions: diff --git a/docs/src/keystroke-savers.md.in b/docs/src/keystroke-savers.md.in index b8cb2b3c50..720dfad148 100644 --- a/docs/src/keystroke-savers.md.in +++ b/docs/src/keystroke-savers.md.in @@ -32,6 +32,16 @@ GENMD-SHOW-COMMAND mlr --c2p --mfrom data/*.csv -- sort -n index GENMD-EOF +Alternatively, you may place filenames within another file, one per line: + +GENMD-SHOW-COMMAND +cat data/filenames.txt +GENMD-EOF + +GENMD-SHOW-COMMAND +mlr --c2p --files data/filenames.txt cat +GENMD-EOF + ## Shortest flags for CSV, TSV, and JSON The following have even shorter versions: diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 068cee8c55..3a60469192 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -513,6 +513,9 @@ MILLER(1) MILLER(1) large files. Use this flag to force frequent updates even when output is to a pipe or file, at a performance cost. + --files {filename} Use this to specify a file which itself contains, one + per line, names of input files. May be used more than + once. --from {filename} Use this to specify an input file before the verb(s), rather than after. May be used more than once. Example: `mlr --from a.dat --from b.dat cat` is the @@ -3645,5 +3648,5 @@ MILLER(1) MILLER(1) - 2023-11-11 MILLER(1) + 2023-11-12 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 41dfa078cd..ef75697117 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -492,6 +492,9 @@ MILLER(1) MILLER(1) large files. Use this flag to force frequent updates even when output is to a pipe or file, at a performance cost. + --files {filename} Use this to specify a file which itself contains, one + per line, names of input files. May be used more than + once. --from {filename} Use this to specify an input file before the verb(s), rather than after. May be used more than once. 
Example: `mlr --from a.dat --from b.dat cat` is the @@ -3624,4 +3627,4 @@ MILLER(1) MILLER(1) - 2023-11-11 MILLER(1) + 2023-11-12 MILLER(1) diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index 0a93e12e2d..f9ce597ff0 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -266,6 +266,7 @@ These are flags which don't fit into any other category. **Flags:** * `--fflush`: Force buffered output to be written after every output record. The default is flush output after every record if the output is to the terminal, or less often if the output is to a file or a pipe. The default is a significant performance optimization for large files. Use this flag to force frequent updates even when output is to a pipe or file, at a performance cost. +* `--files {filename}`: Use this to specify a file which itself contains, one per line, names of input files. May be used more than once. * `--from {filename}`: Use this to specify an input file before the verb(s), rather than after. May be used more than once. Example: `mlr --from a.dat --from b.dat cat` is the same as `mlr cat a.dat b.dat`. * `--hash-records`: This is an internal parameter which normally does not need to be modified. It controls the mechanism by which Miller accesses fields within records. In general --no-hash-records is faster, and is the default. For specific use-cases involving data having many fields, and many of them being processed during a given processing run, --hash-records might offer a slight performance benefit. * `--infer-int-as-float or -A`: Cast all integers in data files to floats. diff --git a/man/manpage.txt b/man/manpage.txt index 41dfa078cd..ef75697117 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -492,6 +492,9 @@ MILLER(1) MILLER(1) large files. Use this flag to force frequent updates even when output is to a pipe or file, at a performance cost. 
+ --files {filename} Use this to specify a file which itself contains, one + per line, names of input files. May be used more than + once. --from {filename} Use this to specify an input file before the verb(s), rather than after. May be used more than once. Example: `mlr --from a.dat --from b.dat cat` is the @@ -3624,4 +3627,4 @@ MILLER(1) MILLER(1) - 2023-11-11 MILLER(1) + 2023-11-12 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index 9a6c75d5ca..c61b2cb67e 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -611,6 +611,9 @@ These are flags which don't fit into any other category. large files. Use this flag to force frequent updates even when output is to a pipe or file, at a performance cost. +--files {filename} Use this to specify a file which itself contains, one + per line, names of input files. May be used more than + once. --from {filename} Use this to specify an input file before the verb(s), rather than after. May be used more than once. Example: `mlr --from a.dat --from b.dat cat` is the diff --git a/pkg/cli/option_parse.go b/pkg/cli/option_parse.go index 37ee1fcc3c..c9732b0255 100644 --- a/pkg/cli/option_parse.go +++ b/pkg/cli/option_parse.go @@ -7,7 +7,9 @@ package cli import ( + "bufio" "fmt" + "io" "os" "strings" @@ -2797,6 +2799,51 @@ var MiscFlagSection = FlagSection{ }, }, + { + name: "--files", + arg: "{filename}", + help: "Use this to specify a file which itself contains, one per line, names of input files. 
May be used more than once.",
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				// Parser for `--files {filename}`: reads the named file and appends
+				// each non-blank line, minus its line ending, to options.FileNames.
+				// Prints the error and exits with status 1 on open or read failure.
+				CheckArgCount(args, *pargi, argc, 2)
+
+				fileName := args[*pargi+1]
+				handle, err := os.Open(fileName)
+				if err != nil {
+					fmt.Fprintln(os.Stderr, "mlr", err)
+					os.Exit(1)
+				}
+				defer handle.Close()
+
+				lineReader := bufio.NewReader(handle)
+				for {
+					line, err := lineReader.ReadString('\n')
+					if err != nil && err != io.EOF {
+						fmt.Fprintln(os.Stderr, "mlr", err)
+						os.Exit(1)
+					}
+
+					// At EOF, ReadString still returns whatever it read before the
+					// error: that is the final line when the file lacks a trailing
+					// newline, so it must be processed rather than dropped.
+					// Chomp both "\n" and "\r\n" line endings.
+					line = strings.TrimRight(line, "\r\n")
+					// Skip blank lines: an empty string cannot name an input file.
+					if line != "" {
+						options.FileNames = append(options.FileNames, line)
+					}
+
+					if err == io.EOF {
+						break
+					}
+				}
+
+				*pargi += 2
+			},
+		},
+
 		{
 			name: "--ofmt",
 			arg:  "{format}",