Skip to content

Commit

Permalink
using go-flags to manage command line input.
Browse files Browse the repository at this point in the history
  • Loading branch information
arunsupe committed Aug 1, 2024
1 parent db1f8b3 commit 441fb51
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 43 deletions.
25 changes: 13 additions & 12 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Search for words similar to "death" in Hemingway's "The Old Man and the Sea" wit

```bash
curl -s 'https://gutenberg.ca/ebooks/hemingwaye-oldmanandthesea/hemingwaye-oldmanandthesea-00-t.txt' \
| w2vgrep -C 2 -n -threshold 0.55 death
| w2vgrep -C 2 -n --threshold=0.55 death
```

Output:
Expand Down Expand Up @@ -86,16 +86,17 @@ Basic usage:
If no file is specified, w2vgrep reads from standard input.

### Command-line Options

- `-model_path`: Path to the Word2Vec model file ('models/glove/glove.6B.300d.bin'). Overrides config file.
- `-threshold`: Similarity threshold for matching (default: 0.7)
- `-A`: Number of lines to display after a match
- `-B`: Number of lines to display before a match
- `-C`: Number of lines to display before and after a match
- `-n`: Print line numbers
- `-o`: Print only matching word
- `-l`: Print only matched lines
- `-i`: Case insensitive matching
```
-m, --model_path= Path to the Word2Vec model file. Overrides config file
-t, --threshold= Similarity threshold for matching (default: 0.7)
-A, --before-context= Number of lines before matching line
-B, --after-context= Number of lines after matching line
-C, --context= Number of lines before and after matching line
-n, --line-number Print line numbers
-i, --ignore-case Ignore case.
-o, --only-matching Output only matching words
-l, --only-lines Output only matched lines without similarity scores
```

## Configuration

Expand All @@ -119,7 +120,7 @@ curl -s 'https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.fr.300.vec.gz'

# use it like so:
# curl -s 'https://www.gutenberg.org/cache/epub/17989/pg17989.txt' \
# | w2vgrep -C 2 -n -threshold 0.55 \
# | w2vgrep -C 2 -n -t 0.55 \
# --model_path model_processing_utils/cc.fr.300.bin 'château'
```

Expand Down
10 changes: 8 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@ module w2vgrep

go 1.22.5

require github.com/clipperhouse/uax29 v1.13.0
require (
github.com/clipperhouse/uax29 v1.13.0
github.com/jessevdk/go-flags v1.6.1
)

require golang.org/x/text v0.16.0 // indirect
require (
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
github.com/clipperhouse/uax29 v1.13.0 h1:5q58IRS9gBATd+NtnPXAmul5PLTGFeQ3lv0C51zhIEk=
github.com/clipperhouse/uax29 v1.13.0/go.mod h1:paNABhygWmmjkg0ROxKQoenJAX4dM9AS8biVkXmAK0c=
github.com/jessevdk/go-flags v1.6.1 h1:Cvu5U8UGrLay1rZfv/zP7iLpSHGUZ/Ou68T0iX1bBK4=
github.com/jessevdk/go-flags v1.6.1/go.mod h1:Mk8T1hIAWpOiJiHa9rJASDK2UGWji0EuPGBnNLMooyc=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
72 changes: 43 additions & 29 deletions w2vgrep.go
Original file line number Diff line number Diff line change
@@ -1,44 +1,59 @@
package main

import (
"flag"
"fmt"
"os"

"w2vgrep/modules/config"
"w2vgrep/modules/model"
"w2vgrep/modules/processor"
"w2vgrep/modules/similarity"

"github.com/jessevdk/go-flags"
)

// Options holds the command-line options for w2vgrep. The struct tags
// (short, long, default, description) are read by github.com/jessevdk/go-flags
// when main passes a pointer to this struct to flags.NewParser.
//
// NOTE(review): GNU grep binds -A to --after-context and -B to
// --before-context. Here -A is bound to before-context and -B to
// after-context; confirm whether this reversal is intentional.
type Options struct {
// ModelPath is the Word2Vec model file to load. When left empty, main falls
// back to the ModelPath value from the configuration file.
ModelPath string `short:"m" long:"model_path" description:"Path to the Word2Vec model file"`
// SimilarityThreshold is the similarity threshold for matching; defaults to 0.7.
SimilarityThreshold float64 `short:"t" long:"threshold" default:"0.7" description:"Similarity threshold for matching"`
// ContextBefore is the number of lines to print before a matching line.
ContextBefore int `short:"A" long:"before-context" description:"Number of lines before matching line"`
// ContextAfter is the number of lines to print after a matching line.
ContextAfter int `short:"B" long:"after-context" description:"Number of lines after matching line"`
// ContextBoth, when greater than zero, is copied by main over both
// ContextBefore and ContextAfter.
ContextBoth int `short:"C" long:"context" description:"Number of lines before and after matching line"`
// PrintLineNumbers turns on line numbers in the output.
PrintLineNumbers bool `short:"n" long:"line-number" description:"Print line numbers"`
// IgnoreCase requests case-insensitive matching; see the description text
// for the caveat about case-sensitive word2vec models.
IgnoreCase bool `short:"i" long:"ignore-case" description:"Ignore case. Note: word2vec is case-sensitive. Ignoring case may lead to unexpected results"`
// OutputOnlyMatching limits output to the matching words.
OutputOnlyMatching bool `short:"o" long:"only-matching" description:"Output only matching words"`
// OutputOnlyLines prints matched lines without similarity scores.
OutputOnlyLines bool `short:"l" long:"only-lines" description:"Output only matched lines without similarity scores"`
}

func main() {
modelPath := flag.String("model_path", "", "Path to the Word2Vec model file")
similarityThreshold := flag.Float64("threshold", 0.7, "Similarity threshold for matching")
contextBefore := flag.Int("A", 0, "Number of lines before matching line")
contextAfter := flag.Int("B", 0, "Number of lines after matching line")
contextBoth := flag.Int("C", 0, "Number of lines before and after matching line")
printLineNumbers := flag.Bool("n", false, "Print line numbers")
ignoreCase := flag.Bool("i", false, "Ignore case. Note: word2vec is case-sensitive. Ignoring case may lead to unexpected results")
outputOnlyMatching := flag.Bool("o", false, "Output only matching words")
outputOnlyLines := flag.Bool("l", false, "Output only matched lines without similarity scores")

flag.Parse()

if *contextBoth > 0 {
*contextBefore = *contextBoth
*contextAfter = *contextBoth
var opts Options
var parser = flags.NewParser(&opts, flags.Default)
parser.Usage = "[OPTIONS] QUERY [FILE]"

args, err := parser.Parse()
if err != nil {
if flagsErr, ok := err.(*flags.Error); ok && flagsErr.Type == flags.ErrHelp {
os.Exit(0)
} else {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
parser.WriteHelp(os.Stderr)
os.Exit(1)
}
}

args := flag.Args()
if len(args) < 1 {
fmt.Fprintln(os.Stderr, "Error: query is required")
flag.PrintDefaults()
parser.WriteHelp(os.Stderr)
os.Exit(1)
}

if opts.ContextBoth > 0 {
opts.ContextBefore = opts.ContextBoth
opts.ContextAfter = opts.ContextBoth
}

query := args[0]
var input *os.File
var err error

if len(args) > 1 {
input, err = os.Open(args[1])
Expand All @@ -61,29 +76,28 @@ func main() {
}
fmt.Fprintf(os.Stderr, "Using configuration file: %s\n", configPath)

if *modelPath == "" {
*modelPath = conf.ModelPath
if opts.ModelPath == "" {
opts.ModelPath = conf.ModelPath
}
}

if *modelPath == "" {
fmt.Fprintln(os.Stderr, "Error: Model path is required. Please provide it via config file or -model_path flag.")
flag.PrintDefaults()
if opts.ModelPath == "" {
fmt.Fprintln(os.Stderr, "Error: Model path is required. Please provide it via config file or -m/--model_path flag.")
parser.WriteHelp(os.Stderr)
os.Exit(1)
}

var w2vModel model.VectorModel
var similarityCache similarity.SimilarityCache

w2vModel, err = model.LoadVectorModel(*modelPath)
w2vModel, err = model.LoadVectorModel(opts.ModelPath)
if err != nil {
fmt.Fprintf(os.Stderr, "Error loading full model: %v\n", err)
os.Exit(1)
}
similarityCache = similarity.NewSimilarityCache()

// Dereference the pointers when passing to ProcessLineByLine
processor.ProcessLineByLine(query, w2vModel, similarityCache, *similarityThreshold,
*contextBefore, *contextAfter, input, *printLineNumbers, *ignoreCase,
*outputOnlyMatching, *outputOnlyLines)
processor.ProcessLineByLine(query, w2vModel, similarityCache, opts.SimilarityThreshold,
opts.ContextBefore, opts.ContextAfter, input, opts.PrintLineNumbers, opts.IgnoreCase,
opts.OutputOnlyMatching, opts.OutputOnlyLines)
}

0 comments on commit 441fb51

Please sign in to comment.