Skip to content

Commit

Permalink
Merge pull request #2 from conneroisu/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
conneroisu authored Jun 7, 2024
2 parents dedf943 + 6d408d8 commit 3b98754
Show file tree
Hide file tree
Showing 45 changed files with 1,147 additions and 335 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,8 @@ jobs:
go install honnef.co/go/tools/cmd/staticcheck@latest
staticcheck ./...
go vet ./...
- name: revive
run: |
go install github.com/mgechev/revive@latest
revive -config .revive.toml ./...
File renamed without changes.
24 changes: 0 additions & 24 deletions .testcoverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,3 @@ threshold:
# (optional; default 0)
# The minimum total coverage project should have
total: 0

# Holds regexp rules which will override thresholds for matched files or packages
# using their paths.
#
# First rule from this list that matches file or package is going to apply
# new threshold to it. If project has multiple rules that match same path,
# override rules should be listed in order from specific to more general rules.
override:
# Increase coverage threshold to 100% for `foo` package
# (default is 80, as configured above in this example)
- threshold: 100
path: ^pkg/lib/foo$

# Holds regexp rules which will exclude matched files or packages
# from coverage statistics
exclude:
# Exclude files or packages matching their paths
paths:
- \.pb\.go$ # excludes all protobuf generated files
- ^pkg/bar # exclude package `pkg/bar`

# NOTES:
# - symbol `/` in all path regexps will be replaced by current OS file path separator
# to properly work on Windows
2 changes: 0 additions & 2 deletions cmd/cmds/docs.go

This file was deleted.

26 changes: 0 additions & 26 deletions cmd/cmds/gen.go

This file was deleted.

2 changes: 0 additions & 2 deletions cmd/doc.go

This file was deleted.

33 changes: 0 additions & 33 deletions cmd/root.go

This file was deleted.

10 changes: 0 additions & 10 deletions cmd/routes.go

This file was deleted.

14 changes: 11 additions & 3 deletions decoder.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package seltabl

import "io"
import (
"fmt"
"io"
)

// Decoder is a struct for decoding a reader into a slice of structs.
//
Expand Down Expand Up @@ -72,7 +75,12 @@ func NewDecoder[T any](r io.ReadCloser) *Decoder[T] {
// This allows for decoding a reader into a slice of structs.
//
// Similar to the json.Decoder for brevity.
func (d *Decoder[T]) Decode(value *T) ([]T, error) {
func (d *Decoder[T]) Decode() ([]T, error) {
defer d.reader.Close()
return NewFromReader[T](d.reader)
var result []T
result, err := NewFromReader[T](d.reader)
if err != nil {
return nil, fmt.Errorf("failed to decode: %w", err)
}
return result, nil
}
138 changes: 138 additions & 0 deletions decoder_test.go
Original file line number Diff line number Diff line change
@@ -1 +1,139 @@
package seltabl

import (
"io"
"strings"
"testing"
)

// DecodeExStruct is the row type that TestDecoder_Decode decodes HTML table
// fixtures into.
//
// The hSel, dSel and cSel tags are interpreted by the seltabl decoder, which is
// defined outside this file. Judging by the fixtures, hSel presumably selects
// the column's header cell, dSel its data cells, and cSel what to extract from
// each cell -- TODO confirm against the decoder.
type DecodeExStruct struct {
// A is tagged with selectors for the first cell (td:nth-child(1)) of a row.
A string `json:"a" seltabl:"a" hSel:"tr:nth-child(1) td:nth-child(1)" dSel:"tr td:nth-child(1)" cSel:"$text"`
// B is tagged with selectors for the second cell (td:nth-child(2)) of a row.
B string `json:"b" seltabl:"b" hSel:"tr:nth-child(1) td:nth-child(2)" dSel:"tr td:nth-child(2)" cSel:"$text"`
}

// TestDecoder_Decode checks Decoder.Decode against small HTML table fixtures.
//
// Each case feeds an HTML snippet to a fresh decoder and expects either the
// listed rows or an error. An unexpected error or a wrong row count stops the
// subtest immediately, so the element-wise comparison below never indexes past
// the end of the decoded slice.
func TestDecoder_Decode(t *testing.T) {
	testCases := []struct {
		name     string
		input    string
		expected []DecodeExStruct
		hasError bool
	}{
		{
			name: "Valid input",
			input: `
<table>
<tr>
<td>a</td>
<td>b</td>
</tr>
<tr>
<td>1</td>
<td>2</td>
</tr>
<tr>
<td>3</td>
<td>4</td>
</tr>
</table>
`,
			expected: []DecodeExStruct{
				{A: "1", B: "2"},
				{A: "3", B: "4"},
			},
			hasError: false,
		},
		{
			name: "Invalid input",
			input: `
<table>
<tr>
<td>a</td>
<td>b</td>
</tr>
<tr>
<td>1</td>
</tr>
</table>
`,
			expected: nil,
			hasError: true,
		},
		{
			name: "Invalid input with invalid html",
			input: `
<table>
<tr>
<td>a</td>
<td>b</td>
</tr>
<tr>
<td>1</td>
</tr>
</table>
`,
			expected: nil,
			hasError: true,
		},
		{
			name: "Invalid input with invalid json",
			input: `
<table>
<tr>
<td>a</td>
<td>b</td>
</tr>
<tr>
<td>1</td>
</tr>
</table>
`,
			expected: nil,
			hasError: true,
		},
		{
			// Renamed from a duplicate of the previous case's name so every
			// subtest can be selected unambiguously with -run.
			name: "Invalid input with only one row",
			input: `
<table>
<tr>
<td>a</td>
<td>b</td>
<td>1</td>
</tr>
</table>
`,
			expected: nil,
			hasError: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			r := io.NopCloser(strings.NewReader(tc.input))
			decoder := NewDecoder[DecodeExStruct](r)
			result, err := decoder.Decode()

			if tc.hasError {
				if err == nil {
					t.Errorf("Expected an error, but got none")
				}
				return
			}

			// Fatal, not Error: nothing below is meaningful without a result.
			if err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}

			// Fatal, not Error: a shorter result would make result[i] panic.
			if len(result) != len(tc.expected) {
				t.Fatalf("Expected %d results, but got %d", len(tc.expected), len(result))
			}

			for i, expected := range tc.expected {
				if result[i].A != expected.A || result[i].B != expected.B {
					t.Errorf("Expected %+v, but got %+v", expected, result[i])
				}
			}
		})
	}
}
7 changes: 0 additions & 7 deletions examples/example2/example2.go

This file was deleted.

7 changes: 0 additions & 7 deletions examples/example3/example3.go

This file was deleted.

7 changes: 0 additions & 7 deletions examples/example4/example4.go

This file was deleted.

7 changes: 0 additions & 7 deletions examples/example5/example5.go

This file was deleted.

22 changes: 22 additions & 0 deletions examples/huggingface-leader-board/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Package main shows how to use the seltabl package to scrape a table from a given url.

Check failure on line 1 in examples/huggingface-leader-board/main.go

View workflow job for this annotation

GitHub Actions / Update coverage badge

File test coverage below threshold

File test coverage below threshold: coverage: 40%; threshold: 80%
// The table used in this example is from the huggingface llm leader board.
package main

import (
"fmt"
"os"
)

// main runs the example and exits with status 1 if it fails.
//
// The example targets the table at:
// https://huggingface.co/spaces/HuggingFaceH4/LLM-Leaderboard
func main() {
	if err := run(); err != nil {
		// Diagnostics go to stderr so stdout carries only the example's output.
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run executes the example, which currently only prints a greeting to stdout.
// It always returns nil.
func run() error {
	const greeting = "Hello, World from llm leader board!"
	fmt.Println(greeting)
	return nil
}
13 changes: 13 additions & 0 deletions examples/huggingface-leader-board/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package main

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestRun tests the run function
func TestRun(t *testing.T) {
err := run()
assert.Nil(t, err)
}
3 changes: 3 additions & 0 deletions examples/ncaa/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Package main is an example of how to use the seltabl package.
package main
File renamed without changes.
24 changes: 24 additions & 0 deletions examples/penguins-wikipedia/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Package main is an example of how to use the seltabl package.

Check failure on line 1 in examples/penguins-wikipedia/main.go

View workflow job for this annotation

GitHub Actions / Update coverage badge

File test coverage below threshold

File test coverage below threshold: coverage: 40%; threshold: 80%
// It shows how the seltabl package can scrape an HTML table from a given URL.
// The table used in this example is from the Wikipedia page for
// penguins.
package main

import (
"fmt"
"os"
)

// main runs the example and exits with status 1 if it fails.
//
// The example targets the table at:
// https://en.wikipedia.org/wiki/List_of_penguins
func main() {
	if err := run(); err != nil {
		// Diagnostics go to stderr so stdout carries only the example's output.
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run executes the example, which currently only prints a greeting to stdout.
// It always returns nil.
func run() error {
	const greeting = "Hello, World from list of penguins!"
	fmt.Println(greeting)
	return nil
}
13 changes: 13 additions & 0 deletions examples/penguins-wikipedia/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package main

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestRun tests the run function
func TestRun(t *testing.T) {
err := run()
assert.Nil(t, err)
}
Loading

0 comments on commit 3b98754

Please sign in to comment.