Skip to content

Commit

Permalink
snappr, cmd/snappr: Stricter timezone handling
Browse files Browse the repository at this point in the history
Don't allow mixed timezones; it's almost always a mistake or oversight,
and it results in confusing (although technically correct) behaviour.

Also test timezone handling more throughly.
  • Loading branch information
pgaskin committed Nov 16, 2023
1 parent b0101d3 commit c50451a
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 41 deletions.
31 changes: 17 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
Can extract dates in arbitrary formats from arbitrary parts of a line, preserving the entire line, and ignoring or passing-through unmatched or invalid lines.

- **Zone-aware timestamp handling.** \
Will work correctly with mixed timezones, sorting by the real time, but using the calendar day/month/year from the zoned time.
Will work correctly with different timezones, sorting and determining secondly snapshots by the real time, but using the calendar day/month/year from the zoned time.

- **Verbose debugging information.** \
You can view which intervals caused a specific snapshot to be retained, and whether a retention policy wants more snapshots than it found.
Expand Down Expand Up @@ -61,19 +61,20 @@ $ btrfs subvol list -r /mnt/bkp/ |
#### CLI Usage

```
usage: snappr [options] policy...
usage: /tmp/go-build2822248938/b001/exe/snappr [options] policy...
options:
-E, --extended-regexp use full regexp syntax rather than POSIX (see pkg.go.dev/regexp/syntax)
-e, --extract string extract the timestamp from each input line using the provided regexp, which must contain up to one capture group
-h, --help show this help text
-v, --invert output the snapshots to keep instead of the ones to prune
-L, --local-time use the default timezone rather than UTC if no timezone is parsed from the timestamp
-o, --only only print the part of the line matching the regexp
-p, --parse string parse the timestamp using the specified Go time format (see pkg.go.dev/time#pkg-constants and the examples below) rather than a unix timestamp
-q, --quiet do not show warnings about invalid or unmatched input lines
-s, --summarize summarize retention policy results to stderr
-w, --why explain why each snapshot is being kept to stderr
-E, --extended-regexp use full regexp syntax rather than POSIX (see pkg.go.dev/regexp/syntax)
-e, --extract string extract the timestamp from each input line using the provided regexp, which must contain up to one capture group
-h, --help show this help text
-v, --invert output the snapshots to keep instead of the ones to prune
-o, --only only print the part of the line matching the regexp
-p, --parse string parse the timestamp using the specified Go time format (see pkg.go.dev/time#pkg-constants and the examples below) rather than a unix timestamp
-Z, --parse-timezone tz use a specific timezone rather than whatever is set for --timezone if no timezone is parsed from the timestamp itself
-q, --quiet do not show warnings about invalid or unmatched input lines
-s, --summarize summarize retention policy results to stderr
-z, --timezone tz convert all timestamps to this timezone while pruning snapshots (use "local" for the default system timezone) (default UTC)
-w, --why explain why each snapshot is being kept to stderr
time format examples:
- Mon Jan 02 15:04:05 2006
Expand All @@ -88,7 +89,7 @@ policy: N@unit:X
- there may only be one N specified for each unit:X pair
unit:
last snapshot count
last snapshot count (X must be 1)
secondly clock seconds (can also use the format #h#m#s, omitting any zeroed units)
daily calendar days
monthly calendar months
Expand All @@ -99,7 +100,9 @@ notes:
- input is read from stdin, and should consist of unix timestamps (or more if --extract and/or --parse are set)
- invalid/unmatched input lines are ignored, or passed through if --invert is set (and a warning is printed unless --quiet is set)
- everything will still work correctly even if timezones are different
- snapshots are ordered by their UTC time
- snapshots are always ordered by their real (i.e., UTC) time
- if using --parse-in, beware of duplicate timestamps at DST transitions (if the offset isn't included whatever you use as the
snapshot name, and your timezone has DST, you may end up with two snapshots for different times with the same name.
- timezones will only affect the exact point at which calendar days/months/years are split
```

Expand Down
66 changes: 55 additions & 11 deletions cmd/snappr/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,53 @@ var (
Extended = pflag.BoolP("extended-regexp", "E", false, "use full regexp syntax rather than POSIX (see pkg.go.dev/regexp/syntax)")
Only = pflag.BoolP("only", "o", false, "only print the part of the line matching the regexp")
Parse = pflag.StringP("parse", "p", "", "parse the timestamp using the specified Go time format (see pkg.go.dev/time#pkg-constants and the examples below) rather than a unix timestamp")
Local = pflag.BoolP("local-time", "L", false, "use the default timezone rather than UTC if no timezone is parsed from the timestamp")
ParseIn = pflag_TimezoneP("parse-timezone", "Z", nil, "use a specific timezone rather than whatever is set for --timezone if no timezone is parsed from the timestamp itself")
In = pflag_TimezoneP("timezone", "z", time.UTC, "convert all timestamps to this timezone while pruning snapshots (use \"local\" for the default system timezone)")
Invert = pflag.BoolP("invert", "v", false, "output the snapshots to keep instead of the ones to prune")
Why = pflag.BoolP("why", "w", false, "explain why each snapshot is being kept to stderr")
Summarize = pflag.BoolP("summarize", "s", false, "summarize retention policy results to stderr")
Help = pflag.BoolP("help", "h", false, "show this help text")
)

type timezoneFlag struct {
loc *time.Location
}

func pflag_TimezoneP(name, shorthand string, value *time.Location, usage string) **time.Location {
f := &timezoneFlag{value}
pflag.VarP(f, name, shorthand, usage)
return &f.loc
}

func (t *timezoneFlag) Type() string {
return "tz"
}

func (t *timezoneFlag) String() string {
if t.loc == nil {
return ""
}
return t.loc.String()
}

func (t *timezoneFlag) Set(s string) error {
switch string(s) {
case "":
t.loc = nil
case "UTC", "utc":
t.loc = time.UTC
case "Local", "local":
t.loc = time.Local
default:
loc, err := time.LoadLocation(s)
if err != nil {
return err
}
t.loc = loc
}
return nil
}

func main() {
pflag.Parse()

Expand Down Expand Up @@ -55,7 +95,9 @@ func main() {
fmt.Printf(" - input is read from stdin, and should consist of unix timestamps (or more if --extract and/or --parse are set)\n")
fmt.Printf(" - invalid/unmatched input lines are ignored, or passed through if --invert is set (and a warning is printed unless --quiet is set)\n")
fmt.Printf(" - everything will still work correctly even if timezones are different\n")
fmt.Printf(" - snapshots are ordered by their UTC time\n")
fmt.Printf(" - snapshots are always ordered by their real (i.e., UTC) time\n")
fmt.Printf(" - if using --parse-in, beware of duplicate timestamps at DST transitions (if the offset isn't included whatever you use as the\n")
fmt.Printf(" snapshot name, and your timezone has DST, you may end up with two snapshots for different times with the same name.\n")
fmt.Printf(" - timezones will only affect the exact point at which calendar days/months/years are split\n")
if *Help {
os.Exit(0)
Expand All @@ -64,6 +106,15 @@ func main() {
}
}

if *In == nil {
fmt.Fprintf(os.Stderr, "snappr: fatal: timezone must not be empty\n")
os.Exit(2)
}

if *ParseIn == nil {
*ParseIn = *In
}

policy, err := snappr.ParsePolicy(pflag.Args()...)
if err != nil {
fmt.Fprintf(os.Stderr, "snappr: fatal: invalid policy: %v\n", err)
Expand All @@ -87,14 +138,7 @@ func main() {
}
}

var tz *time.Location
if *Local {
tz = time.Local
} else {
tz = time.UTC
}

times, lines, err := scan(os.Stdin, extract, tz, *Parse, *Quiet, *Only)
times, lines, err := scan(os.Stdin, extract, *ParseIn, *Parse, *Quiet, *Only)
if err != nil {
fmt.Fprintf(os.Stderr, "snappr: fatal: failed to read stdin: %v\n", err)
os.Exit(2)
Expand All @@ -109,7 +153,7 @@ func main() {
}
}

keep, need := snappr.Prune(snapshots, policy)
keep, need := snappr.Prune(snapshots, policy, *In)

discard := make([]bool, len(times))
for at, why := range keep {
Expand Down
14 changes: 8 additions & 6 deletions snappr.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,14 +318,16 @@ func (p Policy) MarshalText() ([]byte, error) {
// periods requiring that snapshot, and the remaining number of snapshots
// required to fulfill the original policy.
//
// The timezone doesn't matter and doesn't need to be consistent since snapshots
// are ordered by their UTC time value. The timezone will only affect where
// days/months/years are split for the purpose of determining the calendar
// day/month/year.
// All snapshots are placed in the provided timezone, and the monotonic time
// component is removed. The timezone affects the exact point at which calendar
// days/months/years are split. Beware of duplicate timestamps at DST
// transitions (if the offset isn't included whatever you use as the snapshot
// name, and your timezone has DST, you may end up with two snapshots for
// different times with the same name).
//
// See pruneCorrectness in snappr_test.go for some additional notes about
// guarantees provided by Prune.
func Prune(snapshots []time.Time, policy Policy) (keep [][]Period, need Policy) {
func Prune(snapshots []time.Time, policy Policy, loc *time.Location) (keep [][]Period, need Policy) {
need = policy.Clone()
keep = make([][]Period, len(snapshots))

Expand All @@ -351,7 +353,7 @@ func Prune(snapshots []time.Time, policy Policy) (keep [][]Period, need Policy)
// start from the beginning, marking the first one in each period
for i := range snapshots {
var current int64
switch t := snapshots[sorted[i]].Truncate(-1); period.Unit {
switch t := snapshots[sorted[i]].In(loc).Truncate(-1); period.Unit {
case Last:
match[i] = true
continue
Expand Down
48 changes: 38 additions & 10 deletions snappr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@ import (
"fmt"
"maps"
"reflect"
"runtime"
"slices"
"strconv"
"strings"
"testing"
"time"
_ "time/tzdata"
)

func TestParsePolicy(t *testing.T) {
Expand Down Expand Up @@ -125,7 +128,14 @@ func TestParsePolicy(t *testing.T) {
}

// pruneCorrectness checks that guarantees provided by Prune are upheld.
func pruneCorrectness(snapshots []time.Time, policy Policy) error {
func pruneCorrectness(snapshots []time.Time, policy Policy, loc *time.Location) error {
{
tmp := make([]time.Time, len(snapshots))
for i, t := range snapshots {
tmp[i] = t.In(loc)
}
snapshots = tmp
}
var (
prevNeed Policy
prevSubset = -1
Expand All @@ -135,7 +145,7 @@ func pruneCorrectness(snapshots []time.Time, policy Policy) error {
allSnapshots := snapshots
snapshots := snapshots[:subset]

keep, need := Prune(snapshots, policy)
keep, need := Prune(snapshots, policy, loc)

/**
* Prune "keep" output will be like the input snapshots, but with a
Expand Down Expand Up @@ -194,7 +204,7 @@ func pruneCorrectness(snapshots []time.Time, policy Policy) error {
/**
* Pruning is reproducible.
*/
rKeep, rNeed := Prune(snapshots, policy)
rKeep, rNeed := Prune(snapshots, policy, loc)
if !maps.Equal(rNeed.count, need.count) {
return fmt.Errorf("subset %d: prune reproducibility: need: does not equal original need", subset)
}
Expand Down Expand Up @@ -228,7 +238,7 @@ func pruneCorrectness(snapshots []time.Time, policy Policy) error {
filteredSnap = append(filteredSnap, snapshots[at])
}
}
iKeep, iNeed := Prune(filteredSnap, policy)
iKeep, iNeed := Prune(filteredSnap, policy, loc)
if !maps.Equal(iNeed.count, need.count) {
return fmt.Errorf("subset %d: prune idempotentency: need: does not equal original need", subset)
}
Expand All @@ -252,7 +262,7 @@ func pruneCorrectness(snapshots []time.Time, policy Policy) error {
case Last:
continue
case Secondly:
key = period.Unit.String() + " " + snapshots[at].Truncate(-1).Format("2006-01-02 15:04:05")
key = period.Unit.String() + " " + strconv.FormatInt(snapshots[at].Truncate(-1).Unix(), 10)
case Daily:
key = period.Unit.String() + " " + snapshots[at].Truncate(-1).Format("2006-01-02")
case Monthly:
Expand Down Expand Up @@ -289,7 +299,7 @@ func pruneCorrectness(snapshots []time.Time, policy Policy) error {
*/
if subset != 0 {
lastKept = append(lastKept, snapshots[prevSubset:]...)
pKeep, _ := Prune(lastKept, policy)
pKeep, _ := Prune(lastKept, policy, loc)

var incN, absN int
lastKept = lastKept[:0]
Expand Down Expand Up @@ -335,6 +345,15 @@ func pruneCorrectness(snapshots []time.Time, policy Policy) error {
}

func TestPrune(t *testing.T) {
var locs []*time.Location
locs = append(locs, time.UTC)
for _, x := range []string{"EST5EDT", "WET", "Pacific/Chatham"} { // a variety of offsets
if loc, err := time.LoadLocation(x); err != nil {
panic(err)
} else {
locs = append(locs, loc)
}
}
for _, tc := range []func() (
times []time.Time,
policy Policy,
Expand Down Expand Up @@ -389,7 +408,7 @@ func TestPrune(t *testing.T) {
}

t.Run("Output", func(t *testing.T) {
keep, need := Prune(times, policy)
keep, need := Prune(times, policy, times[0].Location())

var b bytes.Buffer
for at, reason := range keep {
Expand Down Expand Up @@ -419,8 +438,17 @@ func TestPrune(t *testing.T) {
})

t.Run("Correctness", func(t *testing.T) {
if err := pruneCorrectness(times, policy); err != nil {
t.Error(err.Error())
for _, loc := range locs {
loc := loc
t.Run(loc.String(), func(t *testing.T) {
t.Parallel()
runtime.LockOSThread()
defer runtime.UnlockOSThread()

if err := pruneCorrectness(times, policy, loc); err != nil {
t.Error(err.Error())
}
})
}
})
})
Expand Down Expand Up @@ -448,7 +476,7 @@ func ExamplePrune() {
policy.MustSet(Last, 1, 3)
fmt.Println(policy)

keep, need := Prune(times, policy)
keep, need := Prune(times, policy, time.UTC)
for at, reason := range keep {
at := times[at]
if len(reason) != 0 {
Expand Down

0 comments on commit c50451a

Please sign in to comment.