Skip to content

Commit

Permalink
perf: remove allocs from iterator in sis for 16-512 params
Browse files Browse the repository at this point in the history
  • Loading branch information
gbotrel committed Jan 21, 2025
1 parent bc62245 commit 5d3af9e
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 15 deletions.
1 change: 0 additions & 1 deletion ecc/bls12-377/fr/sis/sis.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 47 additions & 4 deletions field/babybear/sis/sis.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions field/generator/generator_sis.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ func generateSIS(F *config.Field, outputDir string) error {
FF string
FieldPackagePath string
HasUnrolledFFT bool
F31 bool
}

data := &sisTemplateData{
FF: F.PackageName,
FieldPackagePath: fieldImportPath,
HasUnrolledFFT: F.NbBytes == 32,
F31: F.F31,
}

funcs := make(map[string]interface{})
Expand Down
64 changes: 59 additions & 5 deletions field/generator/internal/templates/sis/sis.go.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,65 @@ func (r *RSis) Hash(v, res []{{ .FF }}.Element) error {
}
{{- end}}

// inner hash
it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8)
for i := 0; i < len(r.Ag); i++ {
r.InnerHash(it, res, k, r.kz, i, mask)
}
{{- if .F31}}
if r.Degree == 512 && r.LogTwoBound == 16 {
// this is our hot path; we bypass the limb iterator here because, with
// avx512 instructions, the iterator itself ends up being most of the CPU time.
er := {{ .FF }}.Element{1} // mul by 1 --> mont reduce
polId := 0
var k512 [512]{{ .FF }}.Element
vk := {{ .FF }}.Vector(k512[:])
vRes := {{ .FF }}.Vector(res)
vb := {{ .FF }}.Vector(k512[256:])

cosets, err := r.Domain.CosetTable()
if err != nil {
return err
}
vCosets := {{ .FF }}.Vector(cosets)

for j := 0; j < len(v); j+=256 {
start := j
end := j + 256
end = min(end, len(v))

// use the upper half of vk (vb) as scratch space: copy the chunk of v there so it can be batch-converted to regular form
copy(vb[:], v[start:end])
for k:= (end-start); k < 256; k++ {
vb[k][0] = 0
}
// batch montgomery -> regular
vb.ScalarMul(vb, &er)

// do the limb split
for k := 0; k < 256; k++ {
k512[k*2][0] = uint32(uint16(vb[k][0]))
k512[k*2+1][0] = uint32(uint16(vb[k][0] >> 16))
}

// inner hash
vk.Mul(vk, vCosets)
r.Domain.FFT(k512[:], fft.DIF, fft.WithNbTasks(1))
vk.Mul(vk, {{.FF}}.Vector(r.Ag[polId]))
vRes.Add(vRes, vk)
polId++
}
} else {
// inner hash
it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8)
for i := 0; i < len(r.Ag); i++ {
r.InnerHash(it, res, k, r.kz, i, mask)
}
}
{{- else}}
// inner hash
it := NewLimbIterator(&VectorIterator{v: v}, r.LogTwoBound/8)
for i := 0; i < len(r.Ag); i++ {
r.InnerHash(it, res, k, r.kz, i, mask)
}
{{- end}}



// reduces mod Xᵈ+1
r.Domain.FFTInverse(res, fft.DIT, fft.OnCoset(), fft.WithNbTasks(1))
Expand Down
1 change: 0 additions & 1 deletion field/goldilocks/sis/sis.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 47 additions & 4 deletions field/koalabear/sis/sis.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5d3af9e

Please sign in to comment.