Skip to content

Commit

Permalink
Add autoID for definition terms
Browse files Browse the repository at this point in the history
Fixes #13403
See #11566

Co-authored-by: Joe Mooring <[email protected]>
  • Loading branch information
bep and jmooring committed Feb 16, 2025
1 parent 9c2f8ec commit 7721fa6
Show file tree
Hide file tree
Showing 9 changed files with 261 additions and 46 deletions.
40 changes: 33 additions & 7 deletions markup/goldmark/autoid.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/gohugoio/hugo/common/text"

"github.com/yuin/goldmark/ast"
east "github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/util"

Expand All @@ -43,11 +44,11 @@ func sanitizeAnchorName(b []byte, idType string) []byte {
func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte {
buf := bp.GetBuffer()

if idType == goldmark_config.AutoHeadingIDTypeBlackfriday {
if idType == goldmark_config.AutoIDTypeBlackfriday {
// TODO(bep) make it more efficient.
buf.WriteString(blackfriday.SanitizedAnchorName(string(b)))
} else {
asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii
asciiOnly := idType == goldmark_config.AutoIDTypeGitHubAscii

if asciiOnly {
// Normalize it to preserve accents if possible.
Expand Down Expand Up @@ -90,8 +91,9 @@ func isAlphaNumeric(r rune) bool {
var _ parser.IDs = (*idFactory)(nil)

type idFactory struct {
idType string
vals map[string]struct{}
idType string
vals map[string]struct{}
duplicates []string
}

func newIDFactory(idType string) *idFactory {
Expand All @@ -101,11 +103,28 @@ func newIDFactory(idType string) *idFactory {
}
}

// stringValuesProvider is implemented by types that can list the string
// values they hold. idFactory satisfies it (see the assertion below).
type stringValuesProvider interface {
StringValues() []string
}

// Compile-time check that *idFactory implements stringValuesProvider.
var _ stringValuesProvider = (*idFactory)(nil)

// StringValues returns every ID recorded by the factory: first the unique
// IDs held in ids.vals, followed by the entries of ids.duplicates.
//
// The unique IDs come from ranging over a map, so their relative order is
// unspecified; the duplicates keep the order in which they were recorded.
func (ids *idFactory) StringValues() []string {
	out := make([]string, 0, len(ids.vals))
	for id := range ids.vals {
		out = append(out, id)
	}
	return append(out, ids.duplicates...)
}

func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) {
if buf.Len() == 0 {
if kind == ast.KindHeading {
buf.WriteString("heading")
} else if kind == east.KindDefinitionTerm {
buf.WriteString("term")
} else {
buf.WriteString("id")
}
Expand All @@ -123,11 +142,18 @@ func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
buf.Truncate(pos)
}
}

ids.vals[buf.String()] = struct{}{}
ids.put(buf.String())
})
}

// put records s as a known ID. The first occurrence of a value is stored in
// ids.vals; every later occurrence of the same value is appended to
// ids.duplicates instead.
func (ids *idFactory) put(s string) {
	if _, seen := ids.vals[s]; !seen {
		ids.vals[s] = struct{}{}
		return
	}
	ids.duplicates = append(ids.duplicates, s)
}

func (ids *idFactory) Put(value []byte) {
ids.vals[util.BytesToReadOnlyString(value)] = struct{}{}
ids.put(string(value))
}
18 changes: 9 additions & 9 deletions markup/goldmark/autoid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ tabspace
expect := expectlines[i]
c.Run(input, func(c *qt.C) {
b := []byte(input)
got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub))
got := string(sanitizeAnchorName(b, goldmark_config.AutoIDTypeGitHub))
c.Assert(got, qt.Equals, expect)
c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect)
c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoIDTypeGitHub), qt.Equals, expect)
c.Assert(string(b), qt.Equals, input)
})
}
Expand All @@ -89,20 +89,20 @@ tabspace
func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
c := qt.New(t)

c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good")
c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume")
c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoIDTypeGitHubAscii), qt.Equals, "god-is-good")
c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoIDTypeGitHubAscii), qt.Equals, "resume")
}

func TestSanitizeAnchorNameBlackfriday(t *testing.T) {
c := qt.New(t)
c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoHeadingIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we")
c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we")
}

func BenchmarkSanitizeAnchorName(b *testing.B) {
input := []byte("God is good: 神真美好")
b.ResetTimer()
for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub)
result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeGitHub)
if len(result) != 24 {
b.Fatalf("got %d", len(result))
}
Expand All @@ -113,7 +113,7 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
input := []byte("God is good: 神真美好")
b.ResetTimer()
for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii)
result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeGitHubAscii)
if len(result) != 12 {
b.Fatalf("got %d", len(result))
}
Expand All @@ -124,7 +124,7 @@ func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) {
input := []byte("God is good: 神真美好")
b.ResetTimer()
for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeBlackfriday)
result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeBlackfriday)
if len(result) != 24 {
b.Fatalf("got %d", len(result))
}
Expand All @@ -135,7 +135,7 @@ func BenchmarkSanitizeAnchorNameString(b *testing.B) {
input := "God is good: 神真美好"
b.ResetTimer()
for i := 0; i < b.N; i++ {
result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub)
result := sanitizeAnchorNameString(input, goldmark_config.AutoIDTypeGitHub)
if len(result) != 24 {
b.Fatalf("got %d", len(result))
}
Expand Down
12 changes: 4 additions & 8 deletions markup/goldmark/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) {
cfg: cfg,
md: md,
sanitizeAnchorName: func(s string) string {
return sanitizeAnchorNameString(s, cfg.MarkupConfig().Goldmark.Parser.AutoHeadingIDType)
return sanitizeAnchorNameString(s, cfg.MarkupConfig().Goldmark.Parser.AutoIDType)
},
}, nil
}), nil
Expand Down Expand Up @@ -188,16 +188,12 @@ func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
extensions = append(extensions, emoji.Emoji)
}

if cfg.Parser.AutoHeadingID {
parserOptions = append(parserOptions, parser.WithAutoHeadingID())
}

if cfg.Parser.Attribute.Title {
parserOptions = append(parserOptions, parser.WithAttribute())
}

if cfg.Parser.Attribute.Block {
extensions = append(extensions, attributes.New())
if cfg.Parser.Attribute.Block || cfg.Parser.AutoHeadingID || cfg.Parser.AutoDefinitionTermID {
extensions = append(extensions, attributes.New(cfg.Parser))
}

md := goldmark.New(
Expand Down Expand Up @@ -295,7 +291,7 @@ func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (converter.Resu
}

func (c *goldmarkConverter) newParserContext(rctx converter.RenderContext) *parserContext {
ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig().Goldmark.Parser.AutoHeadingIDType)))
ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig().Goldmark.Parser.AutoIDType)))
ctx.Set(tocEnableKey, rctx.RenderTOC)
return &parserContext{
Context: ctx,
Expand Down
39 changes: 32 additions & 7 deletions markup/goldmark/goldmark_config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
package goldmark_config

const (
AutoHeadingIDTypeGitHub = "github"
AutoHeadingIDTypeGitHubAscii = "github-ascii"
AutoHeadingIDTypeBlackfriday = "blackfriday"
AutoIDTypeGitHub = "github"
AutoIDTypeGitHubAscii = "github-ascii"
AutoIDTypeBlackfriday = "blackfriday"
)

// Default holds the default Goldmark configuration.
Expand Down Expand Up @@ -79,7 +79,8 @@ var Default = Config{
},
Parser: Parser{
AutoHeadingID: true,
AutoHeadingIDType: AutoHeadingIDTypeGitHub,
AutoDefinitionTermID: false,
AutoIDType: AutoIDTypeGitHub,
WrapStandAloneImageWithinParagraph: true,
Attribute: ParserAttribute{
Title: true,
Expand All @@ -97,6 +98,16 @@ type Config struct {
RenderHooks RenderHooks
}

// Init finalizes the configuration. It first runs c.Parser.Init and returns
// its error, if any, without touching anything else. It then switches
// AutoDefinitionTermID off whenever the DefinitionList extension is disabled.
func (c *Config) Init() error {
	if err := c.Parser.Init(); err != nil {
		return err
	}
	// Without the definition list extension the flag is forced to false;
	// when it was already false this assignment changes nothing.
	if !c.Extensions.DefinitionList {
		c.Parser.AutoDefinitionTermID = false
	}
	return nil
}

// RenderHooks contains configuration for Goldmark render hooks.
type RenderHooks struct {
Image ImageRenderHook
Expand Down Expand Up @@ -250,16 +261,30 @@ type Parser struct {
// auto generated heading ids.
AutoHeadingID bool

// The strategy to use when generating heading IDs.
// Available options are "github", "github-ascii".
// Enables auto definition term ids.
AutoDefinitionTermID bool

// The strategy to use when generating IDs.
// Available options are "github", "github-ascii", and "blackfriday".
// Default is "github", which will create GitHub-compatible anchor names.
AutoHeadingIDType string
AutoIDType string

// Enables custom attributes.
Attribute ParserAttribute

// Whether to wrap stand-alone images within a paragraph or not.
WrapStandAloneImageWithinParagraph bool

// Renamed to AutoIDType in 0.144.0.
AutoHeadingIDType string `json:"-"`
}

// Init applies backwards-compatibility fixes to the parser configuration.
//
// AutoHeadingIDType was renamed to AutoIDType in 0.144.0; when the old field
// is set, its value is copied into AutoIDType. Init always returns nil.
func (p *Parser) Init() error {
	if legacy := p.AutoHeadingIDType; legacy != "" {
		p.AutoIDType = legacy
	}
	return nil
}

type ParserAttribute struct {
Expand Down
91 changes: 79 additions & 12 deletions markup/goldmark/internal/extensions/attributes/attributes.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package attributes

import (
"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
"github.com/gohugoio/hugo/markup/goldmark/internal/render"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
east "github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
Expand All @@ -14,24 +17,29 @@ import (

var (
kindAttributesBlock = ast.NewNodeKind("AttributesBlock")
attrNameID = []byte("id")

defaultParser = new(attrParser)
defaultTransformer = new(transformer)
attributes goldmark.Extender = new(attrExtension)
defaultParser = new(attrParser)
)

func New() goldmark.Extender {
return attributes
func New(cfg goldmark_config.Parser) goldmark.Extender {
return &attrExtension{cfg: cfg}
}

type attrExtension struct{}
// attrExtension is the goldmark.Extender created by New. It carries the
// parser configuration it was constructed with.
type attrExtension struct {
cfg goldmark_config.Parser
}

func (a *attrExtension) Extend(m goldmark.Markdown) {
if a.cfg.Attribute.Block {
m.Parser().AddOptions(
parser.WithBlockParsers(
util.Prioritized(defaultParser, 100)),
)
}
m.Parser().AddOptions(
parser.WithBlockParsers(
util.Prioritized(defaultParser, 100)),
parser.WithASTTransformers(
util.Prioritized(defaultTransformer, 100),
util.Prioritized(&transformer{cfg: a.cfg}, 100),
),
)
}
Expand Down Expand Up @@ -92,18 +100,47 @@ func (a *attributesBlock) Kind() ast.NodeKind {
return kindAttributesBlock
}

type transformer struct{}
// transformer is the AST transformer registered by attrExtension.Extend.
// cfg is the parser configuration consulted during Transform and
// generateAutoID (block attributes, auto heading IDs, auto term IDs).
type transformer struct {
cfg goldmark_config.Parser
}

// isFragmentNode reports whether n is one of the node kinds that receive an
// id attribute in Transform: headings and definition terms.
func (a *transformer) isFragmentNode(n ast.Node) bool {
	kind := n.Kind()
	return kind == east.KindDefinitionTerm || kind == ast.KindHeading
}

func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
attributes := make([]ast.Node, 0, 500)
var attributes []ast.Node
if a.cfg.Attribute.Block {
attributes = make([]ast.Node, 0, 500)
}
ast.Walk(node, func(node ast.Node, entering bool) (ast.WalkStatus, error) {
if entering && node.Kind() == kindAttributesBlock {
if !entering {
return ast.WalkContinue, nil
}

if a.isFragmentNode(node) {
if id, found := node.Attribute(attrNameID); !found {
a.generateAutoID(node, reader, pc)
} else {
pc.IDs().Put(id.([]byte))
}
}

if a.cfg.Attribute.Block && node.Kind() == kindAttributesBlock {
// Attributes for fenced code blocks are handled in their own extension,
// but note that we currently only support code block attributes when
// CodeFences=true.
if node.PreviousSibling() != nil && node.PreviousSibling().Kind() != ast.KindFencedCodeBlock && !node.HasBlankPreviousLines() {
attributes = append(attributes, node)
return ast.WalkSkipChildren, nil
} else {
// remove attributes node
node.Parent().RemoveChild(node.Parent(), node)
}
}

Expand All @@ -123,3 +160,33 @@ func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parse
attr.Parent().RemoveChild(attr.Parent(), attr)
}
}

// generateAutoID derives an id attribute for n and sets it on the node.
//
// The source text for the ID depends on the node type and on configuration:
//   - *ast.Heading: textHeadingID(n, reader), only when cfg.AutoHeadingID is set.
//   - *east.DefinitionTerm: render.TextPlain over the node, only when
//     cfg.AutoDefinitionTermID is set.
//
// For any other node type, a disabled option, or empty source text, the node
// is left unchanged. Otherwise the ID is produced by pc.IDs().Generate.
func (a *transformer) generateAutoID(n ast.Node, reader text.Reader, pc parser.Context) {
	var raw []byte
	switch typed := n.(type) {
	case *ast.Heading:
		if a.cfg.AutoHeadingID {
			raw = textHeadingID(typed, reader)
		}
	case *east.DefinitionTerm:
		if a.cfg.AutoDefinitionTermID {
			raw = []byte(render.TextPlain(typed, reader.Source()))
		}
	}

	// Nothing to derive an ID from.
	if len(raw) == 0 {
		return
	}

	id := pc.IDs().Generate(raw, n.Kind())
	n.SetAttribute(attrNameID, id)
}

// textHeadingID returns the source bytes of the heading's last line, or nil
// when the heading has no lines. Markdown Setext headings can span multiple
// lines; the last one is used for the ID.
func textHeadingID(node *ast.Heading, reader text.Reader) []byte {
	lines := node.Lines()
	count := lines.Len()
	if count < 1 {
		return nil
	}
	last := lines.At(count - 1)
	return last.Value(reader.Source())
}
Loading

0 comments on commit 7721fa6

Please sign in to comment.