Skip to content

Commit

Permalink
feat: Add way to style identifiers.
Browse files Browse the repository at this point in the history
Replace "default" token with "whitespace" and "identifier" tokens, with fallback to "unknown" token.
Also, classify backticked identifiers like `foo` as "identifier" rather than "string".

This allows for identifiers to be styled independently from strings and whitespace.

It also simplifies getSegments() from 30 lines down to 5, by removing the special-case
code for the "default" token.

Fixes: #147.
  • Loading branch information
wkeese committed Oct 4, 2023
1 parent 4a3aa67 commit cadb0a9
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 111 deletions.
32 changes: 16 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ document.body.innerHTML += highlighted
**Output:**
```html
<span class="sql-hl-keyword">SELECT</span>
<span class="sql-hl-string">`id`</span>
<span class="sql-hl-identifier">`id`</span>
<span class="sql-hl-special">,</span>
<span class="sql-hl-string">`username`</span>
<span class="sql-hl-identifier">`username`</span>
<span class="sql-hl-keyword">FROM</span>
<span class="sql-hl-string">`users`</span>
<span class="sql-hl-identifier">`users`</span>
<span class="sql-hl-keyword">WHERE</span>
<span class="sql-hl-string">`email`</span>
<span class="sql-hl-identifier">`email`</span>
<span class="sql-hl-special">=</span>
<span class="sql-hl-string">'test@example.com'</span>
```
Expand Down Expand Up @@ -112,22 +112,22 @@ console.log(segments)
```js
[
{ name: 'keyword', content: 'SELECT' },
{ name: 'default', content: ' ' },
{ name: 'string', content: '`id`' },
{ name: 'whitespace', content: ' ' },
{ name: 'identifier', content: '`id`' },
{ name: 'special', content: ',' },
{ name: 'default', content: ' ' },
{ name: 'string', content: '`username`' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'identifier', content: '`username`' },
{ name: 'whitespace', content: ' ' },
{ name: 'keyword', content: 'FROM' },
{ name: 'default', content: ' ' },
{ name: 'string', content: '`users`' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'identifier', content: '`users`' },
{ name: 'whitespace', content: ' ' },
{ name: 'keyword', content: 'WHERE' },
{ name: 'default', content: ' ' },
{ name: 'string', content: '`email`' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'identifier', content: '`email`' },
{ name: 'whitespace', content: ' ' },
{ name: 'special', content: '=' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'string', content: "'test@example.com'" }
]
```
Expand Down
73 changes: 23 additions & 50 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@ const DEFAULT_OPTIONS = {
}
}

const DEFAULT_KEYWORD = 'default'

const highlighters = [
/\b(?<number>\d+(?:\.\d+)?)\b/,

// Note: Repeating string escapes like 'sql''server' will also work as they are just repeating strings
/(?<string>'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*"|`(?:[^`\\]|\\.)*`)/,
/(?<string>'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*")/,

/(?<comment>--[^\n\r]*|#[^\n\r]*|\/\*(?:[^*]|\*(?!\/))*\*\/)/,

Expand All @@ -34,54 +32,29 @@ const highlighters = [

/(?<bracket>[()])/,

/(?<special>!=|[=%*/\-+,;:<>])/
]
/(?<special>!=|[=%*/\-+,;:<>.])/,

function getRegexString (regex) {
const str = regex.toString()
return str.replace(/^\/|\/\w*$/g, '')
}
/(?<identifier>\b\w+\b|`(?:[^`\\]|\\.)*`)/,

/(?<whitespace>\s+)/,

/(?<unknown>\.+?)/
]

// Regex of the shape /(.*?)|((?<token1>...)|(?<token2>...)|...|$)/y
// Regex of the shape /(?<token1>...)|(?<token2>...)|.../g
const tokenizer = new RegExp(
'(.*?)(' +
'\\b(?<keyword>' + keywords.join('|') + ')\\b|' +
highlighters.map(getRegexString).join('|') +
'|$)', // $ needed to match "default" till the end of string
'isy'
[
'\\b(?<keyword>' + keywords.join('|') + ')\\b',
...highlighters.map(regex => regex.source)
].join('|'),
'gis'
)

function getSegments (sqlString) {
const segments = []
let match

// Reset the starting position
tokenizer.lastIndex = 0

// This is probably the one time when an assignment inside a condition makes sense
// eslint-disable-next-line no-cond-assign
while (match = tokenizer.exec(sqlString)) {
if (match[1]) {
segments.push({
name: DEFAULT_KEYWORD,
content: match[1]
})
}

if (match[2]) {
const name = Object.keys(match.groups).find(key => match.groups[key])
segments.push({
name,
content: match.groups[name]
})
}

// Stop at the end of string
if (match.index + match[0].length >= sqlString.length) {
break
}
}

const segments = Array.from(sqlString.matchAll(tokenizer), match => ({
name: Object.keys(match.groups).find(key => match.groups[key]),
content: match[0]
}))
return segments
}

Expand All @@ -90,14 +63,14 @@ function highlight (sqlString, options) {

return getSegments(sqlString)
.map(({ name, content }) => {
if (name === DEFAULT_KEYWORD) {
return content
}
if (options.html) {
const escapedContent = options.htmlEscaper(content)
return `<span class="${options.classPrefix}${name}">${escapedContent}</span>`
return name === 'whitespace' ? escapedContent : `<span class="${options.classPrefix}${name}">${escapedContent}</span>`
}
if (options.colors[name]) {
return options.colors[name] + content + options.colors.clear
}
return options.colors[name] + content + options.colors.clear
return content
})
.join('')
}
Expand Down
Loading

0 comments on commit cadb0a9

Please sign in to comment.