Skip to content

Commit

Permalink
Address feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
Alejandro Hernandez committed Apr 24, 2024
1 parent 3658ef6 commit c7b2930
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 7 deletions.
1 change: 1 addition & 0 deletions packages/provider-elasticsearch/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"homepage": "https://github.com/smartprocure/contexture/tree/main/packages/provider-elasticsearch",
"dependencies": {
"@elastic/datemath": "^2.3.0",
"contexture-util": "workspace:^",
"debug": "^4.3.1",
"futil": "^1.76.4",
"js-combinatorics": "^2.1.1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import F from 'futil'
import { Permutation } from 'js-combinatorics'
import { stripLegacySubFields } from '../../utils/fields.js'
import { sanitizeTagInputs } from '../../utils/keywordGenerations.js'
import { queryStringCharacterBlacklist } from 'contexture-util/exampleTypes/tagsQuery.js'

let maxTagCount = 100

Expand Down Expand Up @@ -31,14 +32,10 @@ let addQuotesAndDistance = _.curry((tag, text) => {

let replaceReservedChars = _.flow(
_.toString,
// Most of these characters are `query_string` reserved characters. See
// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters
// The characters `;,$'&` are not reserved but they get stripped out by our
// analyzers so there's no point in sending them.
_.replace(/([&|!(){}[\]^"~*?\\<>;,$'])/g, ' '),
_.replace(new RegExp(`([${queryStringCharacterBlacklist}])`, 'g'), ' '),
// These characters are not stripped out by our analyzers but they are
// `query_string` reserved characters so we need to escape them.
_.replace(/([+\-=:/])/g, '\\$1')
_.replace(/([&+\-=:/])/g, '\\$1')
)

let tagToQueryString = (tag) => {
Expand Down
2 changes: 1 addition & 1 deletion packages/react/src/greyVest/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ let maxCharsPerTagWord = 100
//
// If in doubt, make a request to the `/{index}/analyze` elasticsearch endpoint
// to see exactly which characters get stripped out of text.
let wordRegex = /[^|><!(){}[\]^"~*?\\;,$']+/g
let wordRegex = /[^|!(){}[\]^"~*?\\<>;,$']+/g
let words = _.words.convert({ fixed: false })

// Convert string to words, take the first maxWordsPerTag, truncate them and convert back to string
Expand Down
5 changes: 5 additions & 0 deletions packages/util/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# contexture-util

Utilities common to all packages.

Code in this package should be isomorphic (i.e. it should run on both Node and the browser).
34 changes: 34 additions & 0 deletions packages/util/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"name": "contexture-util",
"version": "0.1.0",
"description": "Utilities for contexture packages",
"type": "module",
"packageManager": "[email protected]",
"exports": {
".": {
"import": "./dist/esm/index.js",
"require": "./dist/cjs/index.js"
},
"./*": {
"import": "./dist/esm/*",
"require": "./dist/cjs/*"
}
},
"files": [
"dist"
],
"scripts": {
"prepack": "node ../../scripts/esbuild.js",
"test": "NODE_NO_WARNINGS=1 NODE_OPTIONS=--experimental-vm-modules yarn run -T jest ."
},
"repository": {
"type": "git",
"url": "git+https://github.com/smartprocure/contexture.git"
},
"author": "Alejandro Hernandez",
"license": "MIT",
"bugs": {
"url": "https://github.com/smartprocure/contexture/issues"
},
"homepage": "https://github.com/smartprocure/contexture/tree/main/packages/util"
}
12 changes: 12 additions & 0 deletions packages/util/src/exampleTypes/tagsQuery.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// These are reserved characters in the context of an elastic `query_string`
// query that are unlikely to be searched for by a user.
// See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters
//
// NOTE: this text is meant to be interpolated inside a regex character class
// (e.g. `new RegExp(`[${queryStringCharacterBlacklist}]`)`), so it is written
// with `String.raw` exactly as it would appear in a regex literal: `\]` is an
// escaped closing bracket and `\\` is a literal backslash. With a plain
// template literal, `\\<` collapses to `\<`, which the regex engine reads as
// an escaped `<`, silently dropping the backslash from the blacklist.
let queryStringReserved = String.raw`|!(){}[\]^"~*?\\<>`

// These are characters stripped out by our analyzers so there's no point in
// sending them.
let strippedByAnalyzers = `;,$'`

// Characters that we should strip out from `query_string` queries before
// sending to elastic. The value is regex-character-class source text, not a
// plain list of characters (see note above).
export let queryStringCharacterBlacklist = `${queryStringReserved}${strippedByAnalyzers}`
Empty file added packages/util/src/index.js
Empty file.
1 change: 1 addition & 0 deletions scripts/esbuild.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ await fs.rm('dist', { force: true, recursive: true })
let entryPoints = glob.sync('src/**/*.js', {
ignore: ['src/**/*.{test,stories}.js', 'src/**/{test,stories}/**/*'],
})
console.log(entryPoints)

// Build project

Expand Down
7 changes: 7 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7817,6 +7817,7 @@ __metadata:
"@elastic/elasticsearch": ^7.11.0
agentkeepalive: ^4.1.4
contexture: ^0.12.21
contexture-util: "workspace:^"
debug: ^4.3.1
futil: ^1.76.4
js-combinatorics: ^2.1.1
Expand Down Expand Up @@ -7930,6 +7931,12 @@ __metadata:
languageName: unknown
linkType: soft

"contexture-util@workspace:^, contexture-util@workspace:packages/util":
version: 0.0.0-use.local
resolution: "contexture-util@workspace:packages/util"
languageName: unknown
linkType: soft

"contexture@^0.12.21, contexture@workspace:packages/server":
version: 0.0.0-use.local
resolution: "contexture@workspace:packages/server"
Expand Down

0 comments on commit c7b2930

Please sign in to comment.