diff --git a/.changeset/gold-bags-love.md b/.changeset/gold-bags-love.md new file mode 100644 index 000000000..fe352d8e1 --- /dev/null +++ b/.changeset/gold-bags-love.md @@ -0,0 +1,5 @@ +--- +'contexture-util': patch +--- + +New contexture-util package diff --git a/.changeset/mean-eels-grin.md b/.changeset/mean-eels-grin.md new file mode 100644 index 000000000..6d879fa81 --- /dev/null +++ b/.changeset/mean-eels-grin.md @@ -0,0 +1,6 @@ +--- +'contexture-elasticsearch': patch +'contexture-react': patch +--- + +Allow some special characters in TagsQuery component diff --git a/packages/provider-elasticsearch/package.json b/packages/provider-elasticsearch/package.json index e17142c9d..d09054e9e 100644 --- a/packages/provider-elasticsearch/package.json +++ b/packages/provider-elasticsearch/package.json @@ -36,7 +36,9 @@ "homepage": "https://github.com/smartprocure/contexture/tree/main/packages/provider-elasticsearch", "dependencies": { "@elastic/datemath": "^2.3.0", + "contexture-util": "^0.1.0", "debug": "^4.3.1", + "escape-string-regexp": "^5.0.0", "futil": "^1.76.4", "js-combinatorics": "^2.1.1", "lodash": "^4.17.4", diff --git a/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js b/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js index 90ff4e6ec..29360294b 100644 --- a/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js +++ b/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.js @@ -2,7 +2,9 @@ import _ from 'lodash/fp.js' import F from 'futil' import { Permutation } from 'js-combinatorics' import { stripLegacySubFields } from '../../utils/fields.js' -import { sanitizeTagInputs } from '../../utils/keywordGenerations.js' +import { sanitizeTagInputs } from 'contexture-util/keywordGenerations.js' +import { queryStringCharacterBlacklist } from 'contexture-util/exampleTypes/tagsQuery.js' +import escapeStringRegexp from 'escape-string-regexp' let maxTagCount = 100 @@ -29,11 +31,17 @@ let addQuotesAndDistance = _.curry((tag, text) => { return text + (tag.misspellings ? '~1' : '') }) -// https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters +let replaceRegexp = new RegExp( + `[${escapeStringRegexp(queryStringCharacterBlacklist)}]`, + 'g' +) + let replaceReservedChars = _.flow( _.toString, - // Replace characters with white space ` ` - _.replace(/([+\-=&|!(){}[\]^"~*?:\\/<>;,$'])/g, ' ') + _.replace(replaceRegexp, ' '), + // These characters are not stripped out by our analyzers but they are + // `query_string` reserved characters so we need to escape them. + _.replace(/([&+\-=:/])/g, '\\$1') ) let tagToQueryString = (tag) => { @@ -68,7 +76,7 @@ let limitResultsToCertainTags = _.find('onlyShowTheseResults') let tagsToQueryString = (tags, join) => _.flow( F.when(limitResultsToCertainTags, _.filter('onlyShowTheseResults')), - _.map(tagToQueryString), + F.compactMap(tagToQueryString), joinTags(join) )(tags) diff --git a/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.test.js b/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.test.js index 052ae60f8..3402a251f 100644 --- a/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.test.js +++ b/packages/provider-elasticsearch/src/example-types/filters/tagsQuery.test.js @@ -63,8 +63,15 @@ describe('addQuotesAndDistance', () => { describe('replaceReservedChars', () => { it('should replace reserved characters with empty space', () => { expect( - replaceReservedChars('foo: [bar] (baz) - 1 ^ 2 <> 3 !$ 4,5') - ).toEqual('foo bar baz 1 2 3 4 5') + replaceReservedChars( + 'foo| [bar!] (baz) {1} ^ 2 <> 3 !$ 4,5 ~house *lamp;, me' + ) + ).toEqual('foo bar baz 1 2 3 4 5 house lamp me') + }) + it('should escape reserved characters', () => { + expect(replaceReservedChars('foo+ bar- baz= :house /lamp')).toEqual( + 'foo\\+ bar\\- baz\\= \\:house \\/lamp' + ) }) }) @@ -142,6 +149,11 @@ describe('tagsToQueryString', () => { ) ).toEqual('foo OR bar') }) + it('should ignore empty words', () => { + expect( + tagsToQueryString([{ word: 'foo' }, { word: '' }, { word: 'bar' }], 'any') + ).toEqual('foo OR bar') + }) }) describe('hasValue', () => { diff --git a/packages/react/package.json b/packages/react/package.json index 427eb6a1f..081ed9a95 100644 --- a/packages/react/package.json +++ b/packages/react/package.json @@ -68,7 +68,6 @@ "mobx": "^4.3.1", "mobx-react": "^6.3.0", "mobx-utils": "^5.0.0", - "moment": "^2.24.0", "react": "^16.8.0", "react-dom": "^16.8.0", "react-select": "^2.0.0", @@ -80,6 +79,8 @@ "dependencies": { "@chakra-ui/react-use-outside-click": "^2.1.0", "contexture": "^0.12.21", + "contexture-util": "0.1.0", + "escape-string-regexp": "^5.0.0", "futil": "^1.76.4", "lodash": "^4.17.15", "moment": "^2.24.0", diff --git a/packages/react/src/exampleTypes/ExpandableTagsQuery/index.js b/packages/react/src/exampleTypes/ExpandableTagsQuery/index.js index f045af4fb..88d96da17 100644 --- a/packages/react/src/exampleTypes/ExpandableTagsQuery/index.js +++ b/packages/react/src/exampleTypes/ExpandableTagsQuery/index.js @@ -16,9 +16,9 @@ import { } from '../TagsQuery/utils.js' import ActionsMenu from '../TagsQuery/ActionsMenu.js' import { useOutsideClick } from '@chakra-ui/react-use-outside-click' -import { sanitizeTagInputs } from 'contexture-elasticsearch/utils/keywordGenerations.js' +import { sanitizeTagInputs } from 'contexture-util/keywordGenerations.js' import KeywordGenerations from './KeywordGenerations.js' -import { wordRegex, wordRegexWithDot } from '../../greyVest/utils.js' +import { sanitizeQueryStringTag } from '../../greyVest/utils.js' let innerHeightLimit = 40 @@ -133,10 +133,6 @@ let TagsWrapper = observer( popoverOffsetY, theme: { Icon, TagsInput, Tag, Popover }, joinOptions, - // Allow tag keywords to contain dots in them if the user is searching for exact - // words instead of their variations. - wordsMatchPattern = node.exact ? wordRegexWithDot : wordRegex, - sanitizeTags = true, splitCommas = true, maxTags = 1000, hasPopover, @@ -195,9 +191,8 @@ let TagsWrapper = observer( 0 ? _.map(tagValueField, node.tags) diff --git a/packages/react/src/greyVest/ExpandableTagsInput.js b/packages/react/src/greyVest/ExpandableTagsInput.js index 239fca789..9dc7e5ea4 100644 --- a/packages/react/src/greyVest/ExpandableTagsInput.js +++ b/packages/react/src/greyVest/ExpandableTagsInput.js @@ -2,7 +2,7 @@ import React from 'react' import _ from 'lodash/fp.js' import { observer } from 'mobx-react' import { Tag as DefaultTag, Flex } from './index.js' -import { sanitizeTagWords, splitTagOnComma, wordRegex } from './utils.js' +import { createTags } from './utils.js' export let Tags = ({ reverse = false, @@ -38,7 +38,7 @@ let isValidInput = (tag, tags) => !_.isEmpty(tag) && !_.includes(tag, tags) let ExpandableTagsInput = ({ tags, - addTags, + addTags: setTags, removeTag, submit = _.noop, tagStyle, @@ -48,30 +48,18 @@ let ExpandableTagsInput = ({ autoFocus, onBlur = _.noop, onInputChange = _.noop, - maxWordsPerTag = 100, - maxCharsPerTagWord = 100, - wordsMatchPattern = wordRegex, onTagClick = _.noop, - sanitizeTags = true, + sanitizeTagFn, Tag = DefaultTag, ...props }) => { - let sanitizeTagFn = sanitizeTagWords( - wordsMatchPattern, - maxWordsPerTag, - maxCharsPerTagWord - ) - - addTags = _.flow( - _.trim, - (tags) => (splitCommas ? splitTagOnComma(tags) : _.castArray(tags)), - (tags) => (sanitizeTags ? _.map(sanitizeTagFn, tags) : tags), - _.difference(_, tags), - addTags - ) - let [currentInput, setCurrentInput] = React.useState('') + let addTags = (input) => { + let newTags = createTags({ input, splitCommas, sanitizeTagFn }) + setTags(_.difference(newTags, tags)) + } + return (
diff --git a/packages/react/src/greyVest/TagsInput.js b/packages/react/src/greyVest/TagsInput.js index 801ab42b3..fe3d2d071 100644 --- a/packages/react/src/greyVest/TagsInput.js +++ b/packages/react/src/greyVest/TagsInput.js @@ -1,10 +1,9 @@ import React, { forwardRef } from 'react' import _ from 'lodash/fp.js' -import { observable } from '../utils/mobx.js' -import { observer, inject } from 'mobx-react' +import { observer } from 'mobx-react' import Flex from './Flex.js' import DefaultTag from './Tag.js' -import { sanitizeTagWords, splitTagOnComma, wordRegex } from './utils.js' +import { createTags } from './utils.js' let isValidInput = (tag, tags) => !_.isEmpty(tag) && !_.includes(tag, tags) @@ -12,7 +11,7 @@ let TagsInput = forwardRef( ( { tags, - addTags, + addTags: setTags, removeTag, submit = _.noop, tagStyle, @@ -22,10 +21,7 @@ let TagsInput = forwardRef( onBlur = _.noop, onInputChange = _.noop, onTagClick = _.noop, - maxWordsPerTag = 100, - maxCharsPerTagWord = 100, - wordsMatchPattern = wordRegex, - sanitizeTags = true, + sanitizeTagFn, Tag = DefaultTag, ...props }, @@ -34,19 +30,10 @@ let TagsInput = forwardRef( let containerRef = React.useRef() let [currentInput, setCurrentInput] = React.useState('') - let sanitizeTagFn = sanitizeTagWords( - wordsMatchPattern, - maxWordsPerTag, - maxCharsPerTagWord - ) - - addTags = _.flow( - _.trim, - (tags) => (splitCommas ? splitTagOnComma(tags) : _.castArray(tags)), - (tags) => (sanitizeTags ? _.map(sanitizeTagFn, tags) : tags), - _.difference(_, tags), - addTags - ) + let addTags = (input) => { + let newTags = createTags({ input, splitCommas, sanitizeTagFn }) + setTags(_.difference(newTags, tags)) + } return (
@@ -118,19 +105,4 @@ let TagsInput = forwardRef( } ) -// Just uses an internal observable array -export let MockTagsInput = inject(() => { - let tags = observable([]) - return { - tags, - addTags(tag) { - tags.push(tag) - }, - removeTag(tag) { - tags = _.without(tag, tags) - }, - } -})(TagsInput) -MockTagsInput.displayName = 'MockTagsInput' - export default observer(TagsInput) diff --git a/packages/react/src/greyVest/TagsInput.stories.js b/packages/react/src/greyVest/TagsInput.stories.js new file mode 100644 index 000000000..43ef0dd09 --- /dev/null +++ b/packages/react/src/greyVest/TagsInput.stories.js @@ -0,0 +1,25 @@ +import _ from 'lodash/fp.js' +import React from 'react' +import TagsInput from './TagsInput.js' + +export default { + component: TagsInput, +} + +export const Default = () => { + let [tags, setTags] = React.useState([ + 'janitor', + 'soap', + 'cleaner', + 'cleaning', + 'clean', + ]) + return ( + setTags((current) => _.union(tags, current))} + removeTag={(tag) => setTags((current) => _.pull(tag, current))} + tagStyle={{ background: 'lavender' }} + /> + ) +} diff --git a/packages/react/src/greyVest/utils.js b/packages/react/src/greyVest/utils.js index ad7a3d6c8..68ceaedfa 100644 --- a/packages/react/src/greyVest/utils.js +++ b/packages/react/src/greyVest/utils.js @@ -1,41 +1,58 @@ import _ from 'lodash/fp.js' import F from 'futil' +import { queryStringCharacterBlacklist } from 'contexture-util/exampleTypes/tagsQuery.js' +import escapeStringRegexp from 'escape-string-regexp' export let openBinding = (...lens) => ({ isOpen: F.view(...lens), onClose: F.off(...lens), }) +let maxWordsPerTag = 100 +let maxCharsPerTagWord = 100 + +// Strip these characters when splitting a tag into words. We do this to display +// tags that are closer to what we send to elastic in a `query_string` query. +// +// See https://github.com/smartprocure/contexture-elasticsearch/pull/170 +// +// If in doubt, make a request to the `/{index}/analyze` elasticsearch endpoint +// to see exactly which characters get stripped out of text. +let wordRegexp = new RegExp( + `[^${escapeStringRegexp(queryStringCharacterBlacklist)}]+`, + 'g' +) +let words = _.words.convert({ fixed: false }) + // Convert string to words, take the first maxWordsPerTag, truncate them and convert back to string -export let sanitizeTagWords = ( - wordsMatchPattern, - maxWordsPerTag, - maxCharsPerTagWord -) => { - let words = _.words.convert({ fixed: false }) - return _.flow( - (string) => words(string, wordsMatchPattern), - _.take(maxWordsPerTag), - _.map((word) => - _.flow( - _.truncate({ length: maxCharsPerTagWord, omission: '' }), - // Remove beginning of line dash and space dash - _.replace(/^-| -/g, ' '), - _.trim - )(word) - ), - _.join(' ') - ) -} +export let sanitizeQueryStringTag = _.flow( + (string) => words(string, wordRegexp), + _.take(maxWordsPerTag), + _.map((word) => + _.flow( + _.truncate({ length: maxCharsPerTagWord, omission: '' }), + // Remove beginning of line dash and space dash + // https://github.com/smartprocure/spark/issues/10923 + _.replace(/^-| -/g, ' '), + _.trim + )(word) + ), + _.join(' ') +) // Split a tag on comma into unique words -export let splitTagOnComma = _.flow( - _.trim, +let splitTagOnComma = _.flow( _.split(','), _.invokeMap('trim'), _.compact, _.uniq ) -export let wordRegex = /[-\w]+/g -export let wordRegexWithDot = /[-.\w]+/g +export let createTags = ({ input, splitCommas, sanitizeTagFn }) => + _.flow( + _.trim, + splitCommas ? splitTagOnComma : _.identity, + _.castArray, + sanitizeTagFn ? _.map(sanitizeTagFn) : _.identity, + _.compact + )(input) diff --git a/packages/util/README.md b/packages/util/README.md new file mode 100644 index 000000000..9118974ed --- /dev/null +++ b/packages/util/README.md @@ -0,0 +1,5 @@ +# common + +Utilities common to all packages. + +Code in this package should be isomorphic (e.g. should run on both node and the browser). diff --git a/packages/util/package.json b/packages/util/package.json new file mode 100644 index 000000000..e9021b3ba --- /dev/null +++ b/packages/util/package.json @@ -0,0 +1,34 @@ +{ + "name": "contexture-util", + "version": "0.1.0", + "description": "Utilities for contexture packages", + "type": "module", + "packageManager": "yarn@3.3.1", + "exports": { + ".": { + "import": "./dist/esm/index.js", + "require": "./dist/cjs/index.js" + }, + "./*": { + "import": "./dist/esm/*", + "require": "./dist/cjs/*" + } + }, + "files": [ + "dist" + ], + "scripts": { + "prepack": "node ../../scripts/esbuild.js", + "test": "NODE_NO_WARNINGS=1 NODE_OPTIONS=--experimental-vm-modules yarn run -T jest ." + }, + "repository": { + "type": "git", + "url": "git+https://github.com/smartprocure/contexture.git" + }, + "author": "Alejandro Hernandez", + "license": "MIT", + "bugs": { + "url": "https://github.com/smartprocure/contexture/issues" + }, + "homepage": "https://github.com/smartprocure/contexture/tree/main/packages/common" +} diff --git a/packages/util/src/exampleTypes/tagsQuery.js b/packages/util/src/exampleTypes/tagsQuery.js new file mode 100644 index 000000000..bbdacbabc --- /dev/null +++ b/packages/util/src/exampleTypes/tagsQuery.js @@ -0,0 +1,12 @@ +// These are reserved characters in the context of an elastic `query_string` +// query that are unlikely to be searched for by an user. +// See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#_reserved_characters +let queryStringReserved = `|!(){}[]^"~*?\\<>` + +// These are characters stripped out by our analyzers so there's no point in +// sending them. +let strippedByAnalyzers = `;,$'` + +// Characters that we should strip out from `query_string` queries before +// sending to elastic. +export let queryStringCharacterBlacklist = `${queryStringReserved}${strippedByAnalyzers}` diff --git a/packages/provider-elasticsearch/src/utils/keywordGenerations.js b/packages/util/src/keywordGenerations.js similarity index 100% rename from packages/provider-elasticsearch/src/utils/keywordGenerations.js rename to packages/util/src/keywordGenerations.js diff --git a/packages/provider-elasticsearch/src/utils/keywordGenerations.test.js b/packages/util/src/keywordGenerations.test.js similarity index 100% rename from packages/provider-elasticsearch/src/utils/keywordGenerations.test.js rename to packages/util/src/keywordGenerations.test.js diff --git a/yarn.lock b/yarn.lock index 761e0aa07..6a8ed287d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7817,7 +7817,9 @@ __metadata: "@elastic/elasticsearch": ^7.11.0 agentkeepalive: ^4.1.4 contexture: ^0.12.21 + contexture-util: ^0.1.0 debug: ^4.3.1 + escape-string-regexp: ^5.0.0 futil: ^1.76.4 js-combinatorics: ^2.1.1 json-stable-stringify: ^1.0.1 @@ -7896,8 +7898,10 @@ __metadata: contexture: ^0.12.21 contexture-client: ^2.53.7 contexture-elasticsearch: ^1.27.3 + contexture-util: 0.1.0 elasticsearch-browser: ^14.2.2 emoji-datasource: ^5.0.1 + escape-string-regexp: ^5.0.0 eslint-plugin-react: ^7.32.0 eslint-plugin-storybook: ^0.6.14 futil: ^1.76.4 @@ -7930,6 +7934,12 @@ __metadata: languageName: unknown linkType: soft +"contexture-util@0.1.0, contexture-util@^0.1.0, contexture-util@workspace:packages/util": + version: 0.0.0-use.local + resolution: "contexture-util@workspace:packages/util" + languageName: unknown + linkType: soft + "contexture@^0.12.21, contexture@workspace:packages/server": version: 0.0.0-use.local resolution: "contexture@workspace:packages/server" @@ -9185,6 +9195,13 @@ __metadata: languageName: node linkType: hard +"escape-string-regexp@npm:^5.0.0": + version: 5.0.0 + resolution: "escape-string-regexp@npm:5.0.0" + checksum: 20daabe197f3cb198ec28546deebcf24b3dbb1a5a269184381b3116d12f0532e06007f4bc8da25669d6a7f8efb68db0758df4cd981f57bc5b57f521a3e12c59e + languageName: node + linkType: hard + "escodegen@npm:^2.1.0": version: 2.1.0 resolution: "escodegen@npm:2.1.0"