From 1237b3ab404a3a0f8f4395fde734aa7f82a645c4 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Mon, 5 Feb 2024 17:51:38 -0500 Subject: [PATCH 1/2] Low-hanging perf improvements in highlighting code --- .changeset/purple-ghosts-heal.md | 5 ++++ .../metricGroups/percentilesGroupStats.js | 2 +- .../results/highlighting/request.js | 25 +++++++++++-------- 3 files changed, 20 insertions(+), 12 deletions(-) create mode 100644 .changeset/purple-ghosts-heal.md diff --git a/.changeset/purple-ghosts-heal.md b/.changeset/purple-ghosts-heal.md new file mode 100644 index 000000000..0ccaa5db8 --- /dev/null +++ b/.changeset/purple-ghosts-heal.md @@ -0,0 +1,5 @@ +--- +'contexture-elasticsearch': patch +--- + +Low-hanging perf improvements in elasticsearch highlighting diff --git a/packages/provider-elasticsearch/src/example-types/metricGroups/percentilesGroupStats.js b/packages/provider-elasticsearch/src/example-types/metricGroups/percentilesGroupStats.js index a0166dd94..2065184b6 100644 --- a/packages/provider-elasticsearch/src/example-types/metricGroups/percentilesGroupStats.js +++ b/packages/provider-elasticsearch/src/example-types/metricGroups/percentilesGroupStats.js @@ -6,7 +6,7 @@ import { groupStats } from './groupStatUtils.js' // [1, 2, 3] -> [{to: 1}, {from: 1, to: 2}, {from: 2, to: 3}, {from: 3}] let boundariesToRanges = _.flow( F.mapIndexed((to, i, list) => F.compactObject({ from: list[i - 1], to })), - (arr) => F.push({ from: _.last(arr).to }, arr) + (arr) => F.pushOn(arr, { from: _.last(arr).to }) ) let drilldownToRange = (drilldown) => { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js index 4f3761bc0..5abd5d163 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js @@ -95,16 +95,19 @@ let getFieldSubFields = (field) => /** * Returns object of all subfields in a schema. */ -let getSchemaSubFields = (schema) => - F.reduceIndexed( - (acc, field, path) => - F.mergeOn( - acc, - _.mapKeys((k) => `${path}.${k}`, getFieldSubFields(field)) - ), - {}, - schema.fields - ) +let getSchemaSubFields = _.memoize( + (schema) => + F.reduceIndexed( + (acc, field, path) => { + let subFields = getFieldSubFields(field) + for (let k in subFields) acc[`${path}.${k}`] = subFields[k] + return acc + }, + {}, + schema.fields + ), + _.get('elasticsearch.index') +) /** * Returns object of all group fields and their subfields in a schema. @@ -148,7 +151,7 @@ export let getAllHighlightFields = _.memoize((schema) => { let collectKeysAndValues = (f, coll) => F.reduceTree()( (acc, val, key) => - f(val) ? F.push(val, acc) : f(key) ? F.push(key, acc) : acc, + f(val) ? F.pushOn(acc, val) : f(key) ? F.pushOn(acc, key) : acc, [], coll ) From 462eaf073faa4e1dc961b4cd0371149fb22ddd76 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Tue, 6 Feb 2024 10:17:27 -0500 Subject: [PATCH 2/2] Address David's feedback --- .../metricGroups/percentilesGroupStats.js | 11 ++++-- .../results/highlighting/request.js | 38 +++++++++---------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/metricGroups/percentilesGroupStats.js b/packages/provider-elasticsearch/src/example-types/metricGroups/percentilesGroupStats.js index 2065184b6..68e9a651e 100644 --- a/packages/provider-elasticsearch/src/example-types/metricGroups/percentilesGroupStats.js +++ b/packages/provider-elasticsearch/src/example-types/metricGroups/percentilesGroupStats.js @@ -4,10 +4,13 @@ import { pickSafeNumbers } from '../../utils/futil.js' import { groupStats } from './groupStatUtils.js' // [1, 2, 3] -> [{to: 1}, {from: 1, to: 2}, {from: 2, to: 3}, {from: 3}] -let boundariesToRanges = _.flow( - F.mapIndexed((to, i, list) => F.compactObject({ from: list[i - 1], to })), - (arr) => F.pushOn(arr, { from: _.last(arr).to }) -) +let boundariesToRanges = (boundaries) => { + let ranges = F.mapIndexed( + (to, i, list) => F.compactObject({ from: list[i - 1], to }), + boundaries + ) + return [...ranges, { from: _.last(ranges)?.to }] +} let drilldownToRange = (drilldown) => { let [gte, lt] = _.split('-', drilldown) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js index 5abd5d163..7fb5a9221 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js @@ -95,19 +95,16 @@ let getFieldSubFields = (field) => /** * Returns object of all subfields in a schema. */ -let getSchemaSubFields = _.memoize( - (schema) => - F.reduceIndexed( - (acc, field, path) => { - let subFields = getFieldSubFields(field) - for (let k in subFields) acc[`${path}.${k}`] = subFields[k] - return acc - }, - {}, - schema.fields - ), - _.get('elasticsearch.index') -) +let getSchemaSubFields = _.memoize((schema) => { + let acc = {} + for (let path in schema.fields) { + let subFields = getFieldSubFields(schema.fields[path]) + for (let k in subFields) { + acc[`${path}.${k}`] = subFields[k] + } + } + return acc +}, _.get('elasticsearch.index')) /** * Returns object of all group fields and their subfields in a schema. @@ -148,13 +145,14 @@ export let getAllHighlightFields = _.memoize((schema) => { }) }, _.get('elasticsearch.index')) -let collectKeysAndValues = (f, coll) => - F.reduceTree()( - (acc, val, key) => - f(val) ? F.pushOn(acc, val) : f(key) ? F.pushOn(acc, key) : acc, - [], - coll - ) +let collectKeysAndValues = (predicate, coll) => { + let acc = [] + F.walk()((val, key) => { + if (predicate(val)) acc.push(val) + else if (predicate(key)) acc.push(key) + })(coll) + return acc +} let blobConfiguration = { fragment_size: 250,