forked from github/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lunr-search-index.js
executable file
·114 lines (97 loc) · 3 KB
/
lunr-search-index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env node
import { fileURLToPath } from 'url'
import path from 'path'
import lunr from 'lunr'
import lunrStemmerSupport from 'lunr-languages/lunr.stemmer.support.js'
import tinyseg from 'lunr-languages/tinyseg.js'
import lunrJa from 'lunr-languages/lunr.ja.js'
import lunrEs from 'lunr-languages/lunr.es.js'
import lunrPt from 'lunr-languages/lunr.pt.js'
import fs from 'fs/promises'
import validateRecords from './validate-records.js'
import { compress } from '../../lib/search/compress.js'
// ES modules have no built-in __dirname, so derive this file's directory
// from its module URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url))

// Apply each lunr-languages module to the shared `lunr` instance, in this
// order: stemmer support and the tinyseg segmenter first, then the
// Japanese, Spanish and Portuguese language modules.
for (const registerPlugin of [lunrStemmerSupport, tinyseg, lunrJa, lunrEs, lunrPt]) {
  registerPlugin(lunr)
}
/**
 * Builds a Lunr search index from a list of records and serializes both the
 * records and the index, either to a JSON string or to files on disk.
 */
export default class LunrIndex {
  /**
   * @param {string} name - Index name. The text after its last `-` is used
   *   as the language code when the index is built, and the name is also the
   *   base of the output file names.
   * @param {object[]} records - Records to index. Each record is referenced
   *   by its `objectID`; the fields `url`, `breadcrumbs`, `headings`,
   *   `title`, `content` and `topics` are indexed.
   */
  constructor(name, records) {
    this.name = name

    // Placeholder hook for custom rankings: currently an identity map, so
    // it only takes a shallow copy of the caller's array.
    this.records = records.map((record) => record)

    this.validate()
  }

  /**
   * Runs `validateRecords` against this index's name and records.
   *
   * @returns {*} Whatever `validateRecords` returns.
   *   NOTE(review): presumably throws on invalid records - confirm against
   *   validate-records.js.
   */
  validate() {
    return validateRecords(this.name, this.records)
  }

  /**
   * Constructs the Lunr index from `this.records` and stores it on
   * `this.index`, replacing any previously built index.
   */
  build() {
    const language = this.name.split('-').pop()
    const records = this.records

    // Must be a regular function, not an arrow: the body configures the
    // index through `this`.
    this.index = lunr(function constructIndex() {
      // Only these languages have a lunr-languages module imported by this file.
      if (['ja', 'es', 'pt'].includes(language)) {
        this.use(lunr[language])
      }

      // By default Lunr considers the `-` character to be a word boundary.
      // Splitting on whitespace only allows hyphens to be included in the
      // search index.
      // If you change this, remember to make it match the indexing separator
      // in lib/search/lunr-search.js so the query is tokenized
      // identically to the way it was indexed.
      this.tokenizer.separator = /[\s]+/

      this.ref('objectID')
      this.field('url')
      this.field('breadcrumbs')
      this.field('headings', { boost: 3 })
      this.field('title', { boost: 5 })
      this.field('content')
      this.field('topics')

      this.metadataWhitelist = ['position']

      for (const record of records) {
        this.add(record)
      }
    })
  }

  /**
   * Rebuilds the index and returns it as a pretty-printed JSON string.
   *
   * @returns {string} The index serialized with 2-space indentation.
   */
  toJSON() {
    this.build()
    return JSON.stringify(this.index, null, 2)
  }

  /**
   * The records keyed by their `objectID`.
   *
   * @returns {Object<string, object>}
   */
  get recordsObject() {
    return Object.fromEntries(this.records.map((record) => [record.objectID, record]))
  }

  /**
   * Rebuilds the index, then writes two files into `outDirectory`: the
   * records (`<name>-records.json[.br]`) followed by the index
   * (`<name>.json[.br]`).
   *
   * @param {object} [options] - May be omitted entirely.
   * @param {string} [options.outDirectory] - Destination directory; defaults
   *   to `lib/search/indexes`, resolved relative to this file.
   * @param {boolean} [options.compressFiles=true] - When true, each JSON
   *   string is passed through `compress` and the file gets a `.br` suffix.
   * @returns {Promise<void>} Resolves once both files have been written.
   */
  async write({
    // path.join rather than path.posix.join: __dirname is a platform-native
    // path, and the posix variant does not treat `\` as a separator.
    outDirectory = path.join(__dirname, '../../lib/search/indexes'),
    compressFiles = true,
  } = {}) {
    this.build()

    const extension = compressFiles ? 'json.br' : 'json'

    // Serialize `data`, optionally compress it, and write it to
    // `<outDirectory>/<basename>.<extension>`.
    const writeJSON = async (basename, data) => {
      const json = JSON.stringify(data)
      const content = compressFiles ? await compress(json) : json
      // Do not set the encoding to 'utf8' here.
      // NOTE(review): presumably because compressed content is binary - confirm.
      await fs.writeFile(path.join(outDirectory, `${basename}.${extension}`), content)
    }

    // Write the parsed records
    await writeJSON(`${this.name}-records`, this.recordsObject)

    // Write the index
    await writeJSON(this.name, this.index)
  }
}