feat(plugin-search): add language-based text segmentation support for search (#838)

This commit is contained in:
pengzhanbo 2026-02-12 00:59:35 +08:00 committed by GitHub
parent 32e4f92c61
commit 8cd08f4f02
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 22 additions and 8 deletions

View File

@ -25,7 +25,7 @@ import {
toRef,
watch,
} from 'vue'
import { useRouteLocale, useRouter, withBase } from 'vuepress/client'
import { usePageLang, useRouteLocale, useRouter, withBase } from 'vuepress/client'
import { useLocale, useSearchIndex } from '../composables/index.js'
import { LRUCache } from '../utils/index.js'
import BackIcon from './icons/BackIcon.vue'
@ -43,6 +43,7 @@ const emit = defineEmits<{
const routeLocale = useRouteLocale()
const locale = useLocale(toRef(() => locales))
const lang = usePageLang()
const el = shallowRef<HTMLElement>()
const resultsEl = shallowRef<HTMLElement>()
@ -59,8 +60,14 @@ const { activate } = useFocusTrap(el, {
immediate: true,
})
const searchIndex = computedAsync(async () =>
markRaw(
const searchIndex = computedAsync(async () => {
let tokenize: ((str: string) => string[]) | undefined
if (typeof Intl.Segmenter !== 'undefined') {
const segmenter = new Intl.Segmenter(lang.value, { granularity: 'word' })
tokenize = str => Array.from(segmenter.segment(str)).map(s => s.segment)
}
return markRaw(
MiniSearch.loadJSON<Result>(
(await searchIndexData.value[routeLocale.value]?.())?.default,
{
@ -70,13 +77,14 @@ const searchIndex = computedAsync(async () =>
fuzzy: 0.2,
prefix: true,
boost: { title: 4, text: 2, titles: 1 },
tokenize,
},
...options.miniSearch?.searchOptions,
...options.miniSearch?.options,
},
),
),
)
})
const disableQueryPersistence = computed(() =>
options?.disableQueryPersistence === true,

View File

@ -21,12 +21,16 @@ const SEARCH_INDEX_DIR = 'internal/minisearchIndex/'
const indexByLocales = new Map<string, MiniSearch<IndexObject>>()
const indexCache = new Map<string, IndexObject[]>()
function getIndexByLocale(locale: string, options: SearchIndexOptions['searchOptions']) {
function getIndexByLocale(locale: string, lang: string, options: SearchIndexOptions['searchOptions']) {
const segmenter = new Intl.Segmenter(lang, { granularity: 'word' })
let index = indexByLocales.get(locale)
if (!index) {
index = new MiniSearch<IndexObject>({
fields: ['title', 'titles', 'text'],
storeFields: ['title', 'titles'],
tokenize(text) {
return Array.from(segmenter.segment(text)).map(s => s.segment)
},
...options.miniSearch?.options,
})
indexByLocales.set(locale, index)
@ -93,7 +97,8 @@ export async function onSearchIndexRemoved(
const page = app.pages.find(p => p.filePathRelative?.endsWith(filepath))!
const fileId = page.path
const locale = page.pathLocale
const index = getIndexByLocale(locale, searchOptions)
const lang = page.lang
const index = getIndexByLocale(locale, lang, searchOptions)
const cache = getIndexCache(fileId)
if (cache && cache.length)
index.removeAll(cache)
@ -131,7 +136,8 @@ async function indexFile(page: Page, options: SearchIndexOptions['searchOptions'
// get file metadata
const fileId = page.path
const locale = page.pathLocale
const index = getIndexByLocale(locale, options)
const lang = page.lang
const index = getIndexByLocale(locale, lang, options)
const cache = getIndexCache(fileId)
// retrieve file and split into "sections"
const html = `<h1><a href="#"><span>${page.frontmatter.title || page.title}</span></a></h1>