From 8cd08f4f02de72f6c02cc07ec1e1ab3103081dd4 Mon Sep 17 00:00:00 2001 From: pengzhanbo Date: Thu, 12 Feb 2026 00:59:35 +0800 Subject: [PATCH] feat(plugin-search): add language-based text segmentation support for search (#838) --- .../src/client/components/SearchBox.vue | 18 +++++++++++++----- .../src/node/prepareSearchIndex.ts | 12 +++++++++--- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/plugins/plugin-search/src/client/components/SearchBox.vue b/plugins/plugin-search/src/client/components/SearchBox.vue index 0c8ec01e..d9abd519 100644 --- a/plugins/plugin-search/src/client/components/SearchBox.vue +++ b/plugins/plugin-search/src/client/components/SearchBox.vue @@ -25,7 +25,7 @@ import { toRef, watch, } from 'vue' -import { useRouteLocale, useRouter, withBase } from 'vuepress/client' +import { usePageLang, useRouteLocale, useRouter, withBase } from 'vuepress/client' import { useLocale, useSearchIndex } from '../composables/index.js' import { LRUCache } from '../utils/index.js' import BackIcon from './icons/BackIcon.vue' @@ -43,6 +43,7 @@ const emit = defineEmits<{ const routeLocale = useRouteLocale() const locale = useLocale(toRef(() => locales)) +const lang = usePageLang() const el = shallowRef() const resultsEl = shallowRef() @@ -59,8 +60,14 @@ const { activate } = useFocusTrap(el, { immediate: true, }) -const searchIndex = computedAsync(async () => - markRaw( +const searchIndex = computedAsync(async () => { + let tokenize: ((str: string) => string[]) | undefined + if (typeof Intl.Segmenter !== 'undefined') { + const segmenter = new Intl.Segmenter(lang.value, { granularity: 'word' }) + tokenize = str => Array.from(segmenter.segment(str)).map(s => s.segment) + } + + return markRaw( MiniSearch.loadJSON( (await searchIndexData.value[routeLocale.value]?.())?.default, { @@ -70,13 +77,14 @@ const searchIndex = computedAsync(async () => fuzzy: 0.2, prefix: true, boost: { title: 4, text: 2, titles: 1 }, + tokenize, }, ...options.miniSearch?.searchOptions, ...options.miniSearch?.options, }, ), - ), -) + ) +}) const disableQueryPersistence = computed(() => options?.disableQueryPersistence === true, diff --git a/plugins/plugin-search/src/node/prepareSearchIndex.ts b/plugins/plugin-search/src/node/prepareSearchIndex.ts index 44964dcc..3042e3bc 100644 --- a/plugins/plugin-search/src/node/prepareSearchIndex.ts +++ b/plugins/plugin-search/src/node/prepareSearchIndex.ts @@ -21,12 +21,16 @@ const SEARCH_INDEX_DIR = 'internal/minisearchIndex/' const indexByLocales = new Map>() const indexCache = new Map() -function getIndexByLocale(locale: string, options: SearchIndexOptions['searchOptions']) { +function getIndexByLocale(locale: string, lang: string, options: SearchIndexOptions['searchOptions']) { + const segmenter = new Intl.Segmenter(lang, { granularity: 'word' }) let index = indexByLocales.get(locale) if (!index) { index = new MiniSearch({ fields: ['title', 'titles', 'text'], storeFields: ['title', 'titles'], + tokenize(text) { + return Array.from(segmenter.segment(text)).map(s => s.segment) + }, ...options.miniSearch?.options, }) indexByLocales.set(locale, index) @@ -93,7 +97,8 @@ export async function onSearchIndexRemoved( const page = app.pages.find(p => p.filePathRelative?.endsWith(filepath))! const fileId = page.path const locale = page.pathLocale - const index = getIndexByLocale(locale, searchOptions) + const lang = page.lang + const index = getIndexByLocale(locale, lang, searchOptions) const cache = getIndexCache(fileId) if (cache && cache.length) index.removeAll(cache) @@ -131,7 +136,8 @@ async function indexFile(page: Page, options: SearchIndexOptions['searchOptions' // get file metadata const fileId = page.path const locale = page.pathLocale - const index = getIndexByLocale(locale, options) + const lang = page.lang + const index = getIndexByLocale(locale, lang, options) const cache = getIndexCache(fileId) // retrieve file and split into "sections" const html = `

${page.frontmatter.title || page.title}