This is an automated email from the ASF dual-hosted git repository.
jiafengzheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git
The following commit(s) were added to refs/heads/master by this push:
new d165736b83b Change static files to CDN (#37)
d165736b83b is described below
commit d165736b83bc83fa655b8bf7b4f31558079e62d4
Author: song7788q <[email protected]>
AuthorDate: Wed Aug 10 17:53:42 2022 +0800
Change static files to CDN (#37)
Change static files to CDN
---
config/custom-docusaurus-plugin.js | 18 ++
config/ssrTemplate.js | 33 +++
docusaurus.config.js | 36 +--
package.json | 2 +-
src/scss/components/search.scss | 18 +-
src/theme/LoadingRing/LoadingRing.module.css | 47 ++++
src/theme/LoadingRing/LoadingRing.tsx | 19 ++
src/theme/SearchBar/EmptyTemplate.js | 12 +
src/theme/SearchBar/SearchBar.jsx | 256 ++++++++++++++++++
src/theme/SearchBar/SearchBar.module.css | 256 ++++++++++++++++++
src/theme/SearchBar/SuggestionTemplate.js | 49 ++++
src/theme/SearchBar/fetchIndexes.js | 30 +++
src/theme/SearchBar/icons.js | 7 +
src/theme/SearchBar/index.js | 3 +
src/utils/SearchSourceFactory.spec.ts | 100 ++++++++
src/utils/SearchSourceFactory.ts | 83 ++++++
src/utils/__mocks__/proxiedGenerated.ts | 21 ++
src/utils/concatDocumentPath.ts | 3 +
src/utils/cutZhWords.spec.ts | 42 +++
src/utils/cutZhWords.ts | 89 +++++++
src/utils/escapeHtml.ts | 15 ++
src/utils/getStemmedPositions.spec.ts | 41 +++
src/utils/getStemmedPositions.ts | 17 ++
src/utils/highlight.spec.ts | 31 +++
src/utils/highlight.ts | 43 ++++
src/utils/highlightStemmed.spec.ts | 165 ++++++++++++
src/utils/highlightStemmed.ts | 124 +++++++++
src/utils/looseTokenize.spec.ts | 9 +
src/utils/looseTokenize.ts | 22 ++
src/utils/processTreeStatusOfSearchResults.spec.ts | 83 ++++++
src/utils/processTreeStatusOfSearchResults.ts | 19 ++
src/utils/proxiedGenerated.ts | 2 +
src/utils/smartQueries.spec.ts | 285 +++++++++++++++++++++
src/utils/smartQueries.ts | 131 ++++++++++
src/utils/smartTerms.spec.ts | 35 +++
src/utils/smartTerms.ts | 42 +++
src/utils/sortSearchResults.spec.ts | 73 ++++++
src/utils/sortSearchResults.ts | 40 +++
src/utils/tokenize.spec.ts | 40 +++
src/utils/tokenize.ts | 32 +++
40 files changed, 2349 insertions(+), 24 deletions(-)
diff --git a/config/custom-docusaurus-plugin.js
b/config/custom-docusaurus-plugin.js
new file mode 100644
index 00000000000..5ae9f47440e
--- /dev/null
+++ b/config/custom-docusaurus-plugin.js
@@ -0,0 +1,18 @@
+const path = require('path');
+
+module.exports = function (context, options) {
+ return {
+ name: 'custom-docusaurus-plugin',
+ configureWebpack(config, isServer, utils) {
+ return {
+ output: {
+ ...config.output,
+ publicPath:
+ context.i18n.currentLocale === 'en'
+ ? 'https://cdn.selectdb.com/'
+ : 'https://cdn.selectdb.com/zh-CN/',
+ },
+ };
+ },
+ };
+};
diff --git a/config/ssrTemplate.js b/config/ssrTemplate.js
new file mode 100644
index 00000000000..e2c84109b46
--- /dev/null
+++ b/config/ssrTemplate.js
@@ -0,0 +1,33 @@
+// Custom server-side-rendering HTML template, exported as `ssrTemplate` and
+// wired into docusaurus.config.js.
+//
+// Every stylesheet and script URL (the <link rel="stylesheet">, the
+// <link rel="preload"> and the <script src>) is prefixed with the hard-coded
+// CDN origin 'https://cdn.selectdb.com' followed by `it.baseUrl`, so static
+// assets are loaded from the CDN rather than from the site origin.
+// The viewport <meta> tag is emitted here directly.
+// NOTE(review): the `<%= %>` / `<%~ %>` tags look like the Eta template syntax
+// Docusaurus uses for `ssrTemplate` — confirm against the Docusaurus version in use.
+module.exports = {
+ ssrTemplate: `<!DOCTYPE html>
+<html <%~ it.htmlAttributes %>>
+ <head>
+ <meta charset="UTF-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0,
minimum-scale=1.0, maximum-scale=1.0, user-scalable=no">
+ <meta name="generator" content="Docusaurus v<%= it.version %>">
+ <% if (it.noIndex) { %>
+ <meta name="robots" content="noindex, nofollow" />
+ <% } %>
+ <%~ it.headTags %>
+ <% it.metaAttributes.forEach((metaAttribute) => { %>
+ <%~ metaAttribute %>
+ <% }); %>
+ <% it.stylesheets.forEach((stylesheet) => { %>
+ <link rel="stylesheet" href="<%= 'https://cdn.selectdb.com' %><%=
it.baseUrl %><%= stylesheet %>" />
+ <% }); %>
+ <% it.scripts.forEach((script) => { %>
+ <link rel="preload" href="<%= 'https://cdn.selectdb.com' %><%=
it.baseUrl %><%= script %>" as="script">
+ <% }); %>
+ </head>
+ <body <%~ it.bodyAttributes %>>
+ <%~ it.preBodyTags %>
+ <div id="__docusaurus">
+ <%~ it.appHtml %>
+ </div>
+ <% it.scripts.forEach((script) => { %>
+ <script src="<%= 'https://cdn.selectdb.com' %><%= it.baseUrl %><%=
script %>"></script>
+ <% }); %>
+ <%~ it.postBodyTags %>
+ </body>
+</html>`,
+};
diff --git a/docusaurus.config.js b/docusaurus.config.js
index 9027a9d45a5..0e56902e32b 100644
--- a/docusaurus.config.js
+++ b/docusaurus.config.js
@@ -4,6 +4,8 @@
const versions = require('./versions.json');
const lightCodeTheme = require('prism-react-renderer/themes/github');
const showAllVersions = true;
+const { ssrTemplate } = require('./config/ssrTemplate');
+const customDocusaurusPlugin = require('./config/custom-docusaurus-plugin');
/** @type {import('@docusaurus/types').Config} */
const config = {
@@ -46,6 +48,7 @@ const config = {
sidebarPath: require.resolve('./sidebarsCommunity.json'),
}),
],
+ process.env.NODE_ENV === 'development' ? null : customDocusaurusPlugin,
[
'@docusaurus/plugin-pwa',
{
@@ -114,7 +117,8 @@ const config = {
lastVersion: 'current',
versions: {
current: {
- label: '1,1',
+ banner: 'none',
+ label: '1.1',
path: '',
},
'1.0': {
@@ -156,7 +160,6 @@ const config = {
highlightSearchTermsOnTargetPage: true,
// indexPages: true,
indexDocs: true,
- docsDir: ['docs', 'community'],
indexBlog: false,
explicitSearchResultPath: true,
},
@@ -169,7 +172,7 @@ const config = {
title: '',
logo: {
alt: 'Doris',
- src: 'images/logo.svg',
+ src: 'https://cdn.selectdb.com/images/logo.svg',
},
items: [
{ to: '/', label: 'Home', position: 'left', exact: true },
@@ -197,12 +200,12 @@ const config = {
type: 'localeDropdown',
position: 'right',
},
- // {
- // href: "https://github.com/apache/doris",
- // className: "header-right-button-github",
- // position: "right",
- // label: "GitHub",
- // },
+ // {
+ // href: 'https://github.com/apache/doris',
+ // className: 'header-right-button-github',
+ // position: 'right',
+ // label: 'GitHub',
+ // },
{
href: '/download',
className: 'header-right-button-primary
navbar-download-mobile',
@@ -274,14 +277,15 @@ const config = {
colorMode: {
disableSwitch: true,
},
- metadata: [
- {
- name: 'viewport',
- content:
- 'width=device-width, initial-scale=1.0,
minimum-scale=1.0, maximum-scale=1.0, user-scalable=no',
- },
- ],
+ // metadata: [
+ // {
+ // name: 'viewport',
+ // content:
+ // 'width=device-width, initial-scale=1.0,
minimum-scale=1.0, maximum-scale=1.0, user-scalable=no',
+ // },
+ // ],
}),
+ ssrTemplate,
};
module.exports = config;
diff --git a/package.json b/package.json
index d27458a2840..5d95b1ee807 100644
--- a/package.json
+++ b/package.json
@@ -20,7 +20,7 @@
"@docusaurus/core": "2.0.0-beta.21",
"@docusaurus/plugin-pwa": "2.0.0-beta.21",
"@docusaurus/preset-classic": "2.0.0-beta.21",
- "@easyops-cn/docusaurus-search-local": "^0.28.0",
+ "@easyops-cn/docusaurus-search-local": "^0.30.2",
"@mdx-js/react": "^1.6.22",
"clsx": "^1.1.1",
"docusaurus-plugin-sass": "^0.2.2",
diff --git a/src/scss/components/search.scss b/src/scss/components/search.scss
index 3aa94b58322..401fc36fe21 100644
--- a/src/scss/components/search.scss
+++ b/src/scss/components/search.scss
@@ -1,7 +1,7 @@
-.dropdownMenu_qbY6 {
+.dropdownMenu_jUzS {
padding: 0;
- .suggestion_fB_2 {
+ .suggestion_HjS8 {
padding: 0 1.5rem;
&.cursor_eG29 {
@@ -12,20 +12,20 @@
}
}
- .hitIcon_a7Zy {
+ .hitIcon_fVnR {
display: none;
}
- .hitTitle_vyVt {
+ .hitTitle_LImS {
font-size: var(--global-font-size-medium);
}
- .hitPath_ieM4 {
+ .hitPath_zaD7 {
font-size: var(--global-font-size-small);
color: rgba(35, 45, 62, 0.7);
}
- .hitAction_NqkB {
+ .hitAction__La6 {
svg {
display: none;
@@ -42,9 +42,13 @@
}
}
- .hitFooter_E9YW a {
+ .hitFooter_QvWT a {
text-decoration: none;
color: rgba(35, 45, 62, 0.6);
+
+ &:hover {
+ color: var(--ifm-color-primary);
+ }
}
}
diff --git a/src/theme/LoadingRing/LoadingRing.module.css
b/src/theme/LoadingRing/LoadingRing.module.css
new file mode 100644
index 00000000000..2e569824e25
--- /dev/null
+++ b/src/theme/LoadingRing/LoadingRing.module.css
@@ -0,0 +1,47 @@
+/* https://loading.io/css/ */
+.loadingRing {
+ display: inline-block;
+ position: relative;
+ width: 20px;
+ height: 20px;
+ opacity: var(--search-local-loading-icon-opacity, 0.5);
+}
+
+/*
+ * Each child <div> of the ring is one rotating arc: a 16px circle with a 2px
+ * border, absolutely positioned inside the 20px wrapper (2px margin).
+ * NOTE(review): the custom property here is spelled `--search-load-…` while the
+ * wrapper's opacity uses `--search-local-…`; confirm whether that is intended.
+ */
+.loadingRing div {
+ box-sizing: border-box;
+ display: block;
+ position: absolute;
+ width: 16px;
+ height: 16px;
+ margin: 2px;
+ border: 2px solid
+ var(--search-load-loading-icon-color,
var(--ifm-navbar-search-input-color));
+ border-radius: 50%;
+ animation: loading-ring 1.2s cubic-bezier(0.5, 0, 0.5, 1) infinite;
+ /* Overrides the colour set by `border` above: only the top edge stays
+    coloured, the other three become transparent, leaving a quarter arc. */
+ border-color: var(
+ --search-load-loading-icon-color,
+ var(--ifm-navbar-search-input-color)
+ )
+ transparent transparent transparent;
+}
+
+.loadingRing div:nth-child(1) {
+ animation-delay: -0.45s;
+}
+
+.loadingRing div:nth-child(2) {
+ animation-delay: -0.3s;
+}
+
+.loadingRing div:nth-child(3) {
+ animation-delay: -0.15s;
+}
+
+@keyframes loading-ring {
+ 0% {
+ transform: rotate(0deg);
+ }
+ 100% {
+ transform: rotate(360deg);
+ }
+}
diff --git a/src/theme/LoadingRing/LoadingRing.tsx
b/src/theme/LoadingRing/LoadingRing.tsx
new file mode 100644
index 00000000000..166a728a12e
--- /dev/null
+++ b/src/theme/LoadingRing/LoadingRing.tsx
@@ -0,0 +1,19 @@
+// istanbul ignore file
+import React from "react";
+import clsx from "clsx";
+import styles from "./LoadingRing.module.css";
+
+/**
+ * Spinner made of a wrapper element with four empty children; the animation
+ * itself lives in LoadingRing.module.css.
+ *
+ * @param props.className Optional extra class merged onto the wrapper.
+ */
+export default function LoadingRing(props: {
+  className?: string;
+}): React.ReactElement {
+  const wrapperClassName = clsx(styles.loadingRing, props.className);
+  return (
+    <div className={wrapperClassName}>
+      <div />
+      <div />
+      <div />
+      <div />
+    </div>
+  );
+}
diff --git a/src/theme/SearchBar/EmptyTemplate.js
b/src/theme/SearchBar/EmptyTemplate.js
new file mode 100644
index 00000000000..0c67ba65f42
--- /dev/null
+++ b/src/theme/SearchBar/EmptyTemplate.js
@@ -0,0 +1,12 @@
+import { translate } from "@docusaurus/Translate";
+import { iconNoResults } from "./icons";
+import styles from "./SearchBar.module.css";
+/**
+ * Builds the HTML string shown in the dropdown when a query has no hits.
+ *
+ * In production it renders the "no results" icon plus the translated
+ * `theme.SearchBar.noResultsText` message; in any other environment it renders
+ * a warning that the search index only exists after `docusaurus build`.
+ *
+ * @returns {string} HTML markup for the empty state.
+ */
+export function EmptyTemplate() {
+ if (process.env.NODE_ENV === "production") {
+ return `<span class="${styles.noResults}"><span
class="${styles.noResultsIcon}">${iconNoResults}</span><span>${translate({
+ id: "theme.SearchBar.noResultsText",
+ message: "No results",
+ })}</span></span>`;
+ }
+ // Non-production builds have no index, so explain why nothing is found.
+ return `<span class="${styles.noResults}">⚠️ The search index is only
available when you run docusaurus build!</span>`;
+}
diff --git a/src/theme/SearchBar/SearchBar.jsx
b/src/theme/SearchBar/SearchBar.jsx
new file mode 100644
index 00000000000..dcef94e6309
--- /dev/null
+++ b/src/theme/SearchBar/SearchBar.jsx
@@ -0,0 +1,256 @@
+import React, { useCallback, useEffect, useRef, useState, } from "react";
+import clsx from "clsx";
+import useDocusaurusContext from "@docusaurus/useDocusaurusContext";
+import ExecutionEnvironment from "@docusaurus/ExecutionEnvironment";
+import { useHistory, useLocation } from "@docusaurus/router";
+import { translate } from "@docusaurus/Translate";
+import { ReactContextError, useDocsPreferredVersion, } from
"@docusaurus/theme-common";
+import { useActivePlugin } from "@docusaurus/plugin-content-docs/client";
+import { fetchIndexes } from "./fetchIndexes";
+import { SearchSourceFactory } from "../../utils/SearchSourceFactory";
+import { SuggestionTemplate } from "./SuggestionTemplate";
+import { EmptyTemplate } from "./EmptyTemplate";
+import { searchResultLimits, Mark, searchBarShortcut, searchBarShortcutHint,
docsPluginIdForPreferredVersion, indexDocs, } from
"../../utils/proxiedGenerated";
+import LoadingRing from "../LoadingRing/LoadingRing";
+import styles from "./SearchBar.module.css";
+async function fetchAutoCompleteJS() {
+ const autoCompleteModule = await import("@easyops-cn/autocomplete.js");
+ const autoComplete = autoCompleteModule.default;
+ if (autoComplete.noConflict) {
+ // For webpack v5 since docusaurus v2.0.0-alpha.75
+ autoComplete.noConflict();
+ }
+ else if (autoCompleteModule.noConflict) {
+ // For webpack v4 before docusaurus v2.0.0-alpha.74
+ autoCompleteModule.noConflict();
+ }
+ return autoComplete;
+}
+const SEARCH_PARAM_HIGHLIGHT = "_highlight";
+export default function SearchBar({ handleSearchBarToggle, }) {
+ const { siteConfig: { baseUrl }, } = useDocusaurusContext();
+ // It returns undefined for non-docs pages
+ const activePlugin = useActivePlugin();
+ let versionUrl = baseUrl;
+ // For non-docs pages while using plugin-content-docs with custom ids,
+ // this will throw an error of:
+ // > Docusaurus plugin global data not found for
"docusaurus-plugin-content-docs" plugin with id "default".
+ // It seems that we can not get the correct id for non-docs pages.
+ try {
+ // The try-catch is a hack because useDocsPreferredVersion just throws
an
+ // exception when versions are not used.
+ // The same hack is used in SearchPage.tsx
+ // eslint-disable-next-line react-hooks/rules-of-hooks
+ const { preferredVersion } =
useDocsPreferredVersion(activePlugin?.pluginId ??
docsPluginIdForPreferredVersion);
+ if (preferredVersion && !preferredVersion.isLast) {
+ versionUrl = preferredVersion.path + "/";
+ }
+ }
+ catch (e) {
+ if (indexDocs) {
+ if (e instanceof ReactContextError) {
+ /* ignore, happens when website doesn't use versions */
+ }
+ else {
+ throw e;
+ }
+ }
+ }
+ const history = useHistory();
+ const location = useLocation();
+ const searchBarRef = useRef(null);
+ const indexState = useRef("empty"); // empty, loaded, done
+ // Should the input be focused after the index is loaded?
+ const focusAfterIndexLoaded = useRef(false);
+ const [loading, setLoading] = useState(false);
+ const [inputChanged, setInputChanged] = useState(false);
+ const [inputValue, setInputValue] = useState("");
+ const search = useRef(null);
+ const loadIndex = useCallback(async () => {
+ if (indexState.current !== "empty") {
+ // Do not load the index (again) if its already loaded or in the
process of being loaded.
+ return;
+ }
+ indexState.current = "loading";
+ setLoading(true);
+ const [{ wrappedIndexes, zhDictionary }, autoComplete] = await
Promise.all([
+ fetchIndexes(versionUrl),
+ fetchAutoCompleteJS(),
+ ]);
+ search.current = autoComplete(searchBarRef.current, {
+ hint: false,
+ autoselect: true,
+ openOnFocus: true,
+ cssClasses: {
+ root: styles.searchBar,
+ noPrefix: true,
+ dropdownMenu: styles.dropdownMenu,
+ input: styles.input,
+ hint: styles.hint,
+ suggestions: styles.suggestions,
+ suggestion: styles.suggestion,
+ cursor: styles.cursor,
+ dataset: styles.dataset,
+ empty: styles.empty,
+ },
+ }, [
+ {
+ source: SearchSourceFactory(wrappedIndexes, zhDictionary,
searchResultLimits),
+ templates: {
+ suggestion: SuggestionTemplate,
+ empty: EmptyTemplate,
+ footer: ({ query, isEmpty }) => {
+ if (isEmpty) {
+ return;
+ }
+ const a = document.createElement("a");
+ const url =
`${baseUrl}search?q=${encodeURIComponent(query)}`;
+ a.href = url;
+ a.textContent = translate({
+ id: "theme.SearchBar.seeAll",
+ message: "See all results",
+ });
+ a.addEventListener("click", (e) => {
+ if (!e.ctrlKey && !e.metaKey) {
+ e.preventDefault();
+ search.current.autocomplete.close();
+ history.push(url);
+ }
+ });
+ const div = document.createElement("div");
+ div.className = styles.hitFooter;
+ div.appendChild(a);
+ return div;
+ },
+ },
+ },
+ ])
+ .on("autocomplete:selected", function (event, { document: { u, h
}, tokens }) {
+ searchBarRef.current?.blur();
+ let url = u;
+ if (Mark && tokens.length > 0) {
+ const params = new URLSearchParams();
+ for (const token of tokens) {
+ params.append(SEARCH_PARAM_HIGHLIGHT, token);
+ }
+ url += `?${params.toString()}`;
+ }
+ if (h) {
+ url += h;
+ }
+ history.push(url);
+ })
+ .on("autocomplete:closed", () => {
+ searchBarRef.current?.blur();
+ });
+ indexState.current = "done";
+ setLoading(false);
+ if (focusAfterIndexLoaded.current) {
+ const input = searchBarRef.current;
+ if (input.value) {
+ search.current.autocomplete.open();
+ }
+ input.focus();
+ }
+ }, [baseUrl, versionUrl, history]);
+ useEffect(() => {
+ if (!Mark) {
+ return;
+ }
+ const keywords = ExecutionEnvironment.canUseDOM
+ ? new
URLSearchParams(location.search).getAll(SEARCH_PARAM_HIGHLIGHT)
+ : [];
+ // A workaround to fix an issue of highlighting in code blocks.
+ // See https://github.com/easyops-cn/docusaurus-search-local/issues/92
+ // Code blocks will be re-rendered after this `useEffect` ran.
+ // So we make the marking run after a macro task.
+ setTimeout(() => {
+ const root = document.querySelector("article");
+ if (!root) {
+ return;
+ }
+ const mark = new Mark(root);
+ mark.unmark();
+ if (keywords.length !== 0) {
+ mark.mark(keywords);
+ }
+ // Apply any keywords to the search input so that we can clear
marks in case we loaded a page with a highlight in the url
+ setInputValue(keywords.join(" "));
+ search.current?.autocomplete.setVal(keywords.join(" "));
+ });
+ }, [location.search, location.pathname]);
+ const [focused, setFocused] = useState(false);
+ const onInputFocus = useCallback(() => {
+ focusAfterIndexLoaded.current = true;
+ loadIndex();
+ setFocused(true);
+ handleSearchBarToggle?.(true);
+ }, [handleSearchBarToggle, loadIndex]);
+ const onInputBlur = useCallback(() => {
+ setFocused(false);
+ handleSearchBarToggle?.(false);
+ }, [handleSearchBarToggle]);
+ const onInputMouseEnter = useCallback(() => {
+ loadIndex();
+ }, [loadIndex]);
+ const onInputChange = useCallback((event) => {
+ setInputValue(event.target.value);
+ if (event.target.value) {
+ setInputChanged(true);
+ }
+ }, []);
+ // Implement hint icons for the search shortcuts on mac and the rest
operating systems.
+ const isMac = ExecutionEnvironment.canUseDOM
+ ? /mac/i.test(navigator.userAgentData?.platform ?? navigator.platform)
+ : false;
+ useEffect(() => {
+ if (!searchBarShortcut) {
+ return;
+ }
+ // Add shortcuts command/ctrl + K
+ const handleShortcut = (event) => {
+ if ((isMac ? event.metaKey : event.ctrlKey) && event.code ===
"KeyK") {
+ event.preventDefault();
+ searchBarRef.current?.focus();
+ onInputFocus();
+ }
+ };
+ document.addEventListener("keydown", handleShortcut);
+ return () => {
+ document.removeEventListener("keydown", handleShortcut);
+ };
+ }, [isMac, onInputFocus]);
+ const onClearSearch = useCallback(() => {
+ const params = new URLSearchParams(location.search);
+ params.delete(SEARCH_PARAM_HIGHLIGHT);
+ const paramsStr = params.toString();
+ const searchUrl = location.pathname +
+ (paramsStr != "" ? `?${paramsStr}` : "") +
+ location.hash;
+ if (searchUrl != location.pathname + location.search + location.hash) {
+ history.push(searchUrl);
+ }
+ // We always clear these here because in case no match was selected
the above history push wont happen
+ setInputValue("");
+ search.current?.autocomplete.setVal("");
+ }, [location.pathname, location.search, location.hash, history]);
+ return (<div className={clsx("navbar__search", styles.searchBarContainer, {
+ [styles.searchIndexLoading]: loading && inputChanged,
+ [styles.focused]: focused,
+ })}>
+ <input placeholder={translate({
+ id: "theme.SearchBar.label",
+ message: "Search",
+ description: "The ARIA label and placeholder for search button",
+ })} aria-label="Search" className="navbar__search-input"
onMouseEnter={onInputMouseEnter} onFocus={onInputFocus} onBlur={onInputBlur}
onChange={onInputChange} ref={searchBarRef} value={inputValue}/>
+ <LoadingRing className={styles.searchBarLoadingRing}/>
+ {searchBarShortcut &&
+ searchBarShortcutHint &&
+ (inputValue !== "" ? (<button className={styles.searchClearButton}
onClick={onClearSearch}>
+ ✕
+ </button>) : (<div className={styles.searchHintContainer}>
+ <kbd className={styles.searchHint}>{isMac ? "⌘" : "ctrl"}</kbd>
+ <kbd className={styles.searchHint}>K</kbd>
+ </div>))}
+ </div>);
+}
diff --git a/src/theme/SearchBar/SearchBar.module.css
b/src/theme/SearchBar/SearchBar.module.css
new file mode 100644
index 00000000000..1777c864ed6
--- /dev/null
+++ b/src/theme/SearchBar/SearchBar.module.css
@@ -0,0 +1,256 @@
+.searchBar .dropdownMenu {
+ left: auto !important;
+ right: 0 !important;
+
+ background: var(--search-local-modal-background, #f5f6f7);
+ border-radius: 6px;
+ box-shadow: var(
+ --search-local-modal-shadow,
+ inset 1px 1px 0 0 hsla(0, 0%, 100%, 0.5),
+ 0 3px 8px 0 #555a64
+ );
+ margin-top: 8px;
+ width: var(--search-local-modal-width, 560px);
+ position: relative;
+
+ padding: var(--search-local-spacing, 12px);
+}
+
+@media (max-width: 576px) {
+ :global(.navbar__search-input):not(:focus) {
+ width: 2rem;
+ }
+
+ .searchBar .dropdownMenu {
+ width: var(--search-local-modal-width-sm, 340px);
+ max-width: calc(100vw - var(--ifm-navbar-padding-horizontal) * 2);
+ }
+}
+
+html[data-theme="dark"] .searchBar .dropdownMenu {
+ background: var(--search-local-modal-background,
var(--ifm-background-color));
+ box-shadow: var(
+ --search-local-modal-shadow,
+ inset 1px 1px 0 0 #2c2e40,
+ 0 3px 8px 0 #000309
+ );
+}
+
+.searchBar .dropdownMenu .suggestion {
+ cursor: pointer;
+ background: var(--search-local-hit-background, #fff);
+ border-radius: 4px;
+ box-shadow: var(--search-local-hit-shadow, 0 1px 3px 0 #d4d9e1);
+ padding: 0 var(--search-local-spacing, 12px);
+ width: 100%;
+
+ align-items: center;
+ color: var(--search-local-hit-color, #444950);
+ display: flex;
+ flex-direction: row;
+ height: var(--search-local-hit-height, 56px);
+}
+
+html[data-theme="dark"] .dropdownMenu .suggestion {
+ background: var(--search-local-hit-background,
var(--ifm-color-emphasis-100));
+ box-shadow: var(--search-local-hit-shadow, none);
+ color: var(--search-local-hit-color, var(--ifm-font-color-base));
+}
+
+.searchBar .dropdownMenu .suggestion:not(:last-child) {
+ margin-bottom: 4px;
+}
+
+.searchBar .dropdownMenu .suggestion.cursor {
+ background-color: var(
+ --search-local-highlight-color,
+ var(--ifm-color-primary)
+ );
+}
+
+.hitTree,
+.hitIcon,
+.hitPath,
+.noResultsIcon,
+.hitFooter a {
+ color: var(--search-local-muted-color, #969faf);
+}
+
+html[data-theme="dark"] .hitTree,
+html[data-theme="dark"] .hitIcon,
+html[data-theme="dark"] .hitPath,
+html[data-theme="dark"] .noResultsIcon {
+ color: var(--search-local-muted-color, var(--ifm-color-secondary-darkest));
+}
+
+.hitTree {
+ display: flex;
+ align-items: center;
+}
+
+.hitTree > svg {
+ height: var(--search-local-hit-height, 56px);
+ opacity: 0.5;
+ stroke-width: var(--search-local-icon-stroke-width, 1.4);
+ width: 24px;
+}
+
+.hitIcon {
+ stroke-width: var(--search-local-icon-stroke-width, 1.4);
+
+ height: 20px;
+ width: 20px;
+}
+
+.hitWrapper {
+ flex: 1 1 auto;
+ display: flex;
+ flex-direction: column;
+ font-weight: 500;
+ justify-content: center;
+ margin: 0 8px;
+ overflow-x: hidden;
+ width: 80%;
+}
+
+.hitWrapper mark {
+ background: none;
+ color: var(--search-local-highlight-color, var(--ifm-color-primary));
+}
+
+.hitTitle {
+ font-size: 0.9em;
+}
+
+.hitPath {
+ font-size: 0.75em;
+}
+
+.hitPath,
+.hitTitle {
+ white-space: nowrap;
+ overflow-x: hidden;
+ text-overflow: ellipsis;
+}
+
+.hitAction {
+ height: 20px;
+ width: 20px;
+}
+
+.hideAction > svg {
+ display: none;
+}
+
+.noResults {
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ justify-content: center;
+ padding: var(--search-local-spacing, 12px) 0;
+}
+
+.noResultsIcon {
+ margin-bottom: var(--search-local-spacing, 12px);
+}
+
+.hitFooter {
+ text-align: center;
+ margin-top: var(--search-local-spacing, 12px);
+ font-size: 0.85em;
+}
+
+.hitFooter a {
+ text-decoration: underline;
+}
+
+.cursor .hideAction > svg {
+ display: block;
+}
+
+.suggestion.cursor,
+.suggestion.cursor mark,
+.suggestion.cursor .hitTree,
+.suggestion.cursor .hitIcon,
+.suggestion.cursor .hitPath {
+ color: var(
+ --search-local-hit-active-color,
+ var(--ifm-color-white)
+ ) !important;
+}
+
+.suggestion.cursor mark {
+ text-decoration: underline;
+}
+
+.searchBarContainer {
+ margin-left: 16px;
+}
+
+.searchBarContainer .searchBarLoadingRing {
+ display: none;
+ position: absolute;
+ left: 10px;
+ top: 6px;
+}
+
+.searchBarContainer .searchClearButton {
+ position: absolute;
+ right: 0.8rem;
+ top: 50%;
+ transform: translate(0, -50%);
+ padding: 0;
+ background: none;
+ border: none;
+ line-height: 1rem;
+}
+
+:global(.navbar__search) {
+ position: relative;
+}
+
+.searchIndexLoading :global(.navbar__search-input) {
+ background-image: none;
+}
+
+.searchBarContainer.searchIndexLoading .searchBarLoadingRing {
+ display: inline-block;
+}
+
+.searchHintContainer {
+ position: absolute;
+ right: 10px;
+ top: 0px;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ height: 100%;
+ pointer-events: none;
+ gap: 4px;
+}
+
+.searchHint {
+ color: var(--ifm-navbar-search-input-placeholder-color);
+ background-color: var(--ifm-navbar-search-input-background-color);
+ border: 1px solid var(--ifm-color-emphasis-500);
+ box-shadow: inset 0 -1px 0 var(--ifm-color-emphasis-500);
+}
+
+@media (max-width: 576px) {
+ .searchBarContainer:not(.focused) .searchClearButton,
+ .searchHintContainer {
+ display: none;
+ }
+}
+
+/*
+ * Intentionally empty rules. SearchBar.jsx passes styles.input, styles.hint,
+ * styles.suggestions, styles.dataset and styles.empty to the autocomplete
+ * `cssClasses` option; presumably they are declared here only so the CSS
+ * module exports a class name for each — confirm before removing.
+ */
+.input {
+}
+.hint {
+}
+.suggestions {
+}
+.dataset {
+}
+.empty {
+}
+/**/
diff --git a/src/theme/SearchBar/SuggestionTemplate.js
b/src/theme/SearchBar/SuggestionTemplate.js
new file mode 100644
index 00000000000..d11f8c76357
--- /dev/null
+++ b/src/theme/SearchBar/SuggestionTemplate.js
@@ -0,0 +1,49 @@
+import { concatDocumentPath } from "../../utils/concatDocumentPath";
+import { getStemmedPositions } from "../../utils/getStemmedPositions";
+import { highlight } from "../../utils/highlight";
+import { highlightStemmed } from "../../utils/highlightStemmed";
+import { explicitSearchResultPath } from "../../utils/proxiedGenerated";
+import { iconAction, iconContent, iconHeading, iconTitle, iconTreeInter,
iconTreeLast, } from "./icons";
+import styles from "./SearchBar.module.css";
+/**
+ * Renders one search hit as an HTML string for the autocomplete dropdown.
+ *
+ * The markup is, in order: an optional tree connector, a type icon, a wrapper
+ * holding the highlighted title and (when applicable) a path line, and the
+ * action icon.
+ *
+ * @param {object} hit
+ * @param hit.document The matched document; `t`, `s`, `b` and `u` are read here.
+ * @param hit.type 0 selects the title icon, 1 the heading icon, anything else the content icon.
+ * @param hit.page Parent page of the hit, if any; `t` and `b` are read here.
+ * @param hit.metadata Match metadata passed to `getStemmedPositions`.
+ * @param hit.tokens Query tokens used for highlighting.
+ * @param hit.isInterOfTree Truthy to draw the "inter" tree connector.
+ * @param hit.isLastOfTree Truthy to draw the "last" tree connector.
+ * @returns {string} Concatenated HTML for the suggestion row.
+ */
+export function SuggestionTemplate({ document, type, page, metadata, tokens,
isInterOfTree, isLastOfTree, }) {
+ const isTitle = type === 0;
+ const isHeading = type === 1;
+ // At most one connector icon is emitted; "inter" wins over "last".
+ const tree = [];
+ if (isInterOfTree) {
+ tree.push(iconTreeInter);
+ }
+ else if (isLastOfTree) {
+ tree.push(iconTreeLast);
+ }
+ const treeWrapper = tree.map((item) => `<span
class="${styles.hitTree}">${item}</span>`);
+ const icon = `<span class="${styles.hitIcon}">${isTitle ? iconTitle :
isHeading ? iconHeading : iconContent}</span>`;
+ const wrapped = [
+ `<span class="${styles.hitTitle}">${highlightStemmed(document.t,
getStemmedPositions(metadata, "t"), tokens)}</span>`,
+ ];
+ // Hits that are not part of a tree show an explicit path line when
+ // `explicitSearchResultPath` is enabled.
+ const needsExplicitHitPath = !isInterOfTree && !isLastOfTree &&
explicitSearchResultPath;
+ if (needsExplicitHitPath) {
+ // With a parent page: page.b entries + page title + the document's `s`
+ // (skipped when missing or equal to the page title). Without one: document.b.
+ const pathItems = page
+ ? (page.b ?? [])
+ .concat(page.t)
+ .concat(!document.s || document.s === page.t ? [] : document.s)
+ : document.b;
+ wrapped.push(`<span
class="${styles.hitPath}">${concatDocumentPath(pathItems ?? [])}</span>`);
+ }
+ else if (!isTitle) {
+ // Non-title hits fall back to the highlighted parent page title.
+ wrapped.push(`<span class="${styles.hitPath}">${highlight(page.t ||
+ // Todo(weareoutman): This is for EasyOps only.
+ // istanbul ignore next
+ (document.u.startsWith("/docs/api-reference/")
+ ? "API Reference"
+ : ""), tokens)}</span>`);
+ }
+ const action = `<span class="${styles.hitAction}">${iconAction}</span>`;
+ return [
+ ...treeWrapper,
+ icon,
+ `<span class="${styles.hitWrapper}">`,
+ ...wrapped,
+ "</span>",
+ action,
+ ].join("");
+}
diff --git a/src/theme/SearchBar/fetchIndexes.js
b/src/theme/SearchBar/fetchIndexes.js
new file mode 100644
index 00000000000..369677190c3
--- /dev/null
+++ b/src/theme/SearchBar/fetchIndexes.js
@@ -0,0 +1,30 @@
+import lunr from 'lunr';
+import { searchIndexUrl } from '../../utils/proxiedGenerated';
+export async function fetchIndexes(baseUrl) {
+ if (process.env.NODE_ENV === 'production') {
+ // const json = await (await
fetch(`${baseUrl}${searchIndexUrl}`)).json();
+ const json = await (await
fetch(`https://cdn.selectdb.com${baseUrl}${searchIndexUrl}`)).json();
+ const wrappedIndexes = json.map(({ documents, index }, type) => ({
+ type: type,
+ documents,
+ index: lunr.Index.load(index),
+ }));
+ const zhDictionary = json.reduce((acc, item) => {
+ for (const tuple of item.index.invertedIndex) {
+ if (/\p{Unified_Ideograph}/u.test(tuple[0][0])) {
+ acc.add(tuple[0]);
+ }
+ }
+ return acc;
+ }, new Set());
+ return {
+ wrappedIndexes,
+ zhDictionary: Array.from(zhDictionary),
+ };
+ }
+ // The index does not exist in development, therefore load a dummy index
here.
+ return {
+ wrappedIndexes: [],
+ zhDictionary: [],
+ };
+}
diff --git a/src/theme/SearchBar/icons.js b/src/theme/SearchBar/icons.js
new file mode 100644
index 00000000000..d5380213e98
--- /dev/null
+++ b/src/theme/SearchBar/icons.js
@@ -0,0 +1,7 @@
+export const iconTitle = '<svg width="20" height="20" viewBox="0 0 20
20"><path d="M17 6v12c0 .52-.2 1-1 1H4c-.7 0-1-.33-1-1V2c0-.55.42-1 1-1h8l5
5zM14 8h-3.13c-.51 0-.87-.34-.87-.87V4" stroke="currentColor" fill="none"
fill-rule="evenodd" stroke-linejoin="round"></path></svg>';
+export const iconHeading = '<svg width="20" height="20" viewBox="0 0 20
20"><path d="M13 13h4-4V8H7v5h6v4-4H7V8H3h4V3v5h6V3v5h4-4v5zm-6 0v4-4H3h4z"
stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round"
stroke-linejoin="round"></path></svg>';
+export const iconContent = '<svg width="20" height="20" viewBox="0 0 20
20"><path d="M17 5H3h14zm0 5H3h14zm0 5H3h14z" stroke="currentColor" fill="none"
fill-rule="evenodd" stroke-linejoin="round"></path></svg>';
+export const iconAction = '<svg width="20" height="20" viewBox="0 0 20 20"><g
stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round"
stroke-linejoin="round"><path d="M18 3v4c0 2-2 4-4 4H2"></path><path d="M8
17l-6-6 6-6"></path></g></svg>';
+export const iconNoResults = '<svg width="40" height="40" viewBox="0 0 20 20"
fill="none" fill-rule="evenodd" stroke="currentColor" stroke-linecap="round"
stroke-linejoin="round"><path d="M15.5 4.8c2 3 1.7 7-1 9.7h0l4.3
4.3-4.3-4.3a7.8 7.8 0 01-9.8 1m-2.2-2.2A7.8 7.8 0 0113.2 2.4M2 18L18
2"></path></svg>';
+export const iconTreeInter = '<svg viewBox="0 0 24 54"><g
stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round"
stroke-linejoin="round"><path d="M8 6v42M20 27H8.3"></path></g></svg>';
+export const iconTreeLast = '<svg viewBox="0 0 24 54"><g stroke="currentColor"
fill="none" fill-rule="evenodd" stroke-linecap="round"
stroke-linejoin="round"><path d="M8 6v21M20 27H8.3"></path></g></svg>';
diff --git a/src/theme/SearchBar/index.js b/src/theme/SearchBar/index.js
new file mode 100644
index 00000000000..369df710bfd
--- /dev/null
+++ b/src/theme/SearchBar/index.js
@@ -0,0 +1,3 @@
+import "../../utils/proxiedGenerated";
+import SearchBar from "./SearchBar";
+export default SearchBar;
diff --git a/src/utils/SearchSourceFactory.spec.ts
b/src/utils/SearchSourceFactory.spec.ts
new file mode 100644
index 00000000000..bdba3949076
--- /dev/null
+++ b/src/utils/SearchSourceFactory.spec.ts
@@ -0,0 +1,100 @@
+import lunr from "lunr";
+import { SearchDocument } from "../../shared/interfaces";
+import { SearchSourceFactory } from "./SearchSourceFactory";
+
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+require("lunr-languages/lunr.stemmer.support")(lunr);
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+require("../../shared/lunrLanguageZh").lunrLanguageZh(lunr);
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+require("lunr-languages/lunr.multi")(lunr);
+
+jest.mock("./proxiedGenerated");
+
+// Builds one lunr index per document kind (titles, headings, contents) and
+// checks which documents `SearchSourceFactory` reports for a few queries,
+// with the result limit set to 2.
+describe("SearchSourceFactory", () => {
+  const documentsOfTitles: SearchDocument[] = [
+    {
+      i: 1,
+      t: "First Page Title",
+      u: "/1",
+    },
+    {
+      i: 4,
+      t: "Second Page Title > peace",
+      u: "/2",
+    },
+  ];
+  const documentsOfHeadings: SearchDocument[] = [
+    {
+      i: 2,
+      t: "First heading > peace",
+      u: "/1#2",
+      p: 1,
+    },
+  ];
+  const documentsOfContents: SearchDocument[] = [
+    {
+      i: 3,
+      t: "First content. > peace",
+      u: "/1#2",
+      p: 1,
+    },
+  ];
+
+  const getIndex = (documents: SearchDocument[]) =>
+    lunr(function () {
+      this.ref("i");
+      this.field("t");
+      this.metadataWhitelist = ["position"];
+      documents.forEach((doc) => {
+        this.add({
+          ...doc,
+          // The ref must be a string.
+          i: doc.i.toString(),
+        });
+      });
+    });
+
+  const searchSource = SearchSourceFactory(
+    [
+      {
+        documents: documentsOfTitles,
+        index: getIndex(documentsOfTitles),
+        type: 0,
+      },
+      {
+        documents: documentsOfHeadings,
+        index: getIndex(documentsOfHeadings),
+        type: 1,
+      },
+      {
+        documents: documentsOfContents,
+        index: getIndex(documentsOfContents),
+        type: 2,
+      },
+    ],
+    [],
+    2
+  );
+  const callback = jest.fn();
+
+  test.each<[string, number[]]>([
+    [",", []],
+    ["nothing", []],
+    ["peace", [4, 2]],
+  ])(
+    "SearchSourceFactory('%s', zhDictionary) should return %j",
+    (input, results) => {
+      searchSource(input, callback);
+      // `callback` is shared by every row, so assert on the most recent call
+      // only; matching any earlier call would let a row pass on the strength
+      // of a previous row's result.
+      expect(callback).toHaveBeenLastCalledWith(
+        results.map((i) =>
+          expect.objectContaining({
+            document: expect.objectContaining({
+              i,
+            }),
+          })
+        )
+      );
+    }
+  );
+});
diff --git a/src/utils/SearchSourceFactory.ts b/src/utils/SearchSourceFactory.ts
new file mode 100644
index 00000000000..e2dfa37bc1e
--- /dev/null
+++ b/src/utils/SearchSourceFactory.ts
@@ -0,0 +1,83 @@
+import { tokenize } from "./tokenize";
+import { smartQueries } from "./smartQueries";
+import {
+ MatchMetadata,
+ WrappedIndex,
+ SearchResult,
+ SearchDocument,
+ InitialSearchResult,
+} from "../../shared/interfaces";
+import { sortSearchResults } from "./sortSearchResults";
+import { processTreeStatusOfSearchResults } from "./processTreeStatusOfSearchResults";
+import { language } from "./proxiedGenerated";
+
+export function SearchSourceFactory(
+ wrappedIndexes: WrappedIndex[],
+ zhDictionary: string[],
+ resultsLimit: number
+) {
+ return function searchSource(
+ input: string,
+ callback: (results: SearchResult[]) => void
+ ): void {
+ const rawTokens = tokenize(input, language);
+ if (rawTokens.length === 0) {
+ callback([]);
+ return;
+ }
+
+ const queries = smartQueries(rawTokens, zhDictionary);
+ const results: InitialSearchResult[] = [];
+
+ search: for (const { term, tokens } of queries) {
+ for (const { documents, index, type } of wrappedIndexes) {
+ results.push(
+ ...index
+ .query((query) => {
+ for (const item of term) {
+ query.term(item.value, {
+ wildcard: item.wildcard,
+ presence: item.presence,
+ });
+ }
+ })
+ .slice(0, resultsLimit)
+ // Remove duplicated results.
+ .filter(
+ (result) =>
+ !results.some(
+ (item) => item.document.i.toString() === result.ref
+ )
+ )
+ .slice(0, resultsLimit - results.length)
+ .map((result) => {
+ const document = documents.find(
+ (doc) => doc.i.toString() === result.ref
+ ) as SearchDocument;
+ return {
+ document,
+ type,
+ page:
+ type !== 0 &&
+ wrappedIndexes[0].documents.find(
+ (doc) => doc.i === document.p
+ ),
+ metadata: result.matchData.metadata as MatchMetadata,
+ tokens,
+ score: result.score,
+ };
+ })
+ );
+ if (results.length >= resultsLimit) {
+ break search;
+ }
+ }
+ }
+
+ sortSearchResults(results);
+
+ processTreeStatusOfSearchResults(results);
+
+ callback(results as SearchResult[]);
+ };
+}
diff --git a/src/utils/__mocks__/proxiedGenerated.ts b/src/utils/__mocks__/proxiedGenerated.ts
new file mode 100644
index 00000000000..35fc2ba2f53
--- /dev/null
+++ b/src/utils/__mocks__/proxiedGenerated.ts
@@ -0,0 +1,21 @@
+export let language = ["en", "zh"];
+export let removeDefaultStopWordFilter = false;
+export let removeDefaultStemmer = false;
+export const searchIndexUrl = "search-index.json?_=abc";
+export const searchResultLimits = 8;
+export const searchResultContextMaxLength = 50;
+export const explicitSearchResultPath = false;
+export const docsPluginIdForPreferredVersion = undefined;
+export const indexDocs = true;
+
+export function __setLanguage(value: string[]): void {
+ language = value;
+}
+
+export function __setRemoveDefaultStopWordFilter(value: boolean): void {
+ removeDefaultStopWordFilter = value;
+}
+
+export function __setRemoveDefaultStemmer(value: boolean): void {
+ removeDefaultStemmer = value;
+}
diff --git a/src/utils/concatDocumentPath.ts b/src/utils/concatDocumentPath.ts
new file mode 100644
index 00000000000..77af1544bac
--- /dev/null
+++ b/src/utils/concatDocumentPath.ts
@@ -0,0 +1,3 @@
+export function concatDocumentPath(pathItems: string[]): string {
+ return pathItems.join(" › ");
+}
diff --git a/src/utils/cutZhWords.spec.ts b/src/utils/cutZhWords.spec.ts
new file mode 100644
index 00000000000..9dae8c9f54b
--- /dev/null
+++ b/src/utils/cutZhWords.spec.ts
@@ -0,0 +1,42 @@
+import { cutZhWords } from "./cutZhWords";
+
+const zhDictionary = ["研究生", "研究", "生命", "科学", "生命科学"];
+
+describe("cutZhWords", () => {
+ test.each<[string, string[][]]>([
+ [
+ "研究生命科学",
+ [
+ ["研究", "生命科学"],
+ ["研究", "生命", "科学"],
+ ["研究生", "科学"],
+ ],
+ ],
+ [
+ "研究生命科",
+ [
+ ["研究", "生命科*"],
+ ["研究", "生命", "科*"],
+ ["研究生", "科*"],
+ ],
+ ],
+ ["研究生", [["研究生"], ["研究", "生*"]]],
+ [
+ "研究生科",
+ [
+ ["研究生", "科*"],
+ ["研究", "生*", "科*"],
+ ],
+ ],
+ ["我研究生", [["研究生"], ["研究", "生*"]]],
+ ["研究生我", [["研究生"], ["研究", "生*"]]],
+ ["我", []],
+ ["命", []],
+ ])("cutZhWords('%s', zhDictionary) should work", (token, terms) => {
+ expect(
+ cutZhWords(token, zhDictionary).map((term) =>
+ term.map((item) => `${item.value}${item.trailing ? "*" : ""}`)
+ )
+ ).toEqual(terms);
+ });
+});
diff --git a/src/utils/cutZhWords.ts b/src/utils/cutZhWords.ts
new file mode 100644
index 00000000000..4c7cf1f7672
--- /dev/null
+++ b/src/utils/cutZhWords.ts
@@ -0,0 +1,89 @@
+import { SmartTerm, WrappedTerm } from "../../shared/interfaces";
+
+/**
+ * Get all possible terms for a string of consecutive Chinese words,
+ * by a words dictionary.
+ *
+ * @remarks
+ *
+ * Terms are sorted in ascending order by the count of words.
+ *
+ * @param token - A string of consecutive Chinese words.
+ * @param zhDictionary - A Chinese words dictionary.
+ *
+ * @returns A smart term list.
+ */
+export function cutZhWords(token: string, zhDictionary: string[]): SmartTerm[] {
+ const wrappedTerms: WrappedTerm[] = [];
+ function cut(subToken: string, carry: WrappedTerm): void {
+ let matchedLastIndex = 0;
+ let matched = false;
+ for (const words of zhDictionary) {
+ if (subToken.substr(0, words.length) === words) {
+ const nextCarry = {
+ missed: carry.missed,
+ term: carry.term.concat({
+ value: words,
+ }),
+ };
+ if (subToken.length > words.length) {
+ cut(subToken.substr(words.length), nextCarry);
+ } else {
+ wrappedTerms.push(nextCarry);
+ }
+ matched = true;
+ } else {
+ for (
+ let lastIndex = words.length - 1;
+ lastIndex > matchedLastIndex;
+ lastIndex -= 1
+ ) {
+ const subWords = words.substr(0, lastIndex);
+ if (subToken.substr(0, lastIndex) === subWords) {
+ matchedLastIndex = lastIndex;
+ const nextCarry = {
+ missed: carry.missed,
+ term: carry.term.concat({
+ value: subWords,
+ trailing: true,
+ }),
+ };
+ if (subToken.length > lastIndex) {
+ cut(subToken.substr(lastIndex), nextCarry);
+ } else {
+ wrappedTerms.push(nextCarry);
+ }
+ matched = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!matched) {
+ if (subToken.length > 0) {
+ cut(subToken.substr(1), {
+ missed: carry.missed + 1,
+ term: carry.term,
+ });
+ } else if (carry.term.length > 0) {
+ wrappedTerms.push(carry);
+ }
+ }
+ }
+ cut(token, {
+ missed: 0,
+ term: [],
+ });
+ return wrappedTerms
+ .sort((a, b) => {
+ const aMissed = a.missed > 0 ? 1 : 0;
+ const bMissed = b.missed > 0 ? 1 : 0;
+ if (aMissed !== bMissed) {
+ // Put all no-words-missed terms before words-missed terms.
+ return aMissed - bMissed;
+ }
+ // Put terms with less words before those with more words.
+ return a.term.length - b.term.length;
+ })
+ .map((item) => item.term);
+}
diff --git a/src/utils/escapeHtml.ts b/src/utils/escapeHtml.ts
new file mode 100644
index 00000000000..6e5008d0301
--- /dev/null
+++ b/src/utils/escapeHtml.ts
@@ -0,0 +1,15 @@
+/**
+ * Escape html special chars.
+ *
+ * @param unsafe - A unsafe string.
+ *
+ * @returns A safe string can be injected as innerHTML.
+ */
+export function escapeHtml(unsafe: string): string {
+ return unsafe
+ .replace(/&/g, "&amp;")
+ .replace(/</g, "&lt;")
+ .replace(/>/g, "&gt;")
+ .replace(/"/g, "&quot;")
+ .replace(/'/g, "&#039;");
+}
diff --git a/src/utils/getStemmedPositions.spec.ts b/src/utils/getStemmedPositions.spec.ts
new file mode 100644
index 00000000000..f5070c8af39
--- /dev/null
+++ b/src/utils/getStemmedPositions.spec.ts
@@ -0,0 +1,41 @@
+import { getStemmedPositions } from "./getStemmedPositions";
+
+describe("getStemmedPositions", () => {
+ test("flatten and sort positions", () => {
+ expect(
+ getStemmedPositions(
+ {
+ dr: {
+ body: {
+ position: [
+ [9, 2],
+ [24, 2],
+ ],
+ },
+ },
+ dream: {
+ body: {
+ position: [
+ [9, 5],
+ [24, 5],
+ ],
+ },
+ },
+ true: {
+ body: {
+ position: [[36, 4]],
+ },
+ },
+ unknown: {},
+ },
+ "body"
+ )
+ ).toEqual([
+ [9, 5],
+ [9, 2],
+ [24, 5],
+ [24, 2],
+ [36, 4],
+ ]);
+ });
+});
diff --git a/src/utils/getStemmedPositions.ts b/src/utils/getStemmedPositions.ts
new file mode 100644
index 00000000000..52f4ec51e5e
--- /dev/null
+++ b/src/utils/getStemmedPositions.ts
@@ -0,0 +1,17 @@
+import { MatchMetadata, MetadataPosition } from "../../shared/interfaces";
+
+export function getStemmedPositions(
+ metadata: MatchMetadata,
+ field: string
+): MetadataPosition[] {
+ const positions: MetadataPosition[] = [];
+ for (const match of Object.values(metadata)) {
+ if (match[field]) {
+ positions.push(...match[field].position);
+ }
+ }
+
+ // Put positions with lower start pos before those with higher start pos.
+ // Put longer positions before shorter positions when they are the same in start pos.
+ return positions.sort((a, b) => a[0] - b[0] || b[1] - a[1]);
+}
diff --git a/src/utils/highlight.spec.ts b/src/utils/highlight.spec.ts
new file mode 100644
index 00000000000..0bd97e69f78
--- /dev/null
+++ b/src/utils/highlight.spec.ts
@@ -0,0 +1,31 @@
+import { highlight } from "./highlight";
+
+describe("highlight", () => {
+ test.each<[string, string[], boolean, string]>([
+ [
+ "I Have A Dream. And the dream comes true",
+ ["dream", "have", "true", "i"],
+ false,
+ "<mark>I</mark> <mark>Have</mark> A <mark>Dream</mark>. And the <mark>dream</mark> comes <mark>true</mark>",
+ ],
+ [
+ "<b>The</b> dream comes <em>true</em>",
+ ["dream"],
+ false,
+ "&lt;b&gt;The&lt;/b&gt; <mark>dream</mark> comes &lt;em&gt;true&lt;/em&gt;",
+ ],
+ [
+ "query jQuery",
+ ["jquery", "query"],
+ false,
+ "<mark>query</mark> <mark>jQuery</mark>",
+ ],
+ ["dream", ["dreams"], true, "<mark>dream</mark>"],
+ ["<b>dream</b>", ["dreams"], true, "<mark>&lt;b&gt;dream&lt;/b&gt;</mark>"],
+ ])(
+ "highlight('%s', %j) should return '%s'",
+ (text, tokens, matched, result) => {
+ expect(highlight(text, tokens, matched)).toEqual(result);
+ }
+ );
+});
diff --git a/src/utils/highlight.ts b/src/utils/highlight.ts
new file mode 100644
index 00000000000..7a193d777dd
--- /dev/null
+++ b/src/utils/highlight.ts
@@ -0,0 +1,43 @@
+import { escapeHtml } from "./escapeHtml";
+
+/**
+ * Highlight specified tokens in text content.
+ *
+ * @param content - Text content.
+ * @param tokens - Tokens to be highlighted (in lower-case and sorted by descending of length).
+ * @param forceMatched - Whether to force matched.
+ *
+ * @returns A html string with marked tokens.
+ */
+export function highlight(
+ content: string,
+ tokens: string[],
+ forceMatched?: boolean
+): string {
+ const html: string[] = [];
+
+ for (const token of tokens) {
+ const index = content.toLowerCase().indexOf(token);
+ if (index >= 0) {
+ if (index > 0) {
+ html.push(highlight(content.substr(0, index), tokens));
+ }
+ html.push(
+ `<mark>${escapeHtml(content.substr(index, token.length))}</mark>`
+ );
+ const end = index + token.length;
+ if (end < content.length) {
+ html.push(highlight(content.substr(end), tokens));
+ }
+ break;
+ }
+ }
+
+ if (html.length === 0) {
+ return forceMatched
+ ? `<mark>${escapeHtml(content)}</mark>`
+ : escapeHtml(content);
+ }
+
+ return html.join("");
+}
diff --git a/src/utils/highlightStemmed.spec.ts b/src/utils/highlightStemmed.spec.ts
new file mode 100644
index 00000000000..43997e5347e
--- /dev/null
+++ b/src/utils/highlightStemmed.spec.ts
@@ -0,0 +1,165 @@
+import { MetadataPosition, HighlightChunk } from "../../shared/interfaces";
+import { highlightStemmed, splitIntoChunks } from "./highlightStemmed";
+
+jest.mock("./proxiedGenerated");
+
+describe("highlightStemmed", () => {
+ test.each<[string, MetadataPosition[], string[], number | undefined, string]>(
+ [
+ [
+ "I Have A Dream. And the dream comes true",
+ //1 5 0 5 0 5 0 5 0
+ [
+ [9, 5], // dream
+ [24, 5], // dream
+ [36, 4], // true
+ ],
+ ["dream", "true"],
+ undefined,
+ "I Have A <mark>Dream</mark>. And the <mark>dream</mark> comes <mark>true</mark>",
+ ],
+ [
+ "I Have A Dream. And the dream comes true",
+ //1 5 0 5 0 5 0 5 0
+ [
+ [9, 5], // dream
+ [24, 5], // dream
+ [36, 4], // true
+ ],
+ ["dream", "true"],
+ 16,
+ "… A <mark>Dream</mark>. And …",
+ ],
+ ]
+ )(
+ "highlightStemmed('%s', %j, %j, %j) should return '%s'",
+ (text, positions, tokens, maxLength, result) => {
+ expect(highlightStemmed(text, positions, tokens, maxLength)).toEqual(
+ result
+ );
+ }
+ );
+});
+
+describe("splitIntoChunks", () => {
+ test.each<[string, MetadataPosition[], string[], HighlightChunk[], number]>([
+ [
+ "I Have A Dream. And the dream comes true.<br />",
+ //1 5 10 15 20 25 30 35 40
+ [
+ [9, 5], // dream
+ [12, 2], // am
+ [24, 5], // dream
+ [27, 2], // am
+ [36, 4], // true
+ ],
+ ["dream", "true", "am"],
+ [
+ {
+ html: "I",
+ textLength: 1,
+ },
+ {
+ html: " ",
+ textLength: 1,
+ },
+ {
+ html: "Have",
+ textLength: 4,
+ },
+ {
+ html: " ",
+ textLength: 1,
+ },
+ {
+ html: "A",
+ textLength: 1,
+ },
+ {
+ html: " ",
+ textLength: 1,
+ },
+ {
+ html: "<mark>Dream</mark>",
+ textLength: 5,
+ },
+ {
+ html: ". ",
+ textLength: 2,
+ },
+ {
+ html: "And",
+ textLength: 3,
+ },
+ {
+ html: " ",
+ textLength: 1,
+ },
+ {
+ html: "the",
+ textLength: 3,
+ },
+ {
+ html: " ",
+ textLength: 1,
+ },
+ {
+ html: "<mark>dream</mark>",
+ textLength: 5,
+ },
+ {
+ html: " ",
+ textLength: 1,
+ },
+ {
+ html: "comes",
+ textLength: 5,
+ },
+ {
+ html: " ",
+ textLength: 1,
+ },
+ {
+ html: "<mark>true</mark>",
+ textLength: 4,
+ },
+ {
+ html: ".&lt;",
+ textLength: 2,
+ },
+ {
+ html: "br",
+ textLength: 2,
+ },
+ {
+ html: " /&gt;",
+ textLength: 3,
+ },
+ ],
+ 6,
+ ],
+ [
+ "研究生",
+ [
+ [0, 3],
+ [0, 2],
+ ],
+ ["研究生", "研究"],
+ [
+ {
+ html: "<mark>研究生</mark>",
+ textLength: 3,
+ },
+ ],
+ 0,
+ ],
+ ])(
+ "splitIntoChunks('%s', %j, %j, 0, 0) should return %j",
+ (text, positions, tokens, chunks, chunkIndex) => {
+ expect(splitIntoChunks(text, positions, tokens)).toEqual({
+ chunkIndex,
+ chunks,
+ });
+ }
+ );
+});
diff --git a/src/utils/highlightStemmed.ts b/src/utils/highlightStemmed.ts
new file mode 100644
index 00000000000..b0158058147
--- /dev/null
+++ b/src/utils/highlightStemmed.ts
@@ -0,0 +1,124 @@
+import { HighlightChunk, MetadataPosition } from "../../shared/interfaces";
+import { escapeHtml } from "./escapeHtml";
+import { highlight } from "./highlight";
+import { looseTokenize } from "./looseTokenize";
+import { searchResultContextMaxLength } from "./proxiedGenerated";
+
+export function highlightStemmed(
+ content: string,
+ positions: MetadataPosition[],
+ tokens: string[],
+ maxLength = searchResultContextMaxLength
+): string {
+ const { chunkIndex, chunks } = splitIntoChunks(content, positions, tokens);
+
+ const leadingChunks = chunks.slice(0, chunkIndex);
+ const firstChunk = chunks[chunkIndex];
+ const html: string[] = [firstChunk.html];
+ const trailingChunks = chunks.slice(chunkIndex + 1);
+
+ let currentLength = firstChunk.textLength;
+ let leftPadding = 0;
+ let rightPadding = 0;
+ let leftOverflowed = false;
+ let rightOverflowed = false;
+
+ while (currentLength < maxLength) {
+ if (
+ (leftPadding <= rightPadding || trailingChunks.length === 0) &&
+ leadingChunks.length > 0
+ ) {
+ const chunk = leadingChunks.pop() as HighlightChunk;
+ if (currentLength + chunk.textLength <= maxLength) {
+ html.unshift(chunk.html);
+ leftPadding += chunk.textLength;
+ currentLength += chunk.textLength;
+ } else {
+ leftOverflowed = true;
+ leadingChunks.length = 0;
+ }
+ } else if (trailingChunks.length > 0) {
+ const chunk = trailingChunks.shift() as HighlightChunk;
+ if (currentLength + chunk.textLength <= maxLength) {
+ html.push(chunk.html);
+ rightPadding += chunk.textLength;
+ currentLength += chunk.textLength;
+ } else {
+ rightOverflowed = true;
+ trailingChunks.length = 0;
+ }
+ } else {
+ break;
+ }
+ }
+
+ if (leftOverflowed || leadingChunks.length > 0) {
+ html.unshift("…");
+ }
+
+ if (rightOverflowed || trailingChunks.length > 0) {
+ html.push("…");
+ }
+
+ return html.join("");
+}
+
+export function splitIntoChunks(
+ content: string,
+ positions: MetadataPosition[],
+ tokens: string[]
+): {
+ chunkIndex: number;
+ chunks: HighlightChunk[];
+} {
+ const chunks: HighlightChunk[] = [];
+ let positionIndex = 0;
+ let cursor = 0;
+ let chunkIndex = -1;
+ while (positionIndex < positions.length) {
+ const [start, length] = positions[positionIndex];
+ positionIndex += 1;
+ if (start < cursor) {
+ continue;
+ }
+
+ if (start > cursor) {
+ const leadingChunks = looseTokenize(content.substring(cursor, start)).map(
+ (token) => ({
+ html: escapeHtml(token),
+ textLength: token.length,
+ })
+ );
+ for (const item of leadingChunks) {
+ chunks.push(item);
+ }
+ }
+
+ if (chunkIndex === -1) {
+ chunkIndex = chunks.length;
+ }
+
+ cursor = start + length;
+ chunks.push({
+ html: highlight(content.substring(start, cursor), tokens, true),
+ textLength: length,
+ });
+ }
+
+ if (cursor < content.length) {
+ const trailingChunks = looseTokenize(content.substring(cursor)).map(
+ (token) => ({
+ html: escapeHtml(token),
+ textLength: token.length,
+ })
+ );
+ for (const item of trailingChunks) {
+ chunks.push(item);
+ }
+ }
+
+ return {
+ chunkIndex,
+ chunks,
+ };
+}
diff --git a/src/utils/looseTokenize.spec.ts b/src/utils/looseTokenize.spec.ts
new file mode 100644
index 00000000000..33f39bc35bf
--- /dev/null
+++ b/src/utils/looseTokenize.spec.ts
@@ -0,0 +1,9 @@
+import { looseTokenize } from "./looseTokenize";
+
+describe("looseTokenize", () => {
+ test.each<[string, string[]]>([
+ ["I have a 梦想。", ["I", " ", "have", " ", "a", " ", "梦", "想", "。"]],
+ ])("looseTokenize('%s') should return %j", (content, tokens) => {
+ expect(looseTokenize(content)).toEqual(tokens);
+ });
+});
diff --git a/src/utils/looseTokenize.ts b/src/utils/looseTokenize.ts
new file mode 100644
index 00000000000..48b95357bdf
--- /dev/null
+++ b/src/utils/looseTokenize.ts
@@ -0,0 +1,22 @@
+// https://zhuanlan.zhihu.com/p/33335629
+const singleMatchOfWord = /\w+|\p{Unified_Ideograph}/u;
+
+export function looseTokenize(content: string): string[] {
+ const tokens: string[] = [];
+ let start = 0;
+ let text = content;
+ while (text.length > 0) {
+ const match = text.match(singleMatchOfWord);
+ if (!match) {
+ tokens.push(text);
+ break;
+ }
+ if ((match.index as number) > 0) {
+ tokens.push(text.substring(0, match.index));
+ }
+ tokens.push(match[0]);
+ start += (match.index as number) + match[0].length;
+ text = content.substring(start);
+ }
+ return tokens;
+}
diff --git a/src/utils/processTreeStatusOfSearchResults.spec.ts b/src/utils/processTreeStatusOfSearchResults.spec.ts
new file mode 100644
index 00000000000..019aa844cf1
--- /dev/null
+++ b/src/utils/processTreeStatusOfSearchResults.spec.ts
@@ -0,0 +1,83 @@
+import { InitialSearchResult } from "../../shared/interfaces";
+import { processTreeStatusOfSearchResults } from "./processTreeStatusOfSearchResults";
+
+describe("processTreeStatusOfSearchResults", () => {
+ test("should work", () => {
+ const pageTitles = [
+ {
+ document: {
+ i: 100,
+ },
+ type: 0,
+ page: undefined,
+ },
+ {
+ document: {
+ i: 200,
+ },
+ type: 0,
+ page: undefined,
+ },
+ ] as InitialSearchResult[];
+ const results = [
+ {
+ document: {
+ i: 1,
+ },
+ type: 2,
+ page: {},
+ },
+ {
+ document: {
+ i: 2,
+ },
+ type: 1,
+ page: {},
+ },
+ pageTitles[0],
+ {
+ document: {
+ i: 101,
+ },
+ type: 2,
+ page: pageTitles[0].document,
+ },
+ {
+ document: {
+ i: 3,
+ },
+ type: 1,
+ page: {},
+ },
+ pageTitles[1],
+ {
+ document: {
+ i: 201,
+ },
+ type: 1,
+ page: pageTitles[1].document,
+ },
+ {
+ document: {
+ i: 202,
+ },
+ type: 2,
+ page: pageTitles[1].document,
+ },
+ ] as InitialSearchResult[];
+ processTreeStatusOfSearchResults(results);
+ const statuses: [boolean, boolean][] = [
+ [undefined, undefined],
+ [undefined, undefined],
+ [undefined, undefined],
+ [undefined, true],
+ [undefined, undefined],
+ [undefined, undefined],
+ [true, undefined],
+ [undefined, true],
+ ];
+ results.forEach((item, i) => {
+ expect([item.isInterOfTree, item.isLastOfTree]).toEqual(statuses[i]);
+ });
+ });
+});
diff --git a/src/utils/processTreeStatusOfSearchResults.ts b/src/utils/processTreeStatusOfSearchResults.ts
new file mode 100644
index 00000000000..9cd29603b8f
--- /dev/null
+++ b/src/utils/processTreeStatusOfSearchResults.ts
@@ -0,0 +1,19 @@
+import { InitialSearchResult } from "../../shared/interfaces";
+
+export function processTreeStatusOfSearchResults(
+ results: InitialSearchResult[]
+): void {
+ results.forEach((item, i) => {
+ if (
+ i > 0 &&
+ item.page &&
+ results.some((prev) => prev.document === item.page)
+ ) {
+ if (i < results.length - 1 && results[i + 1].page === item.page) {
+ item.isInterOfTree = true;
+ } else {
+ item.isLastOfTree = true;
+ }
+ }
+ });
+}
diff --git a/src/utils/proxiedGenerated.ts b/src/utils/proxiedGenerated.ts
new file mode 100644
index 00000000000..8fd1adacb11
--- /dev/null
+++ b/src/utils/proxiedGenerated.ts
@@ -0,0 +1,2 @@
+// This file is auto generated while building.
+export * from "@generated/@easyops-cn/docusaurus-search-local/default/generated.js";
diff --git a/src/utils/smartQueries.spec.ts b/src/utils/smartQueries.spec.ts
new file mode 100644
index 00000000000..c0ac72889a8
--- /dev/null
+++ b/src/utils/smartQueries.spec.ts
@@ -0,0 +1,285 @@
+import lunr from "lunr";
+import { smartQueries } from "./smartQueries";
+import {
+ __setLanguage,
+ __setRemoveDefaultStopWordFilter,
+ __setRemoveDefaultStemmer,
+} from "./proxiedGenerated";
+import { SmartQuery } from "../../shared/interfaces";
+
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+require("lunr-languages/lunr.stemmer.support")(lunr);
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+require("../../shared/lunrLanguageZh").lunrLanguageZh(lunr);
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+require("lunr-languages/lunr.multi")(lunr);
+
+(lunr as any).fake = {};
+
+jest.mock("./proxiedGenerated");
+
+const zhDictionary = ["研究生", "研究", "生命", "科学", "生命科学"];
+
+interface TestQuery {
+ tokens: string[];
+ keyword: string;
+}
+
+describe("smartQueries", () => {
+ beforeEach(() => {
+ __setLanguage(["en", "zh"]);
+ __setRemoveDefaultStopWordFilter(false);
+ __setRemoveDefaultStemmer(false);
+ });
+
+ test.each<[string[], TestQuery[]]>([
+ [
+ ["hello"],
+ [
+ {
+ tokens: ["hello"],
+ keyword: "+hello",
+ },
+ {
+ tokens: ["hello"],
+ keyword: "+hello*",
+ },
+ ],
+ ],
+ [
+ ["hello", "world"],
+ [
+ {
+ tokens: ["hello", "world"],
+ keyword: "+hello +world",
+ },
+ {
+ tokens: ["hello", "world"],
+ keyword: "+hello +world*",
+ },
+ ],
+ ],
+ [
+ ["研究生命科学"],
+ [
+ {
+ tokens: ["研究", "生命科学"],
+ keyword: "+研究 +生命科学",
+ },
+ {
+ tokens: ["研究", "生命", "科学"],
+ keyword: "+研究 +生命 +科学",
+ },
+ {
+ tokens: ["研究生", "科学"],
+ keyword: "+研究生 +科学",
+ },
+ {
+ tokens: ["研究", "生命科学"],
+ keyword: "+研究 +生命科学*",
+ },
+ {
+ tokens: ["研究", "生命", "科学"],
+ keyword: "+研究 +生命 +科学*",
+ },
+ {
+ tokens: ["研究生", "科学"],
+ keyword: "+研究生 +科学*",
+ },
+ {
+ tokens: ["研究", "生命"],
+ keyword: "+研究 +生命",
+ },
+ {
+ tokens: ["研究", "科学"],
+ keyword: "+研究 +科学",
+ },
+ {
+ tokens: ["生命", "科学"],
+ keyword: "+生命 +科学",
+ },
+ {
+ tokens: ["研究", "科学"],
+ keyword: "+研究 +科学*",
+ },
+ {
+ tokens: ["生命", "科学"],
+ keyword: "+生命 +科学*",
+ },
+ ],
+ ],
+ [
+ ["研究生"],
+ [
+ {
+ tokens: ["研究生"],
+ keyword: "+研究生",
+ },
+ {
+ tokens: ["研究", "生"],
+ keyword: "+研究 +生*",
+ },
+ {
+ tokens: ["研究生"],
+ keyword: "+研究生*",
+ },
+ ],
+ ],
+ /* [
+ ["生命科学", "研究生"],
+ [
+ {
+ tokens: ["生命科学", "研究生"],
+ keyword: "+生命科学 +研究生",
+ },
+ {
+ tokens: ["生命科学", "研究", "生"],
+ keyword: "+生命科学 +研究 +生*",
+ },
+ {
+ tokens: ["生命", "科学", "研究生"],
+ keyword: "+生命 +科学 +研究生",
+ },
+ {
+ tokens: ["生命", "科学", "研究", "生"],
+ keyword: "+生命 +科学 +研究 +生*",
+ },
+ {
+ tokens: ["生命科学", "研究生"],
+ keyword: "+生命科学 +研究生*",
+ },
+ {
+ tokens: ["生命", "科学", "研究生"],
+ keyword: "+生命 +科学 +研究生*",
+ },
+ ],
+ ], */
+ [
+ ["a", "hello", "world"],
+ [
+ {
+ tokens: ["a", "hello", "world"],
+ keyword: "+a +hello +world",
+ },
+ {
+ tokens: ["hello", "world"],
+ keyword: "+hello +world",
+ },
+ {
+ tokens: ["a", "hello", "world"],
+ keyword: "+a +hello +world*",
+ },
+ {
+ tokens: ["hello", "world"],
+ keyword: "+hello +world*",
+ },
+ ],
+ ],
+ [
+ ["hello", "a"],
+ [
+ {
+ tokens: ["hello", "a"],
+ keyword: "+hello +a",
+ },
+ {
+ tokens: ["hello"],
+ keyword: "+hello",
+ },
+ {
+ tokens: ["hello", "a"],
+ keyword: "+hello +a*",
+ },
+ ],
+ ],
+ [
+ ["a"],
+ [
+ {
+ tokens: ["a"],
+ keyword: "+a",
+ },
+ {
+ tokens: ["a"],
+ keyword: "+a*",
+ },
+ ],
+ ],
+ [
+ ["hello", "world", "命"],
+ [
+ {
+ tokens: ["hello", "world", "命"],
+ keyword: "+*hello* +*world* +*命*",
+ },
+ ],
+ ],
+ [
+ ["termos", "alfabetização"],
+ [
+ {
+ tokens: ["termos", "alfabetização"],
+ keyword: "+termos +alfabetização",
+ },
+ {
+ tokens: ["termos", "alfabetização"],
+ keyword: "+termos +alfabetização*",
+ },
+ ],
+ ],
+ ])("smartQueries(%j, zhDictionary) should work", (tokens, queries) => {
+ expect(smartQueries(tokens, zhDictionary).map(transformQuery)).toEqual(
+ queries
+ );
+ });
+});
+
+describe("smartQueries with no stop words filter", () => {
+ beforeEach(() => {
+ __setLanguage(["en", "fake"]);
+ __setRemoveDefaultStopWordFilter(true);
+ __setRemoveDefaultStemmer(false);
+ });
+
+ test.each<[string[], TestQuery[]]>([
+ [
+ ["a", "hello"],
+ [
+ {
+ tokens: ["a", "hello"],
+ keyword: "+a +hello",
+ },
+ {
+ tokens: ["a", "hello"],
+ keyword: "+a +hello*",
+ },
+ ],
+ ],
+ ])("smartQueries(%j, zhDictionary) should work", (tokens, queries) => {
+ expect(smartQueries(tokens, zhDictionary).map(transformQuery)).toEqual(
+ queries
+ );
+ });
+});
+
+function transformQuery(query: SmartQuery): TestQuery {
+ return {
+ tokens: query.tokens,
+ keyword: query.term
+ .map(
+ (item) =>
+ `${item.presence === lunr.Query.presence.REQUIRED ? "+" : ""}${
+ (item.wildcard & lunr.Query.wildcard.LEADING) ===
+ lunr.Query.wildcard.LEADING
+ ? "*"
+ : ""
+ }${item.value}${
+ (item.wildcard & lunr.Query.wildcard.TRAILING) ===
+ lunr.Query.wildcard.TRAILING
+ ? "*"
+ : ""
+ }`
+ )
+ .join(" "),
+ };
+}
diff --git a/src/utils/smartQueries.ts b/src/utils/smartQueries.ts
new file mode 100644
index 00000000000..29fe8d7bcd4
--- /dev/null
+++ b/src/utils/smartQueries.ts
@@ -0,0 +1,131 @@
+import lunr from "lunr";
+import { SmartQuery, SmartTerm } from "../../shared/interfaces";
+import { smartTerms } from "./smartTerms";
+import { language, removeDefaultStopWordFilter } from "./proxiedGenerated";
+
+/**
+ * Get all possible queries for a list of tokens consists of words mixed English and Chinese,
+ * by a Chinese words dictionary.
+ *
+ * @param tokens - Tokens consists of English words or strings of consecutive Chinese words.
+ * @param zhDictionary - A Chinese words dictionary.
+ *
+ * @returns A smart query list.
+ */
+export function smartQueries(
+ tokens: string[],
+ zhDictionary: string[]
+): SmartQuery[] {
+ const terms = smartTerms(tokens, zhDictionary);
+
+ if (terms.length === 0) {
+ // There are no matched terms.
+ // All tokens are considered required and with wildcard.
+ return [
+ {
+ tokens,
+ term: tokens.map((value) => ({
+ value,
+ presence: lunr.Query.presence.REQUIRED,
+ wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING,
+ })),
+ },
+ ];
+ }
+
+ // The last token of a term maybe incomplete while user is typing.
+ for (const term of terms) {
+ term[term.length - 1].maybeTyping = true;
+ }
+
+ // Try to append terms without stop words,
+ // since they are removed in the index.
+ const stopWordPipelines: lunr.PipelineFunction[] = [];
+ for (const lang of language) {
+ if (lang === "en") {
+ if (!removeDefaultStopWordFilter) {
+ stopWordPipelines.unshift(lunr.stopWordFilter);
+ }
+ } else {
+ const lunrLang = (lunr as any)[lang] as typeof lunr;
+ if (lunrLang.stopWordFilter) {
+ stopWordPipelines.unshift(lunrLang.stopWordFilter);
+ }
+ }
+ }
+
+ let refinedTerms: SmartTerm[];
+
+ if (stopWordPipelines.length > 0) {
+ const pipe = (term: SmartTerm) =>
+ stopWordPipelines.reduce(
+ (term, p) =>
+ term.filter((item) =>
+ (p as unknown as (str: string) => string | undefined)(item.value)
+ ),
+ term
+ );
+ refinedTerms = [];
+ const newTerms: SmartTerm[] = [];
+ for (const term of terms) {
+ const filteredTerm = pipe(term);
+ refinedTerms.push(filteredTerm);
+ // Add extra terms only if some stop words are removed,
+ // and some non-stop-words exist too.
+ if (filteredTerm.length < term.length && filteredTerm.length > 0) {
+ newTerms.push(filteredTerm);
+ }
+ }
+ terms.push(...newTerms);
+ } else {
+ refinedTerms = terms.slice();
+ }
+
+ // Also try to add extra terms which miss one of the searched tokens,
+ // when the term contains 3 or more tokens,
+ // to improve the search precision.
+ const extraTerms: SmartTerm[] = [];
+ for (const term of refinedTerms) {
+ if (term.length > 2) {
+ for (let i = term.length - 1; i >= 0; i -= 1) {
+ extraTerms.push(term.slice(0, i).concat(term.slice(i + 1)));
+ }
+ }
+ }
+
+ return getQueriesMaybeTyping(terms).concat(getQueriesMaybeTyping(extraTerms));
+}
+
+function getQueriesMaybeTyping(terms: SmartTerm[]): SmartQuery[] {
+ return termsToQueries(terms).concat(
+ termsToQueries(
+ // Ignore terms whose last token already has a trailing wildcard,
+ // or the last token is not `maybeTyping`.
+ terms.filter((term) => {
+ const token = term[term.length - 1];
+ return !token.trailing && token.maybeTyping;
+ }),
+ true
+ )
+ );
+}
+
+function termsToQueries(
+ terms: SmartTerm[],
+ maybeTyping?: boolean
+): SmartQuery[] {
+ return terms.map((term) => ({
+ tokens: term.map((item) => item.value),
+ term: term.map((item) => ({
+ value: item.value,
+ presence: lunr.Query.presence.REQUIRED,
+ // The last token of a term maybe incomplete while user is typing.
+ // So append more queries with trailing wildcard added.
+ wildcard: (
+ maybeTyping ? item.trailing || item.maybeTyping : item.trailing
+ )
+ ? lunr.Query.wildcard.TRAILING
+ : lunr.Query.wildcard.NONE,
+ })),
+ }));
+}
diff --git a/src/utils/smartTerms.spec.ts b/src/utils/smartTerms.spec.ts
new file mode 100644
index 00000000000..1eadc548eb4
--- /dev/null
+++ b/src/utils/smartTerms.spec.ts
@@ -0,0 +1,35 @@
+import { smartTerms } from "./smartTerms";
+
+const zhDictionary = ["研究生", "研究", "生命", "科学", "生命科学"];
+
+describe("smartTerms", () => {
+ test.each<[string[], string[][]]>([
+ [["hello"], [["hello"]]],
+ [["hello", "world"], [["hello", "world"]]],
+ [
+ ["hello", "world", "研究生命科学"],
+ [
+ ["hello", "world", "研究", "生命科学"],
+ ["hello", "world", "研究", "生命", "科学"],
+ ["hello", "world", "研究生", "科学"],
+ ],
+ ],
+ [
+ ["生命科学", "研究生"],
+ [
+ ["生命科学", "研究生"],
+ ["生命科学", "研究", "生*"],
+ ["生命", "科学", "研究生"],
+ ["生命", "科学", "研究", "生*"],
+ ],
+ ],
+ [["hello", "world", "命"], []],
+ [["alfabetização"], [["alfabetização"]]],
+ ])("smartTerms(%j, zhDictionary) should work", (tokens, terms) => {
+ expect(
+ smartTerms(tokens, zhDictionary).map((term) =>
+ term.map((item) => `${item.value}${item.trailing ? "*" : ""}`)
+ )
+ ).toEqual(terms);
+ });
+});
diff --git a/src/utils/smartTerms.ts b/src/utils/smartTerms.ts
new file mode 100644
index 00000000000..9787cafa192
--- /dev/null
+++ b/src/utils/smartTerms.ts
@@ -0,0 +1,42 @@
+import { SmartTerm } from "../../shared/interfaces";
+import { cutZhWords } from "./cutZhWords";
+
+/**
+ * Get all possible terms for a list of tokens consists of words mixed in Chinese and non-Chinese,
+ * by a Chinese words dictionary.
+ *
+ * @param tokens - Tokens consists of English words or strings of consecutive Chinese words.
+ * @param zhDictionary - A Chinese words dictionary.
+ *
+ * @returns A smart term list.
+ */
+export function smartTerms(
+ tokens: string[],
+ zhDictionary: string[]
+): SmartTerm[] {
+ const terms: SmartTerm[] = [];
+
+ function cutMixedWords(subTokens: string[], carry: SmartTerm): void {
+ if (subTokens.length === 0) {
+ terms.push(carry);
+ return;
+ }
+ const token = subTokens[0];
+ if (/\p{Unified_Ideograph}/u.test(token)) {
+ const terms = cutZhWords(token, zhDictionary);
+ for (const term of terms) {
+ const nextCarry = carry.concat(...term);
+ cutMixedWords(subTokens.slice(1), nextCarry);
+ }
+ } else {
+ const nextCarry = carry.concat({
+ value: token,
+ });
+ cutMixedWords(subTokens.slice(1), nextCarry);
+ }
+ }
+
+ cutMixedWords(tokens, []);
+
+ return terms;
+}
diff --git a/src/utils/sortSearchResults.spec.ts b/src/utils/sortSearchResults.spec.ts
new file mode 100644
index 00000000000..952ce2962fd
--- /dev/null
+++ b/src/utils/sortSearchResults.spec.ts
@@ -0,0 +1,73 @@
+import { InitialSearchResult } from "../../shared/interfaces";
+import { sortSearchResults } from "./sortSearchResults";
+
+// Verifies that hits belonging to a page are regrouped right behind that
+// page's title hit, while hits whose page is not in the list stay where
+// their own original position puts them.
+describe("sortSearchResults", () => {
+ test("should work", () => {
+ // Two page-title hits (type 0, no `page`). Both are also placed into
+ // `results` below, and their `document` objects are reused as `page`
+ // references so identity comparison can match them.
+ const pageTitles = [
+ {
+ document: {
+ i: 100,
+ },
+ type: 0,
+ page: undefined,
+ },
+ {
+ document: {
+ i: 200,
+ },
+ type: 0,
+ page: undefined,
+ },
+ ] as InitialSearchResult[];
+ // Input order is deliberately shuffled. Items with `page: {}` point at a
+ // fresh object that matches no document in the list.
+ // NOTE(review): types 1 and 2 are presumably heading/content hits; confirm
+ // against the shared interfaces.
+ const results = [
+ {
+ document: {
+ i: 1,
+ },
+ type: 2,
+ page: {},
+ },
+ {
+ document: {
+ i: 2,
+ },
+ type: 1,
+ page: {},
+ },
+ pageTitles[0],
+ {
+ document: {
+ i: 3,
+ },
+ type: 1,
+ page: {},
+ },
+ // Children of page 200, listed before their title.
+ {
+ document: {
+ i: 201,
+ },
+ type: 1,
+ page: pageTitles[1].document,
+ },
+ {
+ document: {
+ i: 202,
+ },
+ type: 2,
+ page: pageTitles[1].document,
+ },
+ pageTitles[1],
+ // Child of page 100, listed long after its title.
+ {
+ document: {
+ i: 101,
+ },
+ type: 2,
+ page: pageTitles[0].document,
+ },
+ ] as InitialSearchResult[];
+ // Sorting happens in place; there is no return value to inspect.
+ sortSearchResults(results);
+ // 101 moves up behind 100; 200 moves ahead of its children 201 and 202;
+ // 1, 2 and 3 keep the slots given by their original positions.
+ expect(results.map((item) => item.document.i)).toEqual([
+ 1, 2, 100, 101, 3, 200, 201, 202,
+ ]);
+ });
+});
diff --git a/src/utils/sortSearchResults.ts b/src/utils/sortSearchResults.ts
new file mode 100644
index 00000000000..41509cc8f1d
--- /dev/null
+++ b/src/utils/sortSearchResults.ts
@@ -0,0 +1,40 @@
+import { InitialSearchResult, SearchResult } from "../../shared/interfaces";
+
+/**
+ * Sort search results in place so that hits of headings and contents come
+ * just after the title hit of the page they belong to, if that title exists
+ * in the list.
+ *
+ * Every item is stamped with its original position in `index`. A hit whose
+ * page title is not present in `results` is positioned by its own original
+ * index instead.
+ *
+ * @param results - Search results to reorder; mutated in place.
+ */
+export function sortSearchResults(results: InitialSearchResult[]): void {
+  // Snapshot "document -> original position" BEFORE sorting. Looking this up
+  // on `results` from inside the comparator would read an array that `sort`
+  // is reordering (its intermediate state is implementation-defined) and
+  // would cost a linear scan per comparison.
+  const positionByDocument = new Map<unknown, number>();
+  results.forEach((item, index) => {
+    item.index = index;
+    // Keep the first occurrence of a document.
+    if (!positionByDocument.has(item.document)) {
+      positionByDocument.set(item.document, index);
+    }
+  });
+
+  // Original position of the owning page's title, falling back to the item's
+  // own position for titles (type 0) and for hits whose page is not listed.
+  const pageIndexOf = (item: SearchResult): number => {
+    if (item.type > 0 && item.page) {
+      const pageIndex = positionByDocument.get(item.page);
+      if (pageIndex !== undefined) {
+        return pageIndex;
+      }
+    }
+    return item.index;
+  };
+
+  (results as SearchResult[]).sort((a, b) => {
+    const aPageIndex = pageIndexOf(a);
+    const bPageIndex = pageIndexOf(b);
+
+    if (aPageIndex !== bPageIndex) {
+      return aPageIndex - bPageIndex;
+    }
+
+    // Same page group: the title (type 0) leads, the rest keep input order.
+    if (a.type === 0) {
+      return -1;
+    }
+    if (b.type === 0) {
+      return 1;
+    }
+    return a.index - b.index;
+  });
+}
diff --git a/src/utils/tokenize.spec.ts b/src/utils/tokenize.spec.ts
new file mode 100644
index 00000000000..62dcb370d7a
--- /dev/null
+++ b/src/utils/tokenize.spec.ts
@@ -0,0 +1,40 @@
+import lunr from "lunr";
+
+// The `require`s below are required for testing `ja`.
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+require("lunr-languages/lunr.stemmer.support")(lunr);
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+require("lunr-languages/tinyseg")(lunr);
+// eslint-disable-next-line @typescript-eslint/no-var-requires
+require(`lunr-languages/lunr.ja`)(lunr);
+
+import { tokenize } from "./tokenize";
+
+// Exercises `tokenize` for three language setups: en+zh, en+pt, and ja alone.
+describe("tokenize", () => {
+ // en + zh: hyphens and punctuation separate tokens, output is lower-cased,
+ // and each run of consecutive Chinese characters stays a single token.
+ test.each<[string, string[]]>([
+ ["Hello-World", ["hello", "world"]],
+ ["Hello World 「世界和平」", ["hello", "world", "世界和平"]],
+ [
+ "a1b2很好c3_d4更好56也好,不错。",
+ ["a1b2", "很好", "c3_d4", "更好", "56", "也好", "不错"],
+ ],
+ // Text made only of punctuation yields no tokens.
+ ["…", []],
+ ])("tokenize('%s', ['en', 'zh']) should return %j", (text, tokens) => {
+ expect(tokenize(text, ["en", "zh"])).toEqual(tokens);
+ });
+
+ // en + pt: accented Latin letters are kept inside their words.
+ test.each<[string, string[]]>([
+ [
+ "População portuguesa é composta",
+ ["população", "portuguesa", "é", "composta"],
+ ],
+ ])("tokenize('%s', ['en', 'pt']) should return %j", (text, tokens) => {
+ expect(tokenize(text, ["en", "pt"])).toEqual(tokens);
+ });
+
+ // ja only: relies on the lunr `ja` plugin loaded at the top of this file;
+ // the expected tokens include the trailing "。".
+ test.each<[string, string[]]>([
+ ["私は電車が好きです。", ["私", "は", "電車", "が", "好き", "です", "。"]],
+ ])("tokenize('%s', ['ja']) should return %j", (text, tokens) => {
+ expect(tokenize(text, ["ja"])).toEqual(tokens);
+ });
+});
diff --git a/src/utils/tokenize.ts b/src/utils/tokenize.ts
new file mode 100644
index 00000000000..63755bcb3a5
--- /dev/null
+++ b/src/utils/tokenize.ts
@@ -0,0 +1,32 @@
+import lunr from "lunr";
+
+/**
+ * Split a sentence into tokens, treating a sequence of consecutive Chinese
+ * words as a single token.
+ *
+ * @param text - Text to be tokenized.
+ * @param language - Languages used.
+ *
+ * @returns Tokens.
+ */
+export function tokenize(text: string, language: string[]): string[] {
+  // A single language that has its own tokenizer registered on `lunr` is
+  // handed over to that tokenizer entirely.
+  const hasOwnTokenizer =
+    language.length === 1 && ["ja", "jp", "th"].includes(language[0]);
+  if (hasOwnTokenizer) {
+    const languageLunr = (lunr as any)[language[0]] as typeof lunr;
+    return languageLunr.tokenizer(text).map((token) => token.toString());
+  }
+
+  // `zh` gets a dedicated pattern; everything else splits on hyphens and
+  // whitespace.
+  // Currently only works fine with letters in Latin alphabet and Chinese.
+  // https://zhuanlan.zhihu.com/p/33335629
+  // Commented-out alternatives kept for reference:
+  // regExpMatchWords = /\p{Unified_Ideograph}+|[^-\s\p{Unified_Ideograph}]+/gu;
+  // https://mothereff.in/regexpu#input=const+regex+%3D+/%5Cp%7BUnified_Ideograph%7D/u%3B&unicodePropertyEscape=1
+  // regExpMatchWords = /\w+|[\u3400-\u4DBF\u4E00-\u9FFC\uFA0E\uFA0F\uFA11\uFA13\uFA14\uFA1F\uFA21\uFA23\uFA24\uFA27-\uFA29\u{20000}-\u{2A6DD}\u{2A700}-\u{2B734}\u{2B740}-\u{2B81D}\u{2B820}-\u{2CEA1}\u{2CEB0}-\u{2EBE0}\u{30000}-\u{3134A}]+/gu
+  const wordPattern = language.includes("zh")
+    ? /\w+|\p{Unified_Ideograph}+/gu
+    : /[^-\s]+/g;
+
+  // Matching runs on the lower-cased text; no match at all means no tokens.
+  const matched = text.toLowerCase().match(wordPattern);
+  return matched === null ? [] : matched;
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]