perf: merge regexes in match_regex trigger while taking backreferences into account
This commit is contained in:
parent
50665c7bb7
commit
d874617e48
2 changed files with 61 additions and 4 deletions
|
@ -7,12 +7,30 @@ import { TRegex } from "../../../validatorUtils";
|
||||||
import { getTextMatchPartialSummary } from "../functions/getTextMatchPartialSummary";
|
import { getTextMatchPartialSummary } from "../functions/getTextMatchPartialSummary";
|
||||||
import { MatchableTextType, matchMultipleTextTypesOnMessage } from "../functions/matchMultipleTextTypesOnMessage";
|
import { MatchableTextType, matchMultipleTextTypesOnMessage } from "../functions/matchMultipleTextTypesOnMessage";
|
||||||
import { automodTrigger } from "../helpers";
|
import { automodTrigger } from "../helpers";
|
||||||
|
import { categorize } from "../../../utils/categorize";
|
||||||
|
|
||||||
interface MatchResultType {
|
interface MatchResultType {
|
||||||
pattern: string;
|
pattern: string;
|
||||||
type: MatchableTextType;
|
type: MatchableTextType;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Compiled regexes, keyed by the trigger config object they were built from.
 * A WeakMap is used so an entry can be garbage-collected together with its key.
 */
const regexCache = new WeakMap<any, RegExp[]>();

/**
 * Heuristic test for a backreference in a regex source: a backslash that is not
 * itself escaped, followed by digits (`\1`) or by `k<` (named, `\k<name>`).
 * False positives only cause a pattern to be left unmerged, which is always safe.
 */
const hasBackreference = /(?:^|[^\\]|[\\]{2})\\(?:\d+|k<)/;

/**
 * Compiles the given patterns into as few regexes as possible.
 *
 * Patterns without backreferences are joined into a single alternation
 * (`(?:a)|(?:b)|...`) per distinct set of effective flags. Patterns with
 * backreferences are never merged, because merging shifts capture group
 * numbers and would change what the backreference points at.
 *
 * @param sourceRegexes Patterns to compile. They are not modified.
 * @param flags Flags applied to every resulting regex. A pattern that is itself
 *   case-insensitive additionally gets the `i` flag; no other flags are carried
 *   over from the source patterns.
 * @returns Merged regexes first, followed by the unmerged backreference regexes
 *   in their original order. Empty if `sourceRegexes` is empty.
 */
function buildCacheableRegexes(sourceRegexes: RegExp[], flags: string): RegExp[] {
  const effectiveFlagsFor = (regex: RegExp): string =>
    regex.ignoreCase && !flags.includes("i") ? `${flags}i` : flags;

  // Mergeable patterns grouped by the flags they must be compiled with.
  // A Map keeps the groups in first-seen order.
  const mergeableByFlags = new Map<string, RegExp[]>();
  const standalone: RegExp[] = [];

  for (const regex of sourceRegexes) {
    const effectiveFlags = effectiveFlagsFor(regex);

    if (hasBackreference.test(regex.source)) {
      // Recompile rather than reuse the source regex so the effective flags apply here too
      standalone.push(new RegExp(regex.source, effectiveFlags));
      continue;
    }

    const group = mergeableByFlags.get(effectiveFlags);
    if (group) {
      group.push(regex);
    } else {
      mergeableByFlags.set(effectiveFlags, [regex]);
    }
  }

  const regexes: RegExp[] = [];

  for (const [groupFlags, group] of mergeableByFlags) {
    if (group.length > 1) {
      try {
        const merged = group.map((r) => `(?:${r.source})`).join("|");
        regexes.push(new RegExp(merged, groupFlags));
        continue;
      } catch {
        // The combined pattern can be invalid even though each part is valid
        // (e.g. the same named capture group used in two patterns).
        // Fall through and compile this group's patterns one by one instead.
      }
    }

    for (const regex of group) {
      regexes.push(new RegExp(regex.source, groupFlags));
    }
  }

  regexes.push(...standalone);
  return regexes;
}
|
||||||
|
|
||||||
export const MatchRegexTrigger = automodTrigger<MatchResultType>()({
|
export const MatchRegexTrigger = automodTrigger<MatchResultType>()({
|
||||||
configType: t.type({
|
configType: t.type({
|
||||||
patterns: t.array(TRegex),
|
patterns: t.array(TRegex),
|
||||||
|
@ -44,6 +62,13 @@ export const MatchRegexTrigger = automodTrigger<MatchResultType>()({
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!regexCache.has(trigger)) {
|
||||||
|
const flags = trigger.case_sensitive ? "" : "i";
|
||||||
|
const toCache = buildCacheableRegexes(trigger.patterns, flags);
|
||||||
|
regexCache.set(trigger, toCache);
|
||||||
|
}
|
||||||
|
const regexes = regexCache.get(trigger)!;
|
||||||
|
|
||||||
for await (let [type, str] of matchMultipleTextTypesOnMessage(pluginData, trigger, context.message)) {
|
for await (let [type, str] of matchMultipleTextTypesOnMessage(pluginData, trigger, context.message)) {
|
||||||
if (trigger.strip_markdown) {
|
if (trigger.strip_markdown) {
|
||||||
str = stripMarkdown(str);
|
str = stripMarkdown(str);
|
||||||
|
@ -53,13 +78,12 @@ export const MatchRegexTrigger = automodTrigger<MatchResultType>()({
|
||||||
str = normalizeText(str);
|
str = normalizeText(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const sourceRegex of trigger.patterns) {
|
for (const regex of regexes) {
|
||||||
const regex = new RegExp(sourceRegex.source, trigger.case_sensitive && !sourceRegex.ignoreCase ? "" : "i");
|
|
||||||
const matches = await pluginData.state.regexRunner.exec(regex, str).catch(allowTimeout);
|
const matches = await pluginData.state.regexRunner.exec(regex, str).catch(allowTimeout);
|
||||||
if (matches?.length) {
|
if (matches?.length) {
|
||||||
return {
|
return {
|
||||||
extra: {
|
extra: {
|
||||||
pattern: sourceRegex.source,
|
pattern: regex.source,
|
||||||
type,
|
type,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@ -72,6 +96,6 @@ export const MatchRegexTrigger = automodTrigger<MatchResultType>()({
|
||||||
|
|
||||||
renderMatchInformation({ pluginData, contexts, matchResult }) {
|
renderMatchInformation({ pluginData, contexts, matchResult }) {
|
||||||
const partialSummary = getTextMatchPartialSummary(pluginData, matchResult.extra.type, contexts[0]);
|
const partialSummary = getTextMatchPartialSummary(pluginData, matchResult.extra.type, contexts[0]);
|
||||||
return `Matched regex \`${Util.escapeInlineCode(matchResult.extra.pattern)}\` in ${partialSummary}`;
|
return `Matched regex in ${partialSummary}`;
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
33
backend/src/utils/categorize.ts
Normal file
33
backend/src/utils/categorize.ts
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
/** Maps a category name to the predicate that decides whether an item belongs to it. */
type Categories<T> = Record<string, (item: T) => boolean>;

/** Result shape of `categorize`: one array of items for every category name. */
type CategoryReturnType<T, C extends Categories<T>> = {
  [K in keyof C]: T[];
};

/**
 * Creates the result object with an empty array for every category name.
 */
function initCategories<T extends unknown, C extends Categories<T>>(categories: C): CategoryReturnType<T, C> {
  const buckets = {};
  for (const name of Object.keys(categories)) {
    buckets[name] = [];
  }
  return buckets as CategoryReturnType<T, C>;
}

/**
 * Sorts the items of `arr` into named categories.
 *
 * Predicates are tried in the key order of `categories`; an item is placed in
 * the first category whose predicate returns a truthy value and in no other.
 * Items that match no predicate are left out of the result.
 *
 * @param arr Items to sort. The array itself is not modified.
 * @param categories Category names mapped to their membership predicates.
 * @returns An object with one array per category, each in input order.
 */
export function categorize<T extends unknown, C extends Categories<T>>(
  arr: T[],
  categories: C,
): CategoryReturnType<T, C> {
  const buckets = initCategories<T, C>(categories);
  const predicates = Object.entries(categories);

  for (const item of arr) {
    // find() stops at the first accepting predicate, so later ones are never called
    const firstHit = predicates.find(([, accepts]) => accepts(item));
    if (firstHit) {
      buckets[firstHit[0]].push(item);
    }
  }

  return buckets;
}
|
Loading…
Add table
Reference in a new issue