From d874617e48744a872106244b069b9f15222e93e8 Mon Sep 17 00:00:00 2001 From: Dragory <2606411+Dragory@users.noreply.github.com> Date: Sat, 9 Oct 2021 12:50:47 +0300 Subject: [PATCH] perf: merge regexes in match_regex trigger while taking backreferences into account --- .../plugins/Automod/triggers/matchRegex.ts | 32 +++++++++++++++--- backend/src/utils/categorize.ts | 33 +++++++++++++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 backend/src/utils/categorize.ts diff --git a/backend/src/plugins/Automod/triggers/matchRegex.ts b/backend/src/plugins/Automod/triggers/matchRegex.ts index 3e0b2e38..0f9ac41b 100644 --- a/backend/src/plugins/Automod/triggers/matchRegex.ts +++ b/backend/src/plugins/Automod/triggers/matchRegex.ts @@ -7,12 +7,30 @@ import { TRegex } from "../../../validatorUtils"; import { getTextMatchPartialSummary } from "../functions/getTextMatchPartialSummary"; import { MatchableTextType, matchMultipleTextTypesOnMessage } from "../functions/matchMultipleTextTypesOnMessage"; import { automodTrigger } from "../helpers"; +import { categorize } from "../../../utils/categorize"; interface MatchResultType { pattern: string; type: MatchableTextType; } +const regexCache = new WeakMap(); +const hasBackreference = /(?:^|[^\\]|[\\]{2})\\\d+/; + +function buildCacheableRegexes(sourceRegexes: RegExp[], flags: string) { + const categories = categorize(sourceRegexes, { + hasBackreferences: (regex) => hasBackreference.exec(regex.source) !== null, + safeToMerge: () => true, + }); + const regexes: RegExp[] = []; + if (categories.safeToMerge.length) { + const merged = categories.safeToMerge.map((r) => `(?:${r.source})`).join("|"); + regexes.push(new RegExp(merged, flags)); + } + regexes.push(...categories.hasBackreferences); + return regexes; +} + export const MatchRegexTrigger = automodTrigger()({ configType: t.type({ patterns: t.array(TRegex), @@ -44,6 +62,13 @@ export const MatchRegexTrigger = automodTrigger()({ return; } + if (!regexCache.has(trigger)) { + const flags = trigger.case_sensitive ? "" : "i"; + const toCache = buildCacheableRegexes(trigger.patterns, flags); + regexCache.set(trigger, toCache); + } + const regexes = regexCache.get(trigger)!; + for await (let [type, str] of matchMultipleTextTypesOnMessage(pluginData, trigger, context.message)) { if (trigger.strip_markdown) { str = stripMarkdown(str); @@ -53,13 +78,12 @@ export const MatchRegexTrigger = automodTrigger()({ str = normalizeText(str); } - for (const sourceRegex of trigger.patterns) { - const regex = new RegExp(sourceRegex.source, trigger.case_sensitive && !sourceRegex.ignoreCase ? "" : "i"); + for (const regex of regexes) { const matches = await pluginData.state.regexRunner.exec(regex, str).catch(allowTimeout); if (matches?.length) { return { extra: { - pattern: sourceRegex.source, + pattern: regex.source, type, }, }; @@ -72,6 +96,6 @@ export const MatchRegexTrigger = automodTrigger()({ renderMatchInformation({ pluginData, contexts, matchResult }) { const partialSummary = getTextMatchPartialSummary(pluginData, matchResult.extra.type, contexts[0]); - return `Matched regex \`${Util.escapeInlineCode(matchResult.extra.pattern)}\` in ${partialSummary}`; + return `Matched regex in ${partialSummary}`; }, }); diff --git a/backend/src/utils/categorize.ts b/backend/src/utils/categorize.ts new file mode 100644 index 00000000..ca1b98dc --- /dev/null +++ b/backend/src/utils/categorize.ts @@ -0,0 +1,33 @@ +type Categories = { + [key: string]: (item: T) => boolean; +}; + +type CategoryReturnType> = { + [key in keyof C]: T[]; +}; + +function initCategories>(categories: C): CategoryReturnType { + return Object.keys(categories).reduce((map, key) => { + map[key] = []; + return map; + }, {}) as CategoryReturnType; +} + +export function categorize>( + arr: T[], + categories: C, +): CategoryReturnType { + const result = initCategories(categories); + const categoryEntries = Object.entries(categories); + + itemLoop: for (const item of arr) { + for (const [category, fn] of categoryEntries) { + if (fn(item)) { + result[category].push(item); + continue itemLoop; + } + } + } + + return result; +}