perf(automod): also merge regexes in match_links, match_words
This commit is contained in:
parent
aea6999753
commit
44f5b77cc7
5 changed files with 85 additions and 43 deletions
|
@ -7,12 +7,18 @@ import { TRegex } from "../../../validatorUtils";
|
||||||
import { getTextMatchPartialSummary } from "../functions/getTextMatchPartialSummary";
|
import { getTextMatchPartialSummary } from "../functions/getTextMatchPartialSummary";
|
||||||
import { MatchableTextType, matchMultipleTextTypesOnMessage } from "../functions/matchMultipleTextTypesOnMessage";
|
import { MatchableTextType, matchMultipleTextTypesOnMessage } from "../functions/matchMultipleTextTypesOnMessage";
|
||||||
import { automodTrigger } from "../helpers";
|
import { automodTrigger } from "../helpers";
|
||||||
|
import { mergeRegexes } from "../../../utils/mergeRegexes";
|
||||||
|
import { mergeWordsIntoRegex } from "../../../utils/mergeWordsIntoRegex";
|
||||||
|
|
||||||
interface MatchResultType {
|
interface MatchResultType {
|
||||||
type: MatchableTextType;
|
type: MatchableTextType;
|
||||||
link: string;
|
link: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const regexCache = new WeakMap<any, RegExp[]>();
|
||||||
|
|
||||||
|
const quickLinkCheck = /^https?:\/\//i;
|
||||||
|
|
||||||
export const MatchLinksTrigger = automodTrigger<MatchResultType>()({
|
export const MatchLinksTrigger = automodTrigger<MatchResultType>()({
|
||||||
configType: t.type({
|
configType: t.type({
|
||||||
include_domains: tNullable(t.array(t.string)),
|
include_domains: tNullable(t.array(t.string)),
|
||||||
|
@ -52,7 +58,7 @@ export const MatchLinksTrigger = automodTrigger<MatchResultType>()({
|
||||||
|
|
||||||
for (const link of links) {
|
for (const link of links) {
|
||||||
// "real link" = a link that Discord highlights
|
// "real link" = a link that Discord highlights
|
||||||
if (trigger.only_real_links && !link.input.match(/^https?:\/\//i)) {
|
if (trigger.only_real_links && !quickLinkCheck.test(link.input)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,7 +68,13 @@ export const MatchLinksTrigger = automodTrigger<MatchResultType>()({
|
||||||
// In order of specificity, regex > word > domain
|
// In order of specificity, regex > word > domain
|
||||||
|
|
||||||
if (trigger.exclude_regex) {
|
if (trigger.exclude_regex) {
|
||||||
for (const sourceRegex of trigger.exclude_regex) {
|
if (!regexCache.has(trigger.exclude_regex)) {
|
||||||
|
const toCache = mergeRegexes(trigger.exclude_regex, "i");
|
||||||
|
regexCache.set(trigger.exclude_regex, toCache);
|
||||||
|
}
|
||||||
|
const regexes = regexCache.get(trigger.exclude_regex)!;
|
||||||
|
|
||||||
|
for (const sourceRegex of regexes) {
|
||||||
const matches = await pluginData.state.regexRunner.exec(sourceRegex, link.input).catch(allowTimeout);
|
const matches = await pluginData.state.regexRunner.exec(sourceRegex, link.input).catch(allowTimeout);
|
||||||
if (matches) {
|
if (matches) {
|
||||||
continue typeLoop;
|
continue typeLoop;
|
||||||
|
@ -71,7 +83,13 @@ export const MatchLinksTrigger = automodTrigger<MatchResultType>()({
|
||||||
}
|
}
|
||||||
|
|
||||||
if (trigger.include_regex) {
|
if (trigger.include_regex) {
|
||||||
for (const sourceRegex of trigger.include_regex) {
|
if (!regexCache.has(trigger.include_regex)) {
|
||||||
|
const toCache = mergeRegexes(trigger.include_regex, "i");
|
||||||
|
regexCache.set(trigger.include_regex, toCache);
|
||||||
|
}
|
||||||
|
const regexes = regexCache.get(trigger.include_regex)!;
|
||||||
|
|
||||||
|
for (const sourceRegex of regexes) {
|
||||||
const matches = await pluginData.state.regexRunner.exec(sourceRegex, link.input).catch(allowTimeout);
|
const matches = await pluginData.state.regexRunner.exec(sourceRegex, link.input).catch(allowTimeout);
|
||||||
if (matches) {
|
if (matches) {
|
||||||
return { extra: { type, link: link.input } };
|
return { extra: { type, link: link.input } };
|
||||||
|
@ -80,8 +98,13 @@ export const MatchLinksTrigger = automodTrigger<MatchResultType>()({
|
||||||
}
|
}
|
||||||
|
|
||||||
if (trigger.exclude_words) {
|
if (trigger.exclude_words) {
|
||||||
for (const word of trigger.exclude_words) {
|
if (!regexCache.has(trigger.exclude_words)) {
|
||||||
const regex = new RegExp(escapeStringRegexp(word), "i");
|
const toCache = mergeWordsIntoRegex(trigger.exclude_words, "i");
|
||||||
|
regexCache.set(trigger.exclude_words, [toCache]);
|
||||||
|
}
|
||||||
|
const regexes = regexCache.get(trigger.exclude_words)!;
|
||||||
|
|
||||||
|
for (const regex of regexes) {
|
||||||
if (regex.test(link.input)) {
|
if (regex.test(link.input)) {
|
||||||
continue typeLoop;
|
continue typeLoop;
|
||||||
}
|
}
|
||||||
|
@ -89,8 +112,13 @@ export const MatchLinksTrigger = automodTrigger<MatchResultType>()({
|
||||||
}
|
}
|
||||||
|
|
||||||
if (trigger.include_words) {
|
if (trigger.include_words) {
|
||||||
for (const word of trigger.include_words) {
|
if (!regexCache.has(trigger.include_words)) {
|
||||||
const regex = new RegExp(escapeStringRegexp(word), "i");
|
const toCache = mergeWordsIntoRegex(trigger.include_words, "i");
|
||||||
|
regexCache.set(trigger.include_words, [toCache]);
|
||||||
|
}
|
||||||
|
const regexes = regexCache.get(trigger.include_words)!;
|
||||||
|
|
||||||
|
for (const regex of regexes) {
|
||||||
if (regex.test(link.input)) {
|
if (regex.test(link.input)) {
|
||||||
return { extra: { type, link: link.input } };
|
return { extra: { type, link: link.input } };
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import { Util } from "discord.js";
|
|
||||||
import * as t from "io-ts";
|
import * as t from "io-ts";
|
||||||
import { allowTimeout } from "../../../RegExpRunner";
|
import { allowTimeout } from "../../../RegExpRunner";
|
||||||
import { normalizeText } from "../../../utils/normalizeText";
|
import { normalizeText } from "../../../utils/normalizeText";
|
||||||
|
@ -7,7 +6,7 @@ import { TRegex } from "../../../validatorUtils";
|
||||||
import { getTextMatchPartialSummary } from "../functions/getTextMatchPartialSummary";
|
import { getTextMatchPartialSummary } from "../functions/getTextMatchPartialSummary";
|
||||||
import { MatchableTextType, matchMultipleTextTypesOnMessage } from "../functions/matchMultipleTextTypesOnMessage";
|
import { MatchableTextType, matchMultipleTextTypesOnMessage } from "../functions/matchMultipleTextTypesOnMessage";
|
||||||
import { automodTrigger } from "../helpers";
|
import { automodTrigger } from "../helpers";
|
||||||
import { categorize } from "../../../utils/categorize";
|
import { mergeRegexes } from "../../../utils/mergeRegexes";
|
||||||
|
|
||||||
interface MatchResultType {
|
interface MatchResultType {
|
||||||
pattern: string;
|
pattern: string;
|
||||||
|
@ -15,21 +14,6 @@ interface MatchResultType {
|
||||||
}
|
}
|
||||||
|
|
||||||
const regexCache = new WeakMap<any, RegExp[]>();
|
const regexCache = new WeakMap<any, RegExp[]>();
|
||||||
const hasBackreference = /(?:^|[^\\]|[\\]{2})\\\d+/;
|
|
||||||
|
|
||||||
function buildCacheableRegexes(sourceRegexes: RegExp[], flags: string) {
|
|
||||||
const categories = categorize(sourceRegexes, {
|
|
||||||
hasBackreferences: (regex) => hasBackreference.exec(regex.source) !== null,
|
|
||||||
safeToMerge: () => true,
|
|
||||||
});
|
|
||||||
const regexes: RegExp[] = [];
|
|
||||||
if (categories.safeToMerge.length) {
|
|
||||||
const merged = categories.safeToMerge.map((r) => `(?:${r.source})`).join("|");
|
|
||||||
regexes.push(new RegExp(merged, flags));
|
|
||||||
}
|
|
||||||
regexes.push(...categories.hasBackreferences);
|
|
||||||
return regexes;
|
|
||||||
}
|
|
||||||
|
|
||||||
export const MatchRegexTrigger = automodTrigger<MatchResultType>()({
|
export const MatchRegexTrigger = automodTrigger<MatchResultType>()({
|
||||||
configType: t.type({
|
configType: t.type({
|
||||||
|
@ -64,7 +48,7 @@ export const MatchRegexTrigger = automodTrigger<MatchResultType>()({
|
||||||
|
|
||||||
if (!regexCache.has(trigger)) {
|
if (!regexCache.has(trigger)) {
|
||||||
const flags = trigger.case_sensitive ? "" : "i";
|
const flags = trigger.case_sensitive ? "" : "i";
|
||||||
const toCache = buildCacheableRegexes(trigger.patterns, flags);
|
const toCache = mergeRegexes(trigger.patterns, flags);
|
||||||
regexCache.set(trigger, toCache);
|
regexCache.set(trigger, toCache);
|
||||||
}
|
}
|
||||||
const regexes = regexCache.get(trigger)!;
|
const regexes = regexCache.get(trigger)!;
|
||||||
|
|
|
@ -12,6 +12,8 @@ interface MatchResultType {
|
||||||
type: MatchableTextType;
|
type: MatchableTextType;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const regexCache = new WeakMap<any, RegExp[]>();
|
||||||
|
|
||||||
export const MatchWordsTrigger = automodTrigger<MatchResultType>()({
|
export const MatchWordsTrigger = automodTrigger<MatchResultType>()({
|
||||||
configType: t.type({
|
configType: t.type({
|
||||||
words: t.array(t.string),
|
words: t.array(t.string),
|
||||||
|
@ -49,6 +51,25 @@ export const MatchWordsTrigger = automodTrigger<MatchResultType>()({
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!regexCache.has(trigger)) {
|
||||||
|
const looseMatchingThreshold = Math.min(Math.max(trigger.loose_matching_threshold, 1), 64);
|
||||||
|
const patterns = trigger.words.map((word) => {
|
||||||
|
let pattern = trigger.loose_matching
|
||||||
|
? [...word].map((c) => escapeStringRegexp(c)).join(`(?:\\s*|.{0,${looseMatchingThreshold})`)
|
||||||
|
: escapeStringRegexp(word);
|
||||||
|
|
||||||
|
if (trigger.only_full_words) {
|
||||||
|
pattern = `\\b${pattern}\\b`;
|
||||||
|
}
|
||||||
|
|
||||||
|
return pattern;
|
||||||
|
});
|
||||||
|
|
||||||
|
const mergedRegex = new RegExp(patterns.map((p) => `(?:${p})`).join("|"), trigger.case_sensitive ? "" : "i");
|
||||||
|
regexCache.set(trigger, [mergedRegex]);
|
||||||
|
}
|
||||||
|
const regexes = regexCache.get(trigger)!;
|
||||||
|
|
||||||
for await (let [type, str] of matchMultipleTextTypesOnMessage(pluginData, trigger, context.message)) {
|
for await (let [type, str] of matchMultipleTextTypesOnMessage(pluginData, trigger, context.message)) {
|
||||||
if (trigger.strip_markdown) {
|
if (trigger.strip_markdown) {
|
||||||
str = stripMarkdown(str);
|
str = stripMarkdown(str);
|
||||||
|
@ -58,26 +79,12 @@ export const MatchWordsTrigger = automodTrigger<MatchResultType>()({
|
||||||
str = normalizeText(str);
|
str = normalizeText(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
const looseMatchingThreshold = Math.min(Math.max(trigger.loose_matching_threshold, 1), 64);
|
for (const regex of regexes) {
|
||||||
|
if (regex.test(str)) {
|
||||||
for (const word of trigger.words) {
|
|
||||||
// When performing loose matching, allow any amount of whitespace or up to looseMatchingThreshold number of other
|
|
||||||
// characters between the matched characters. E.g. if we're matching banana, a loose match could also match b a n a n a
|
|
||||||
let pattern = trigger.loose_matching
|
|
||||||
? [...word].map((c) => escapeStringRegexp(c)).join(`(?:\\s*|.{0,${looseMatchingThreshold})`)
|
|
||||||
: escapeStringRegexp(word);
|
|
||||||
|
|
||||||
if (trigger.only_full_words) {
|
|
||||||
pattern = `\\b${pattern}\\b`;
|
|
||||||
}
|
|
||||||
|
|
||||||
const regex = new RegExp(pattern, trigger.case_sensitive ? "" : "i");
|
|
||||||
const test = regex.test(str);
|
|
||||||
if (test) {
|
|
||||||
return {
|
return {
|
||||||
extra: {
|
extra: {
|
||||||
word,
|
|
||||||
type,
|
type,
|
||||||
|
word: "",
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -89,6 +96,6 @@ export const MatchWordsTrigger = automodTrigger<MatchResultType>()({
|
||||||
|
|
||||||
renderMatchInformation({ pluginData, contexts, matchResult }) {
|
renderMatchInformation({ pluginData, contexts, matchResult }) {
|
||||||
const partialSummary = getTextMatchPartialSummary(pluginData, matchResult.extra.type, contexts[0]);
|
const partialSummary = getTextMatchPartialSummary(pluginData, matchResult.extra.type, contexts[0]);
|
||||||
return `Matched word \`${Util.escapeInlineCode(matchResult.extra.word)}\` in ${partialSummary}`;
|
return `Matched word in ${partialSummary}`;
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
17
backend/src/utils/mergeRegexes.ts
Normal file
17
backend/src/utils/mergeRegexes.ts
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
import { categorize } from "./categorize";
|
||||||
|
|
||||||
|
// Detects a numeric backreference (`\1`, `\2`, ...) inside a regex *source string*: a backslash followed by one or
// more digits, where that backslash sits at the start of the source, after a non-backslash character, or after
// two backslashes (i.e. after an escaped backslash). Note that `\d+` also accepts `\0`.
const hasBackreference = /(?:^|[^\\]|[\\]{2})\\\d+/;
|
||||||
|
|
||||||
|
/**
 * Combines a list of regexes into as few regexes as possible, so callers have fewer patterns to run.
 *
 * Patterns without numeric backreferences are each wrapped in a non-capturing group and joined into one
 * alternation compiled with `flags`. Patterns that contain numeric backreferences are kept as separate
 * regexes: joining them with other patterns would renumber capture groups and change what `\1`, `\2`, ...
 * refer to.
 *
 * NOTE(review): relies on `categorize` putting each regex into the first category whose predicate
 * returns true (so `safeToMerge` only receives patterns without backreferences) - confirm against
 * ./categorize.
 *
 * @param sourceRegexes Regexes to combine. For merged patterns only `source` is used.
 * @param flags RegExp flags to apply to the returned regexes, e.g. `"i"`.
 * @returns The merged regex (when at least one pattern was mergeable), followed by one regex per
 *   pattern that contains backreferences.
 */
export function mergeRegexes(sourceRegexes: RegExp[], flags: string): RegExp[] {
  const categories = categorize(sourceRegexes, {
    hasBackreferences: (regex) => hasBackreference.test(regex.source),
    safeToMerge: () => true,
  });

  const regexes: RegExp[] = [];
  if (categories.safeToMerge.length) {
    const merged = categories.safeToMerge.map((r) => `(?:${r.source})`).join("|");
    regexes.push(new RegExp(merged, flags));
  }

  // Backreference patterns cannot be merged, but they must still honour `flags`. Returning them untouched
  // would silently drop e.g. a requested "i" flag for exactly these patterns. Their own flags are kept and
  // the requested ones are added (de-duplicated, since RegExp rejects repeated flag characters).
  for (const regex of categories.hasBackreferences) {
    const combinedFlags = Array.from(new Set(regex.flags + flags)).join("");
    regexes.push(new RegExp(regex.source, combinedFlags));
  }

  return regexes;
}
|
6
backend/src/utils/mergeWordsIntoRegex.ts
Normal file
6
backend/src/utils/mergeWordsIntoRegex.ts
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
import escapeStringRegexp from "escape-string-regexp";
|
||||||
|
|
||||||
|
/**
 * Builds a single regex that matches when any of the given words occurs in the tested string.
 *
 * Each word is passed through `escapeStringRegexp`, wrapped in a non-capturing group, and the groups are
 * joined with `|`.
 *
 * @param words Words to look for.
 * @param flags Optional RegExp flags for the resulting regex, e.g. `"i"`.
 * @returns A regex matching any of the words. For an empty list, a regex that never matches.
 */
export function mergeWordsIntoRegex(words: string[], flags?: string): RegExp {
  if (words.length === 0) {
    // Joining zero alternatives gives an empty pattern, and an empty pattern matches every string.
    // "No words" must mean "no matches", so use an empty negative lookahead, which can never succeed.
    return new RegExp("(?!)", flags);
  }

  const source = words.map((word) => `(?:${escapeStringRegexp(word)})`).join("|");
  return new RegExp(source, flags);
}
|
Loading…
Add table
Reference in a new issue