3
0
Fork 0
mirror of https://github.com/ZeppelinBot/Zeppelin.git synced 2025-03-15 05:41:51 +00:00

Run user-supplied regexes in worker threads with a timeout

This commit is contained in:
Dragory 2020-08-05 01:15:36 +03:00
parent 19b97bc32b
commit a7fa258f2a
No known key found for this signature in database
GPG key ID: 5F387BA66DF8AAC1
15 changed files with 237 additions and 43 deletions

View file

@ -3402,6 +3402,11 @@
"resolved": "https://registry.npmjs.org/regexp-tree/-/regexp-tree-0.1.14.tgz",
"integrity": "sha512-59v5A90TAh4cAMyDQEOzcnsu4q7Wb10RsyTjngEnJIZsWYM4siVGu+JmLT1WsxHvOWhiu4YS20XiTuxWMeVoHQ=="
},
"regexp-worker": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/regexp-worker/-/regexp-worker-1.1.0.tgz",
"integrity": "sha512-IDDOhDlI972T7bexYwyw+JKdqFsBtJvX8RA+ChVwjhgcK/gv4eG3oZu8Rbidnamh2U2b0ZWdawKksjzY2dmVFw=="
},
"registry-auth-token": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/registry-auth-token/-/registry-auth-token-4.1.1.tgz",

View file

@ -53,6 +53,7 @@
"passport-custom": "^1.0.5",
"passport-oauth2": "^1.5.0",
"reflect-metadata": "^0.1.12",
"regexp-worker": "^1.1.0",
"safe-regex": "^2.0.2",
"seedrandom": "^3.0.1",
"sharp": "^0.23.4",

View file

@ -0,0 +1,96 @@
import { RegExpWorker, TimeoutError } from "regexp-worker";
import { CooldownManager } from "knub";
import { MINUTES } from "./utils";
import { EventEmitter } from "events";
/**
 * Duck-typed check for a timeout error: any non-nullish value that carries
 * both a `message` and an `elapsedTimeMs` property is treated as one.
 *
 * Nullish values are rejected up front so that a `null`/`undefined` rejection
 * value doesn't raise a TypeError here and mask the original error.
 */
const isTimeoutError = (a: unknown): a is TimeoutError => {
  if (a == null) {
    return false;
  }

  const candidate = a as { message?: unknown; elapsedTimeMs?: unknown };
  return candidate.message != null && candidate.elapsedTimeMs != null;
};
/**
 * Error raised when a regex execution takes too long.
 * Carries the elapsed time alongside the usual error message.
 */
export class RegExpTimeoutError extends Error {
  /** Elapsed time reported for the timed-out execution */
  public elapsedTimeMs: number;

  constructor(message: string, elapsedTimeMs: number) {
    super(message);
    this.elapsedTimeMs = elapsedTimeMs;
  }
}
/**
 * Rejection handler meant to be passed to `.catch()`:
 * a RegExpTimeoutError is swallowed and turned into `null`,
 * any other error is rethrown unchanged.
 */
export function allowTimeout(err: RegExpTimeoutError | Error) {
  const isTimeout = err instanceof RegExpTimeoutError;
  if (!isTimeout) {
    throw err;
  }

  return null;
}
// Timeout handed to the RegExpWorker constructor and reported in the "timeout"/"repeatedTimeout" events
const REGEX_TIMEOUT = 100; // ms
// Cooldown policy for regexes that keep timing out; failures are counted per regex source
const REGEX_FAIL_TO_COOLDOWN_COUNT = 3; // If a regex fails this many times, it goes on cooldown...
const REGEX_FAIL_COOLDOWN = 5 * MINUTES; // ...for this long
export interface RegExpRunner {
  /** Emitted every time a regex execution times out */
  on(event: "timeout", listener: (regexSource: string, timeoutMs: number) => void): this;
  /** Emitted when a regex has timed out often enough to be put on cooldown */
  on(event: "repeatedTimeout", listener: (regexSource: string, timeoutMs: number, failTimes: number) => void): this;
}

/**
 * Leverages RegExpWorker to run regular expressions in worker threads with a timeout.
 * Repeatedly failing regexes are put on a cooldown where requests to execute them are ignored.
 */
export class RegExpRunner extends EventEmitter {
  // Created lazily on first use; null while no worker exists
  private _worker: RegExpWorker | null = null;
  // Cooldowns, keyed by regex source
  private cooldown: CooldownManager;
  // Timeout counters since the last cooldown, keyed by regex source
  private failedTimes: Map<string, number>;

  constructor() {
    super();
    this.cooldown = new CooldownManager();
    this.failedTimes = new Map();
  }

  /**
   * The worker used for executions, created on first access.
   */
  private get worker(): RegExpWorker {
    if (!this._worker) {
      this._worker = new RegExpWorker(REGEX_TIMEOUT);
    }

    return this._worker;
  }

  /**
   * Runs `regex` against `str` through the worker.
   *
   * @returns `null` if the regex is currently on cooldown, or if it is not global and
   *   produced no matches; otherwise the matches array from the worker. Note that a
   *   global regex without matches yields an empty array rather than `null`.
   * @throws RegExpTimeoutError if the execution timed out. Non-timeout errors from
   *   the worker are rethrown as-is.
   */
  public async exec(regex: RegExp, str: string): Promise<null | RegExpExecArray[]> {
    if (this.cooldown.isOnCooldown(regex.source)) {
      return null;
    }

    try {
      const result = await this.worker.execRegExp(regex, str);
      return result.matches.length || regex.global ? result.matches : null;
    } catch (e) {
      if (!isTimeoutError(e)) {
        throw e;
      }

      const failCount = (this.failedTimes.get(regex.source) ?? 0) + 1;
      if (failCount >= REGEX_FAIL_TO_COOLDOWN_COUNT) {
        // Regex has failed too many times: set it on cooldown and start counting from scratch
        this.cooldown.setCooldown(regex.source, REGEX_FAIL_COOLDOWN);
        this.failedTimes.delete(regex.source);
        this.emit("repeatedTimeout", regex.source, REGEX_TIMEOUT, REGEX_FAIL_TO_COOLDOWN_COUNT);
      } else {
        this.failedTimes.set(regex.source, failCount);
      }

      this.emit("timeout", regex.source, REGEX_TIMEOUT);
      throw new RegExpTimeoutError(e.message, e.elapsedTimeMs);
    }
  }

  /**
   * Disposes the worker, if one was ever created.
   * The runner stays usable: a later exec() lazily creates a new worker.
   */
  public async dispose() {
    // Read the backing field directly: going through the `worker` getter here
    // would spawn a brand new worker just to dispose it
    const worker = this._worker;
    if (!worker) {
      return;
    }

    // Detach first so a concurrent dispose() call doesn't dispose the same worker twice
    this._worker = null;
    await worker.dispose();
  }
}

View file

@ -11,7 +11,7 @@ import { StrictValidationError } from "../../validatorUtils";
import { ConfigPreprocessorFn } from "knub/dist/config/configTypes";
import { availableActions } from "./actions/availableActions";
import { clearOldRecentActions } from "./functions/clearOldRecentActions";
import { MINUTES, SECONDS } from "../../utils";
import { disableCodeBlocks, MINUTES, SECONDS } from "../../utils";
import { clearOldRecentSpam } from "./functions/clearOldRecentSpam";
import { GuildAntiraidLevels } from "../../data/GuildAntiraidLevels";
import { GuildArchives } from "../../data/GuildArchives";
@ -23,6 +23,10 @@ import { AntiraidClearCmd } from "./commands/AntiraidClearCmd";
import { SetAntiraidCmd } from "./commands/SetAntiraidCmd";
import { ViewAntiraidCmd } from "./commands/ViewAntiraidCmd";
import { pluginInfo } from "./info";
import { RegExpRunner } from "../../RegExpRunner";
import { LogType } from "../../data/LogType";
import { logger } from "../../logger";
import { discardRegExpRunner, getRegExpRunner } from "../../regExpRunners";
const defaultOptions = {
config: {
@ -161,6 +165,8 @@ export const AutomodPlugin = zeppelinPlugin<AutomodPluginType>()("automod", {
async onLoad(pluginData) {
pluginData.state.queue = new Queue();
pluginData.state.regexRunner = getRegExpRunner(`guild-${pluginData.guild.id}`);
pluginData.state.recentActions = [];
pluginData.state.clearRecentActionsInterval = setInterval(() => clearOldRecentActions(pluginData), 1 * MINUTES);
@ -189,9 +195,11 @@ export const AutomodPlugin = zeppelinPlugin<AutomodPluginType>()("automod", {
pluginData.state.cachedAntiraidLevel = await pluginData.state.antiraidLevels.get();
},
onUnload(pluginData) {
async onUnload(pluginData) {
pluginData.state.queue.clear();
discardRegExpRunner(`guild-${pluginData.guild.id}`);
clearInterval(pluginData.state.clearRecentActionsInterval);
clearInterval(pluginData.state.clearRecentSpamInterval);

View file

@ -10,8 +10,9 @@ import {
verboseChannelMention,
} from "../../../utils";
import { MatchableTextType, matchMultipleTextTypesOnMessage } from "../functions/matchMultipleTextTypesOnMessage";
import { TSafeRegex } from "../../../validatorUtils";
import { TRegex } from "../../../validatorUtils";
import { getTextMatchPartialSummary } from "../functions/getTextMatchPartialSummary";
import { allowTimeout } from "../../../RegExpRunner";
interface MatchResultType {
type: MatchableTextType;
@ -25,8 +26,8 @@ export const MatchLinksTrigger = automodTrigger<MatchResultType>()({
include_subdomains: t.boolean,
include_words: tNullable(t.array(t.string)),
exclude_words: tNullable(t.array(t.string)),
include_regex: tNullable(t.array(TSafeRegex)),
exclude_regex: tNullable(t.array(TSafeRegex)),
include_regex: tNullable(t.array(TRegex)),
exclude_regex: tNullable(t.array(TRegex)),
only_real_links: t.boolean,
match_messages: t.boolean,
match_embeds: t.boolean,
@ -67,16 +68,18 @@ export const MatchLinksTrigger = automodTrigger<MatchResultType>()({
// In order of specificity, regex > word > domain
if (trigger.exclude_regex) {
for (const pattern of trigger.exclude_regex) {
if (pattern.test(link.input)) {
for (const sourceRegex of trigger.exclude_regex) {
const matches = await pluginData.state.regexRunner.exec(sourceRegex, link.input).catch(allowTimeout);
if (matches) {
continue typeLoop;
}
}
}
if (trigger.include_regex) {
for (const pattern of trigger.include_regex) {
if (pattern.test(link.input)) {
for (const sourceRegex of trigger.include_regex) {
const matches = await pluginData.state.regexRunner.exec(sourceRegex, link.input).catch(allowTimeout);
if (matches) {
return { extra: { type, link: link.input } };
}
}

View file

@ -3,8 +3,9 @@ import { transliterate } from "transliteration";
import { automodTrigger } from "../helpers";
import { disableInlineCode, verboseChannelMention } from "../../../utils";
import { MatchableTextType, matchMultipleTextTypesOnMessage } from "../functions/matchMultipleTextTypesOnMessage";
import { TSafeRegex } from "../../../validatorUtils";
import { getTextMatchPartialSummary } from "../functions/getTextMatchPartialSummary";
import { allowTimeout } from "../../../RegExpRunner";
import { TRegex } from "../../../validatorUtils";
interface MatchResultType {
pattern: string;
@ -13,7 +14,7 @@ interface MatchResultType {
export const MatchRegexTrigger = automodTrigger<MatchResultType>()({
configType: t.type({
patterns: t.array(TSafeRegex),
patterns: t.array(TRegex),
case_sensitive: t.boolean,
normalize: t.boolean,
match_messages: t.boolean,
@ -46,9 +47,9 @@ export const MatchRegexTrigger = automodTrigger<MatchResultType>()({
}
for (const sourceRegex of trigger.patterns) {
const regex = new RegExp(sourceRegex.source, trigger.case_sensitive ? "" : "i");
const test = regex.test(str);
if (test) {
const regex = new RegExp(sourceRegex.source, trigger.case_sensitive && !sourceRegex.ignoreCase ? "" : "i");
const matches = await pluginData.state.regexRunner.exec(regex, str).catch(allowTimeout);
if (matches?.length) {
return {
extra: {
pattern: sourceRegex.source,

View file

@ -12,6 +12,7 @@ import { GuildAntiraidLevels } from "../../data/GuildAntiraidLevels";
import { GuildArchives } from "../../data/GuildArchives";
import { RecentActionType } from "./constants";
import Timeout = NodeJS.Timeout;
import { RegExpRunner } from "../../RegExpRunner";
export const Rule = t.type({
enabled: t.boolean,
@ -45,6 +46,11 @@ export interface AutomodPluginType extends BasePluginType {
*/
queue: Queue;
/**
* Per-server regex runner
*/
regexRunner: RegExpRunner;
/**
* Recent actions are used for spam triggers
*/

View file

@ -6,6 +6,8 @@ import { GuildSavedMessages } from "src/data/GuildSavedMessages";
import { onMessageCreate } from "./util/onMessageCreate";
import { onMessageUpdate } from "./util/onMessageUpdate";
import { trimPluginDescription } from "../../utils";
import { discardRegExpRunner, getRegExpRunner } from "../../regExpRunners";
import { LogsPlugin } from "../Logs/LogsPlugin";
const defaultOptions: PluginOptions<CensorPluginType> = {
config: {
@ -51,6 +53,7 @@ export const CensorPlugin = zeppelinPlugin<CensorPluginType>()("censor", {
`),
},
dependencies: [LogsPlugin],
configSchema: ConfigSchema,
defaultOptions,
@ -60,6 +63,8 @@ export const CensorPlugin = zeppelinPlugin<CensorPluginType>()("censor", {
state.serverLogs = new GuildLogs(guild.id);
state.savedMessages = GuildSavedMessages.getGuildInstance(guild.id);
state.regexRunner = getRegExpRunner(`guild-${pluginData.guild.id}`);
state.onMessageCreateFn = msg => onMessageCreate(pluginData, msg);
state.savedMessages.events.on("create", state.onMessageCreateFn);
@ -68,6 +73,8 @@ export const CensorPlugin = zeppelinPlugin<CensorPluginType>()("censor", {
},
onUnload(pluginData) {
discardRegExpRunner(`guild-${pluginData.guild.id}`);
pluginData.state.savedMessages.events.off("create", pluginData.state.onMessageCreateFn);
pluginData.state.savedMessages.events.off("update", pluginData.state.onMessageUpdateFn);
},

View file

@ -1,9 +1,10 @@
import * as t from "io-ts";
import { BasePluginType, eventListener } from "knub";
import { tNullable } from "src/utils";
import { TSafeRegex } from "src/validatorUtils";
import { TRegex } from "src/validatorUtils";
import { GuildLogs } from "src/data/GuildLogs";
import { GuildSavedMessages } from "src/data/GuildSavedMessages";
import { RegExpRunner } from "../../RegExpRunner";
export const ConfigSchema = t.type({
filter_zalgo: t.boolean,
@ -18,7 +19,7 @@ export const ConfigSchema = t.type({
domain_blacklist: tNullable(t.array(t.string)),
blocked_tokens: tNullable(t.array(t.string)),
blocked_words: tNullable(t.array(t.string)),
blocked_regex: tNullable(t.array(TSafeRegex)),
blocked_regex: tNullable(t.array(TRegex)),
});
export type TConfigSchema = t.TypeOf<typeof ConfigSchema>;
@ -28,6 +29,8 @@ export interface CensorPluginType extends BasePluginType {
serverLogs: GuildLogs;
savedMessages: GuildSavedMessages;
regexRunner: RegExpRunner;
onMessageCreateFn;
onMessageUpdateFn;
};

View file

@ -8,6 +8,7 @@ import cloneDeep from "lodash.clonedeep";
import { censorMessage } from "./censorMessage";
import escapeStringRegexp from "escape-string-regexp";
import { logger } from "src/logger";
import { allowTimeout } from "../../../RegExpRunner";
export async function applyFiltersToMsg(
pluginData: PluginData<CensorPluginType>,
@ -137,17 +138,13 @@ export async function applyFiltersToMsg(
}
// Filter regex
const blockedRegex: RegExp[] = config.blocked_regex || [];
for (const [i, regex] of blockedRegex.entries()) {
if (typeof regex.test !== "function") {
logger.info(
`[DEBUG] Regex <${regex}> was not a regex; index ${i} of censor.blocked_regex for guild ${pluginData.guild.name} (${pluginData.guild.id})`,
);
continue;
}
for (const regex of config.blocked_regex || []) {
// We're testing both the original content and content + attachments/embeds here so regexes that use ^ and $ still match the regular content properly
if (regex.test(savedMessage.data.content) || regex.test(messageContent)) {
const matches =
(await pluginData.state.regexRunner.exec(regex, savedMessage.data.content).catch(allowTimeout)) ||
(await pluginData.state.regexRunner.exec(regex, messageContent).catch(allowTimeout));
if (matches) {
censorMessage(pluginData, savedMessage, `blocked regex (\`${regex.source}\`) found`);
return true;
}

View file

@ -19,6 +19,9 @@ import { LogsVoiceJoinEvt, LogsVoiceLeaveEvt, LogsVoiceSwitchEvt } from "./event
import { log } from "./util/log";
import { LogType } from "../../data/LogType";
import { getLogMessage } from "./util/getLogMessage";
import { discardRegExpRunner, getRegExpRunner } from "../../regExpRunners";
import { disableCodeBlocks } from "../../utils";
import { logger } from "../../logger";
const defaultOptions: PluginOptions<LogsPluginType> = {
config: {
@ -100,6 +103,24 @@ export const LogsPlugin = zeppelinPlugin<LogsPluginType>()("logs", {
state.onMessageUpdateFn = (newMsg, oldMsg) => onMessageUpdate(pluginData, newMsg, oldMsg);
state.savedMessages.events.on("update", state.onMessageUpdateFn);
state.regexRunner = getRegExpRunner(`guild-${pluginData.guild.id}`);
state.regexRunnerTimeoutListener = (regexSource, timeoutMs) => {
logger.warn(`Heavy regex (${timeoutMs}): ${regexSource}`);
};
state.regexRunnerRepeatedTimeoutListener = (regexSource, timeoutMs, failedTimes) => {
log(pluginData, LogType.BOT_ALERT, {
body:
`
The following regex has taken longer than ${timeoutMs}ms for ${failedTimes} times and has been temporarily disabled:
`.trim() +
"\n```" +
disableCodeBlocks(regexSource) +
"```",
});
};
state.regexRunner.on("timeout", state.regexRunnerTimeoutListener);
state.regexRunner.on("repeatedTimeout", state.regexRunnerRepeatedTimeoutListener);
},
onUnload(pluginData) {
@ -108,5 +129,9 @@ export const LogsPlugin = zeppelinPlugin<LogsPluginType>()("logs", {
pluginData.state.savedMessages.events.off("delete", pluginData.state.onMessageDeleteFn);
pluginData.state.savedMessages.events.off("deleteBulk", pluginData.state.onMessageDeleteBulkFn);
pluginData.state.savedMessages.events.off("update", pluginData.state.onMessageUpdateFn);
pluginData.state.regexRunner.off("timeout", pluginData.state.regexRunnerTimeoutListener);
pluginData.state.regexRunner.off("repeatedTimeout", pluginData.state.regexRunnerRepeatedTimeoutListener);
discardRegExpRunner(`guild-${pluginData.guild.id}`);
},
});

View file

@ -1,11 +1,12 @@
import * as t from "io-ts";
import { BasePluginType, eventListener } from "knub";
import { TSafeRegex } from "src/validatorUtils";
import { TRegex } from "src/validatorUtils";
import { GuildLogs } from "src/data/GuildLogs";
import { GuildSavedMessages } from "src/data/GuildSavedMessages";
import { GuildArchives } from "src/data/GuildArchives";
import { GuildCases } from "src/data/GuildCases";
import { tMessageContent, tNullable } from "../../utils";
import { RegExpRunner } from "../../RegExpRunner";
export const tLogFormats = t.record(t.string, t.union([t.string, tMessageContent]));
export type TLogFormats = t.TypeOf<typeof tLogFormats>;
@ -16,7 +17,7 @@ const LogChannel = t.partial({
batched: t.boolean,
batch_time: t.number,
excluded_users: t.array(t.string),
excluded_message_regexes: t.array(TSafeRegex),
excluded_message_regexes: t.array(TRegex),
excluded_channels: t.array(t.string),
format: tNullable(tLogFormats),
});
@ -45,6 +46,10 @@ export interface LogsPluginType extends BasePluginType {
archives: GuildArchives;
cases: GuildCases;
regexRunner: RegExpRunner;
regexRunnerTimeoutListener;
regexRunnerRepeatedTimeoutListener;
logListener;
batches: Map<string, string[]>;

View file

@ -4,6 +4,7 @@ import { LogType } from "src/data/LogType";
import { TextChannel } from "eris";
import { createChunkedMessage, noop } from "src/utils";
import { getLogMessage } from "./getLogMessage";
import { allowTimeout } from "../../../RegExpRunner";
const excludedUserProps = ["user", "member", "mod"];
@ -45,7 +46,8 @@ export async function log(pluginData: PluginData<LogsPluginType>, type: LogType,
// If this entry contains a message with an excluded regex, skip it
if (type === LogType.MESSAGE_DELETE && opts.excluded_message_regexes && data.message.data.content) {
for (const regex of opts.excluded_message_regexes) {
if (regex.test(data.message.data.content)) {
const matches = await pluginData.state.regexRunner.exec(regex, data.message.data.content).catch(allowTimeout);
if (matches) {
continue logChannelLoop;
}
}
@ -53,7 +55,8 @@ export async function log(pluginData: PluginData<LogsPluginType>, type: LogType,
if (type === LogType.MESSAGE_EDIT && opts.excluded_message_regexes && data.before.data.content) {
for (const regex of opts.excluded_message_regexes) {
if (regex.test(data.before.data.content)) {
// Match against the pre-edit content, i.e. the same text the surrounding MESSAGE_EDIT guard checked
const matches = await pluginData.state.regexRunner.exec(regex, data.before.data.content).catch(allowTimeout);
if (matches) {
continue logChannelLoop;
}
}

View file

@ -0,0 +1,37 @@
import { RegExpRunner } from "./RegExpRunner";
/**
 * Bookkeeping entry for one shared RegExpRunner.
 */
interface RunnerInfo {
  // Number of getRegExpRunner() calls for this key not yet balanced by discardRegExpRunner()
  users: number;
  // The runner instance handed out to every user of this key
  runner: RegExpRunner;
}
// Shared runners by key; entries are added by getRegExpRunner() and removed by discardRegExpRunner()
const runners: Map<string, RunnerInfo> = new Map();
/**
 * Returns the shared runner registered under `key`, creating it on first request.
 * Every call counts as one user of the runner and should be paired with a
 * discardRegExpRunner() call for the same key.
 */
export function getRegExpRunner(key: string) {
  let entry = runners.get(key);
  if (entry === undefined) {
    entry = { users: 0, runner: new RegExpRunner() };
    runners.set(key, entry);
  }

  entry.users += 1;
  return entry.runner;
}
/**
 * Releases one use of the shared runner registered under `key`.
 * When the last user is gone, the runner is disposed and removed from the registry.
 *
 * @throws Error if no runner is registered under `key`
 */
export function discardRegExpRunner(key: string) {
  const entry = runners.get(key);
  if (entry === undefined) {
    throw new Error(`No runners with key ${key}, cannot discard`);
  }

  entry.users -= 1;
  if (entry.users > 0) {
    // Still in use elsewhere
    return;
  }

  // Disposal is started but not awaited
  entry.runner.dispose();
  runners.delete(key);
}

View file

@ -8,23 +8,20 @@ import safeRegex from "safe-regex";
const regexWithFlags = /^\/(.*?)\/([i]*)$/;
/**
* The TSafeRegex type supports two syntaxes for regexes: /<regex>/<flags> and just <regex>
* The value is then checked for "catastrophic exponential-time regular expressions" by
* https://www.npmjs.com/package/safe-regex
* This function supports two input syntaxes for regexes: /<pattern>/<flags> and just <pattern>
*/
const safeRegexAllowedFlags = ["i"];
export const TSafeRegex = new t.Type<RegExp, string>(
"TSafeRegex",
export function inputPatternToRegExp(pattern: string) {
  // "/<pattern>/<flags>" form: take the pattern and flags from the capture groups
  const delimited = regexWithFlags.exec(pattern);
  if (delimited) {
    return new RegExp(delimited[1], delimited[2]);
  }

  // Bare form: the whole input is the pattern, with no flags
  return new RegExp(pattern, "");
}
export const TRegex = new t.Type<RegExp, string>(
"TRegex",
(s): s is RegExp => s instanceof RegExp,
(from, to) =>
either.chain(t.string.validate(from, to), s => {
const advancedSyntaxMatch = s.match(regexWithFlags);
const [regexStr, flags] = advancedSyntaxMatch ? [advancedSyntaxMatch[1], advancedSyntaxMatch[2]] : [s, ""];
const finalFlags = flags
.split("")
.filter(flag => safeRegexAllowedFlags.includes(flag))
.join("");
return safeRegex(regexStr) ? t.success(new RegExp(regexStr, finalFlags)) : t.failure(from, to, "Unsafe regex");
return t.success(inputPatternToRegExp(s));
}),
s => `/${s.source}/${s.flags}`,
);