3
0
Fork 0
mirror of https://github.com/ZeppelinBot/Zeppelin.git synced 2025-03-16 14:11:50 +00:00

Improve invite detection

Several less common patterns are now detected:
discord.gg/anything/here/<code>
discord.com/invite/<code>/anything/here
discordapp.com/invite/<code>/anything/here

Potential invite URLs are also parsed as URLs to clean out any
shenanigans related to valid-but-uncommon URL formats.
This commit is contained in:
Dragory 2021-04-28 21:12:19 +03:00
parent c26ab2977f
commit 4410f20562
No known key found for this signature in database
GPG key ID: 5F387BA66DF8AAC1

View file

@ -20,7 +20,7 @@ import {
TextChannel, TextChannel,
User, User,
} from "eris"; } from "eris";
import url from "url"; import { URL } from "url";
import tlds from "tlds"; import tlds from "tlds";
import emojiRegex from "emoji-regex"; import emojiRegex from "emoji-regex";
import * as t from "io-ts"; import * as t from "io-ts";
@ -481,7 +481,7 @@ const plainLinkRegex = /((?!https?:\/\/)\S)+\.\S+/; // anything.anything, withou
const urlRegex = new RegExp(`(${realLinkRegex.source}|${plainLinkRegex.source})`, "g"); const urlRegex = new RegExp(`(${realLinkRegex.source}|${plainLinkRegex.source})`, "g");
const protocolRegex = /^[a-z]+:\/\//; const protocolRegex = /^[a-z]+:\/\//;
interface MatchedURL extends url.URL { interface MatchedURL extends URL {
input: string; input: string;
} }
@ -496,7 +496,7 @@ export function getUrlsInString(str: string, onlyUnique = false): MatchedURL[] {
let matchUrl: MatchedURL; let matchUrl: MatchedURL;
try { try {
matchUrl = new url.URL(withProtocol) as MatchedURL; matchUrl = new URL(withProtocol) as MatchedURL;
matchUrl.input = match; matchUrl.input = match;
} catch (e) { } catch (e) {
return urls; return urls;
@ -520,9 +520,61 @@ export function parseInviteCodeInput(str: string): string {
return getInviteCodesInString(str)[0]; return getInviteCodesInString(str)[0];
} }
export function isNotNull(value): value is Exclude<typeof value, null> {
return value != null;
}
// discord.com/invite/<code>
// discordapp.com/invite/<code>
// discord.gg/invite/<code>
// discord.gg/<code>
const quickInviteDetection = /(?:discord.com|discordapp.com)\/invite\/([^\s\/#?]+)|discord.gg\/(?:\S+\/)?([^\s\/#?]+)/gi;
const isInviteHostRegex = /(?:^|\.)(?:discord.gg|discord.com|discordapp.com)$/;
const longInvitePathRegex = /^\/invite\/([^\s\/]+)$/;
export function getInviteCodesInString(str: string): string[] { export function getInviteCodesInString(str: string): string[] {
const inviteCodeRegex = /(?:discord.gg|discordapp.com\/invite|discord.com\/invite)\/([a-z0-9\-]+)/gi; const inviteCodes: string[] = [];
return Array.from(str.matchAll(inviteCodeRegex)).map(m => m[1]);
// Clean up markdown
str = str.replace(/[|*_~]/g, "");
// Quick detection
const quickDetectionMatch = str.matchAll(quickInviteDetection);
if (quickDetectionMatch) {
inviteCodes.push(...[...quickDetectionMatch].map(m => m[1] || m[2]));
}
// Deep detection via URL parsing
const linksInString = getUrlsInString(str, true);
const potentialInviteLinks = linksInString.filter(url => isInviteHostRegex.test(url.hostname));
const withNormalizedPaths = potentialInviteLinks.map(url => {
url.pathname = url.pathname.replace(/\/{2,}/g, "/").replace(/\/+$/g, "");
return url;
});
const codesFromInviteLinks = withNormalizedPaths
.map(url => {
// discord.gg/[anything/]<code>
if (url.hostname === "discord.gg") {
const parts = url.pathname.split("/").filter(Boolean);
return parts[parts.length - 1];
}
// discord.com/invite/<code>[/anything]
// discordapp.com/invite/<code>[/anything]
const longInviteMatch = url.pathname.match(longInvitePathRegex);
if (longInviteMatch) {
return longInviteMatch[1];
}
return null;
})
.filter(Boolean) as string[];
inviteCodes.push(...codesFromInviteLinks);
return unique(inviteCodes);
} }
export const unicodeEmojiRegex = emojiRegex(); export const unicodeEmojiRegex = emojiRegex();