Improve invite detection

Several less common patterns are now detected:
discord.gg/anything/here/<code>
discord.com/invite/<code>/anything/here
discordapp.com/invite/<code>/anything/here

Potential invite URLs are also parsed as URLs to clean out any
shenanigans related to valid-but-uncommon URL formats.
This commit is contained in:
Dragory 2021-04-28 21:12:19 +03:00
parent c26ab2977f
commit 4410f20562
No known key found for this signature in database
GPG key ID: 5F387BA66DF8AAC1

View file

@ -20,7 +20,7 @@ import {
TextChannel,
User,
} from "eris";
import url from "url";
import { URL } from "url";
import tlds from "tlds";
import emojiRegex from "emoji-regex";
import * as t from "io-ts";
@ -481,7 +481,7 @@ const plainLinkRegex = /((?!https?:\/\/)\S)+\.\S+/; // anything.anything, withou
const urlRegex = new RegExp(`(${realLinkRegex.source}|${plainLinkRegex.source})`, "g");
const protocolRegex = /^[a-z]+:\/\//;
interface MatchedURL extends url.URL {
interface MatchedURL extends URL {
input: string;
}
@ -496,7 +496,7 @@ export function getUrlsInString(str: string, onlyUnique = false): MatchedURL[] {
let matchUrl: MatchedURL;
try {
matchUrl = new url.URL(withProtocol) as MatchedURL;
matchUrl = new URL(withProtocol) as MatchedURL;
matchUrl.input = match;
} catch (e) {
return urls;
@ -520,9 +520,61 @@ export function parseInviteCodeInput(str: string): string {
return getInviteCodesInString(str)[0];
}
export function isNotNull(value): value is Exclude<typeof value, null> {
return value != null;
}
// discord.com/invite/<code>
// discordapp.com/invite/<code>
// discord.gg/invite/<code>
// discord.gg/<code>
const quickInviteDetection = /(?:discord.com|discordapp.com)\/invite\/([^\s\/#?]+)|discord.gg\/(?:\S+\/)?([^\s\/#?]+)/gi;
const isInviteHostRegex = /(?:^|\.)(?:discord.gg|discord.com|discordapp.com)$/;
const longInvitePathRegex = /^\/invite\/([^\s\/]+)$/;
export function getInviteCodesInString(str: string): string[] {
const inviteCodeRegex = /(?:discord.gg|discordapp.com\/invite|discord.com\/invite)\/([a-z0-9\-]+)/gi;
return Array.from(str.matchAll(inviteCodeRegex)).map(m => m[1]);
const inviteCodes: string[] = [];
// Clean up markdown
str = str.replace(/[|*_~]/g, "");
// Quick detection
const quickDetectionMatch = str.matchAll(quickInviteDetection);
if (quickDetectionMatch) {
inviteCodes.push(...[...quickDetectionMatch].map(m => m[1] || m[2]));
}
// Deep detection via URL parsing
const linksInString = getUrlsInString(str, true);
const potentialInviteLinks = linksInString.filter(url => isInviteHostRegex.test(url.hostname));
const withNormalizedPaths = potentialInviteLinks.map(url => {
url.pathname = url.pathname.replace(/\/{2,}/g, "/").replace(/\/+$/g, "");
return url;
});
const codesFromInviteLinks = withNormalizedPaths
.map(url => {
// discord.gg/[anything/]<code>
if (url.hostname === "discord.gg") {
const parts = url.pathname.split("/").filter(Boolean);
return parts[parts.length - 1];
}
// discord.com/invite/<code>[/anything]
// discordapp.com/invite/<code>[/anything]
const longInviteMatch = url.pathname.match(longInvitePathRegex);
if (longInviteMatch) {
return longInviteMatch[1];
}
return null;
})
.filter(Boolean) as string[];
inviteCodes.push(...codesFromInviteLinks);
return unique(inviteCodes);
}
export const unicodeEmojiRegex = emojiRegex();