3
0
Fork 0
mirror of https://github.com/ZeppelinBot/Zeppelin.git synced 2025-05-10 20:35:02 +00:00

Add username/nickname history retention periods

This commit is contained in:
Dragory 2020-06-02 00:26:06 +03:00
parent a6e650810c
commit de71520747
No known key found for this signature in database
GPG key ID: 5F387BA66DF8AAC1
8 changed files with 253 additions and 111 deletions

View file

@ -1,7 +1,22 @@
import { BaseGuildRepository } from "./BaseGuildRepository";
import { getRepository, Repository } from "typeorm";
import { getRepository, In, Repository } from "typeorm";
import { NicknameHistoryEntry } from "./entities/NicknameHistoryEntry";
import { sorter } from "../utils";
import { MINUTES, SECONDS, sorter } from "../utils";
import { MAX_USERNAME_ENTRIES_PER_USER } from "./UsernameHistory";
import { isAPI } from "../globals";
import { cleanupNicknames } from "./cleanup/nicknames";
if (!isAPI()) {
  // Delay between the end of one cleanup pass and the start of the next
  const CLEANUP_INTERVAL = 5 * MINUTES;

  /**
   * Runs one nickname history cleanup pass and then schedules the next pass.
   * Only set up when isAPI() returns false.
   */
  async function cleanup() {
    try {
      await cleanupNicknames();
    } finally {
      // Reschedule even if this pass failed; otherwise a single rejected
      // query would stop periodic cleanup until the process is restarted.
      // The error itself still propagates as before.
      setTimeout(cleanup, CLEANUP_INTERVAL);
    }
  }

  // Start first cleanup 30 seconds after startup
  setTimeout(cleanup, 30 * SECONDS);
}
/**
 * Number of most recent nickname history entries meant to be kept per user
 * (per the "leave only the last MAX_NICKNAME_ENTRIES_PER_USER entries" cleanup comment).
 * NOTE(review): the trimming query visible in GuildNicknameHistory skips
 * MAX_USERNAME_ENTRIES_PER_USER rows rather than this constant; confirm which limit is intended.
 */
export const MAX_NICKNAME_ENTRIES_PER_USER = 10;
@ -44,25 +59,20 @@ export class GuildNicknameHistory extends BaseGuildRepository {
nickname,
});
// Cleanup (leave only the last MAX_NICKNAME_ENTRIES_PER_USER entries)
const lastEntries = await this.getByUserId(userId);
if (lastEntries.length > MAX_NICKNAME_ENTRIES_PER_USER) {
const earliestEntry = lastEntries
.sort(sorter("timestamp", "DESC"))
.slice(0, 10)
.reduce((earliest, entry) => {
if (earliest == null) return entry;
if (entry.id < earliest.id) return entry;
return earliest;
}, null);
// Cleanup (leave only the last MAX_USERNAME_ENTRIES_PER_USER entries)
const toDelete = await this.nicknameHistory
.createQueryBuilder()
.where("guild_id = :guildId", { guildId: this.guildId })
.andWhere("user_id = :userId", { userId })
.orderBy("id", "DESC")
.skip(MAX_USERNAME_ENTRIES_PER_USER)
.take(99_999)
.getMany();
this.nicknameHistory
.createQueryBuilder()
.where("guild_id = :guildId", { guildId: this.guildId })
.andWhere("user_id = :userId", { userId })
.andWhere("id < :id", { id: earliestEntry.id })
.delete()
.execute();
if (toDelete.length > 0) {
await this.nicknameHistory.delete({
id: In(toDelete.map(v => v.id)),
});
}
}
}

View file

@ -7,68 +7,16 @@ import moment from "moment-timezone";
import { DAYS, DBDateFormat, MINUTES, SECONDS } from "../utils";
import { isAPI } from "../globals";
import { connection } from "./db";
// Delay before cleanup() schedules its next run; cleanup() also uses it as the
// age threshold for rows that have a deleted_at value
const CLEANUP_INTERVAL = 5 * MINUTES;
/**
 * How long message edits, deletions, etc. will include the original message content.
 * This is very heavy storage-wise, so keeping it as low as possible is ideal.
 */
const RETENTION_PERIOD = 1 * DAYS;
// Age threshold applied to non-permanent rows with is_bot = 1 in the cleanup query
const BOT_MESSAGE_RETENTION_PERIOD = 30 * MINUTES;
// Maximum number of message ids selected (and then deleted) per loop iteration
const CLEAN_PER_LOOP = 250;
/**
 * Deletes expired rows from the `messages` table in batches, then schedules
 * itself to run again after CLEANUP_INTERVAL.
 *
 * A row is selected for deletion when any of these holds:
 *  - it has a deleted_at value at or before "now - CLEANUP_INTERVAL"
 *  - it is not permanent and was posted at or before "now - RETENTION_PERIOD"
 *  - it is a non-permanent bot message posted at or before
 *    "now - BOT_MESSAGE_RETENTION_PERIOD"
 */
async function cleanup() {
  try {
    const repo = getRepository(SavedMessage);

    const deletedAtThreshold = moment()
      .subtract(CLEANUP_INTERVAL, "ms")
      .format(DBDateFormat);
    const postedAtThreshold = moment()
      .subtract(RETENTION_PERIOD, "ms")
      .format(DBDateFormat);
    const botPostedAtThreshold = moment()
      .subtract(BOT_MESSAGE_RETENTION_PERIOD, "ms")
      .format(DBDateFormat);

    // SELECT + DELETE messages in batches
    // This is to avoid deadlocks that happened frequently when deleting with the same criteria as the select below
    // when a message was being inserted at the same time
    let rows;
    do {
      rows = await connection.query(
        `
SELECT id
FROM messages
WHERE (
deleted_at IS NOT NULL
AND deleted_at <= ?
)
OR (
posted_at <= ?
AND is_permanent = 0
)
OR (
is_bot = 1
AND posted_at <= ?
AND is_permanent = 0
)
LIMIT ${CLEAN_PER_LOOP}
`,
        [deletedAtThreshold, postedAtThreshold, botPostedAtThreshold],
      );

      if (rows.length > 0) {
        await repo.delete({
          id: In(rows.map(r => r.id)),
        });
      }
      // A full batch means there may be more matching rows; a short batch ends the loop
    } while (rows.length === CLEAN_PER_LOOP);
  } finally {
    // Reschedule even if this pass failed; otherwise a single rejected query
    // would stop periodic cleanup until the process is restarted.
    // The error itself still propagates as before.
    setTimeout(cleanup, CLEANUP_INTERVAL);
  }
}
import { cleanupMessages } from "./cleanup/messages";
if (!isAPI()) {
  // Delay between the end of one cleanup pass and the start of the next
  const CLEANUP_INTERVAL = 5 * MINUTES;

  /**
   * Runs one saved-message cleanup pass and then schedules the next pass.
   * Only set up when isAPI() returns false.
   */
  async function cleanup() {
    try {
      await cleanupMessages();
    } finally {
      // Reschedule even if this pass failed; otherwise a single rejected
      // query would stop periodic cleanup until the process is restarted.
      // The error itself still propagates as before.
      setTimeout(cleanup, CLEANUP_INTERVAL);
    }
  }

  // Start first cleanup 30 seconds after startup
  setTimeout(cleanup, 30 * SECONDS);
}

View file

@ -1,9 +1,24 @@
import { getRepository, Repository } from "typeorm";
import { getRepository, In, Repository } from "typeorm";
import { UsernameHistoryEntry } from "./entities/UsernameHistoryEntry";
import { sorter } from "../utils";
import { MINUTES, SECONDS, sorter } from "../utils";
import { BaseRepository } from "./BaseRepository";
import { connection } from "./db";
import { isAPI } from "../globals";
import { cleanupUsernames } from "./cleanup/usernames";
// Number of most recent username history entries kept per user.
// NOTE(review): this identifier is declared a second time further down in this view with the
// value 5; these look like the before/after lines of a diff. Confirm which value is current.
export const MAX_USERNAME_ENTRIES_PER_USER = 10;
if (!isAPI()) {
  // Delay between the end of one cleanup pass and the start of the next
  const CLEANUP_INTERVAL = 5 * MINUTES;

  /**
   * Runs one username history cleanup pass and then schedules the next pass.
   * Only set up when isAPI() returns false.
   */
  async function cleanup() {
    try {
      await cleanupUsernames();
    } finally {
      // Reschedule even if this pass failed; otherwise a single rejected
      // query would stop periodic cleanup until the process is restarted.
      // The error itself still propagates as before.
      setTimeout(cleanup, CLEANUP_INTERVAL);
    }
  }

  // Start first cleanup 30 seconds after startup
  // (matches the nickname and message cleanup schedulers)
  setTimeout(cleanup, 30 * SECONDS);
}
// Number of most recent username history entries kept per user; the trimming query in
// UsernameHistory skips this many newest rows (ordered by id DESC) and deletes the rest.
export const MAX_USERNAME_ENTRIES_PER_USER = 5;
export class UsernameHistory extends BaseRepository {
private usernameHistory: Repository<UsernameHistoryEntry>;
@ -43,23 +58,18 @@ export class UsernameHistory extends BaseRepository {
});
// Cleanup (leave only the last MAX_USERNAME_ENTRIES_PER_USER entries)
const lastEntries = await this.getByUserId(userId);
if (lastEntries.length > MAX_USERNAME_ENTRIES_PER_USER) {
const earliestEntry = lastEntries
.sort(sorter("timestamp", "DESC"))
.slice(0, 10)
.reduce((earliest, entry) => {
if (earliest == null) return entry;
if (entry.id < earliest.id) return entry;
return earliest;
}, null);
const toDelete = await this.usernameHistory
.createQueryBuilder()
.where("user_id = :userId", { userId })
.orderBy("id", "DESC")
.skip(MAX_USERNAME_ENTRIES_PER_USER)
.take(99_999)
.getMany();
this.usernameHistory
.createQueryBuilder()
.andWhere("user_id = :userId", { userId })
.andWhere("id < :id", { id: earliestEntry.id })
.delete()
.execute();
if (toDelete.length > 0) {
await this.usernameHistory.delete({
id: In(toDelete.map(v => v.id)),
});
}
}
}

View file

@ -0,0 +1,68 @@
import { DAYS, DBDateFormat, MINUTES } from "../../utils";
import { getRepository, In } from "typeorm";
import { SavedMessage } from "../entities/SavedMessage";
import moment from "moment-timezone";
import { connection } from "../db";
/**
 * How long message edits, deletions, etc. will include the original message content.
 * This is very heavy storage-wise, so keeping it as low as possible is ideal.
 */
const RETENTION_PERIOD = 1 * DAYS;
// Age threshold applied to non-permanent rows with is_bot = 1
const BOT_MESSAGE_RETENTION_PERIOD = 30 * MINUTES;
// Age threshold applied to rows that have a deleted_at value
const DELETED_MESSAGE_RETENTION_PERIOD = 5 * MINUTES;
// Maximum number of message ids selected (and then deleted) per loop iteration
const CLEAN_PER_LOOP = 250;
/**
 * Removes expired rows from the `messages` table in batches of CLEAN_PER_LOOP.
 *
 * A row is selected when it has a deleted_at value at or before the deleted-message
 * threshold, or is non-permanent and posted at or before the general threshold,
 * or is a non-permanent bot message posted at or before the bot threshold.
 *
 * @returns the total number of row ids selected for deletion across all batches
 */
export async function cleanupMessages(): Promise<number> {
  const savedMessages = getRepository(SavedMessage);

  // Formats "now minus periodMs" as a DB date string
  const thresholdFor = (periodMs: number) =>
    moment()
      .subtract(periodMs, "ms")
      .format(DBDateFormat);

  const deletedBefore = thresholdFor(DELETED_MESSAGE_RETENTION_PERIOD);
  const postedBefore = thresholdFor(RETENTION_PERIOD);
  const botPostedBefore = thresholdFor(BOT_MESSAGE_RETENTION_PERIOD);

  // SELECT + DELETE messages in batches
  // This is to avoid deadlocks that happened frequently when deleting with the same criteria as the select below
  // when a message was being inserted at the same time
  let totalCleaned = 0;
  for (;;) {
    const batch = await connection.query(
      `
SELECT id
FROM messages
WHERE (
deleted_at IS NOT NULL
AND deleted_at <= ?
)
OR (
posted_at <= ?
AND is_permanent = 0
)
OR (
is_bot = 1
AND posted_at <= ?
AND is_permanent = 0
)
LIMIT ${CLEAN_PER_LOOP}
`,
      [deletedBefore, postedBefore, botPostedBefore],
    );

    if (batch.length > 0) {
      const expiredIds = batch.map(row => row.id);
      await savedMessages.delete({ id: In(expiredIds) });
    }

    totalCleaned += batch.length;

    // Anything short of a full batch means no matching rows remain
    if (batch.length !== CLEAN_PER_LOOP) {
      break;
    }
  }

  return totalCleaned;
}

View file

@ -0,0 +1,41 @@
import { getRepository, In } from "typeorm";
import moment from "moment-timezone";
import { NicknameHistoryEntry } from "../entities/NicknameHistoryEntry";
import { DAYS, DBDateFormat } from "../../utils";
import { connection } from "../db";
// Nickname history rows with a timestamp older than this are removed by cleanupNicknames()
export const NICKNAME_RETENTION_PERIOD = 30 * DAYS;
// Maximum number of row ids selected (and then deleted) per loop iteration
const CLEAN_PER_LOOP = 500;
/**
 * Removes `nickname_history` rows whose timestamp is older than
 * NICKNAME_RETENTION_PERIOD, working in batches of CLEAN_PER_LOOP.
 *
 * @returns the total number of row ids selected for deletion across all batches
 */
export async function cleanupNicknames(): Promise<number> {
  const nicknameRepo = getRepository(NicknameHistoryEntry);

  // Rows strictly older than this DB-formatted date are expired
  const cutoff = moment()
    .subtract(NICKNAME_RETENTION_PERIOD, "ms")
    .format(DBDateFormat);

  // Clean old nicknames (NICKNAME_RETENTION_PERIOD)
  let removedCount = 0;
  for (;;) {
    const batch = await connection.query(
      `
SELECT id
FROM nickname_history
WHERE timestamp < ?
LIMIT ${CLEAN_PER_LOOP}
`,
      [cutoff],
    );

    if (batch.length > 0) {
      const expiredIds = batch.map(row => row.id);
      await nicknameRepo.delete({ id: In(expiredIds) });
    }

    removedCount += batch.length;

    // Anything short of a full batch means no expired rows remain
    if (batch.length !== CLEAN_PER_LOOP) {
      break;
    }
  }

  return removedCount;
}

View file

@ -0,0 +1,45 @@
import { getRepository, In } from "typeorm";
import moment from "moment-timezone";
import { UsernameHistoryEntry } from "../entities/UsernameHistoryEntry";
import { DAYS, DBDateFormat } from "../../utils";
import { connection } from "../db";
// Username history rows with a timestamp older than this are removed by cleanupUsernames()
export const USERNAME_RETENTION_PERIOD = 30 * DAYS;
// Maximum number of row ids selected (and then deleted) per loop iteration
const CLEAN_PER_LOOP = 500;
/**
 * Removes `username_history` rows whose timestamp is older than
 * USERNAME_RETENTION_PERIOD, working in batches of CLEAN_PER_LOOP.
 *
 * @returns the total number of row ids selected for deletion across all batches
 */
export async function cleanupUsernames(): Promise<number> {
  let cleaned = 0;

  const usernameHistoryRepository = getRepository(UsernameHistoryEntry);

  // Rows strictly older than this DB-formatted date are expired
  const dateThreshold = moment()
    .subtract(USERNAME_RETENTION_PERIOD, "ms")
    .format(DBDateFormat);

  // Clean old usernames (USERNAME_RETENTION_PERIOD)
  let rows;
  do {
    rows = await connection.query(
      `
SELECT id
FROM username_history
WHERE timestamp < ?
LIMIT ${CLEAN_PER_LOOP}
`,
      [dateThreshold],
    );

    if (rows.length > 0) {
      // Delete by id list; no per-batch logging, to match cleanupNicknames/cleanupMessages
      await usernameHistoryRepository.delete({
        id: In(rows.map(r => r.id)),
      });
    }

    cleaned += rows.length;
    // A full batch means there may be more expired rows; a short batch ends the loop
  } while (rows.length === CLEAN_PER_LOOP);

  return cleaned;
}