This commit is contained in:
Michael Zhang 2024-05-30 12:22:44 -05:00
commit d9eefa17d4
14 changed files with 535 additions and 0 deletions

1
.envrc Normal file
View file

@ -0,0 +1 @@
# direnv configuration. NOTE(review): `layout python3` is presumably direnv's stdlib
# helper that sets up a per-directory Python 3 environment — confirm against direnv docs.
layout python3

6
.gitignore vendored Normal file
View file

@ -0,0 +1,6 @@
node_modules
# Keep environment variables out of version control
.env
.venv
prisma/dev.db
# NeDB stores that main.ts opens at runtime (data/scores.db, data/transitions.db)
data/

17
biome.json Normal file
View file

@ -0,0 +1,17 @@
{
"$schema": "https://biomejs.dev/schemas/1.7.3/schema.json",
"organizeImports": {
"enabled": true
},
"formatter": {
"enabled": true,
"indentStyle": "space",
"indentWidth": 2
},
"linter": {
"enabled": true,
"rules": {
"recommended": true
}
}
}

BIN
bun.lockb Executable file

Binary file not shown.

218
main.ts Normal file
View file

@ -0,0 +1,218 @@
import "dotenv/config";
import Datastore from "nedb";
import WebSocket from "ws";
import { createInterface } from "readline";
import { Channel, Client, Score } from "osu-web.js";
import { PrismaClient } from "@prisma/client";
/** Redirect URI sent with both the authorize link and the token exchange. */
const redirectUri = "http://localhost:3000/auth/callback";

/** Shared Prisma client used by every upsert in this file. */
const prisma = new PrismaClient();

// NeDB stores, opened (autoload) as soon as they are constructed.
const scoresStore = new Datastore({
  filename: "data/scores.db",
  autoload: true,
});
const transitionsStore = new Datastore({
  filename: "data/transitions.db",
  autoload: true,
});
const db = { scores: scoresStore, transitions: transitionsStore };

// `key` must be unique in the scores store; compact its file every 5000 ms.
db.scores.ensureIndex({ fieldName: "key", unique: true });
db.scores.persistence.setAutocompactionInterval(5000);
// await new Promise((resolve) => db.loadDatabase(resolve));
/**
 * Runs the interactive OAuth authorization-code flow against osu.ppy.sh.
 *
 * Prints the authorize URL, waits for the `code` value to be pasted on stdin,
 * then exchanges that code for a token at `/oauth/token`.
 *
 * @returns The `access_token` field of the token response.
 * @throws Error if `OSU_CLIENT_ID` / `OSU_CLIENT_SECRET` are unset, if the
 *   token endpoint answers with a non-2xx status, or if the response carries
 *   no string `access_token`.
 */
async function getUserToken(): Promise<string> {
  const clientId = process.env.OSU_CLIENT_ID;
  const clientSecret = process.env.OSU_CLIENT_SECRET;
  if (!clientId || !clientSecret) {
    // Without this guard URLSearchParams would send the literal text "undefined".
    throw new Error(
      "OSU_CLIENT_ID and OSU_CLIENT_SECRET must be set to request a user token",
    );
  }

  const rlInterface = createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  const authorizeParams = new URLSearchParams({
    client_id: clientId,
    redirect_uri: redirectUri,
    response_type: "code",
    scope: [
      "chat.read",
      "chat.write",
      "chat.write_manage",
      "friends.read",
      "identify",
      "public",
    ].join(" "),
  });
  console.log(
    `https://osu.ppy.sh/oauth/authorize?${authorizeParams.toString()}`,
  );

  const code = await new Promise<string>((resolve) => {
    rlInterface.question("Code: ", (answer) => {
      rlInterface.close();
      // A pasted code often carries stray whitespace.
      resolve(answer.trim());
    });
  });

  const tokenParams = new URLSearchParams({
    client_id: clientId,
    client_secret: clientSecret,
    code,
    grant_type: "authorization_code",
    redirect_uri: redirectUri,
  });
  const resp = await fetch("https://osu.ppy.sh/oauth/token", {
    method: "POST",
    headers: { "Content-Type": "application/x-www-form-urlencoded" },
    body: tokenParams.toString(),
  });
  if (!resp.ok) {
    throw new Error(
      `osu! token exchange failed: ${resp.status} ${resp.statusText}`,
    );
  }

  const data = (await resp.json()) as { access_token?: unknown };
  // NOTE(review): this prints the full token response, credentials included —
  // presumably so the token can be copied into OSU_TOKEN; confirm before
  // running anywhere logs are retained.
  console.log("data", data);
  if (typeof data.access_token !== "string") {
    throw new Error("osu! token response did not contain an access_token");
  }
  return data.access_token;
}
// Use the token from OSU_TOKEN when it is set; otherwise run the interactive OAuth flow.
const token = process.env.OSU_TOKEN ?? (await getUserToken());
// Bearer header for that token. fetchApi builds its own local `headers`; in the
// visible code only the commented-out `listen` sketch refers to this one.
const headers = { Authorization: `Bearer ${token}` };
/**
 * Calls an osu! API v2 endpoint with the module's bearer token and returns the
 * parsed JSON body.
 *
 * @param url Path appended to `https://osu.ppy.sh/api/v2` (must start with `/`).
 * @param init Optional fetch options. Headers given here are merged over the
 *   defaults (`Authorization`, `Content-Type: application/json`) instead of
 *   replacing them, so passing e.g. `Accept` no longer drops the bearer token.
 * @returns The parsed JSON body. The HTTP status is not inspected, so an error
 *   response is returned as whatever JSON the server sent; callers check the shape.
 */
async function fetchApi(url: string, init?: RequestInit) {
  const mergedHeaders = new Headers({
    Authorization: `Bearer ${token}`,
    "Content-Type": "application/json",
  });
  // Caller-supplied headers win per name, but never wipe the defaults wholesale.
  new Headers(init?.headers).forEach((value, name) => {
    mergedHeaders.set(name, value);
  });

  const resp = await fetch(`https://osu.ppy.sh/api/v2${url}`, {
    ...init,
    headers: mergedHeaders,
  });
  return await resp.json();
}
// osu-web.js client built from the same token; nothing else in the visible code references it.
const client = new Client(token);
/**
 * Resolves after a delay.
 *
 * @param time Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep(time: number): Promise<void> {
  return new Promise((resolve) => {
    setTimeout(resolve, time);
  });
}
// async function listen() {
// const url = "https://osu.ppy.sh/api/v2/notifications";
// const ws = new WebSocket(url, [], { headers });
// ws.on("message", (data) => {
// console.log("data", data.toString());
// });
// ws.send(JSON.stringify({ event: "chat.start" }));
// }
/**
 * Upserts a beatmap set and then one of its beatmaps.
 *
 * The set is written first because the beatmap row references it through
 * `beatmapset_id`. This is best-effort: any failure is logged and swallowed so
 * one bad record does not reject the caller's batch.
 *
 * @param beatmap API beatmap object; `id` and `version` are read.
 * @param beatmapSet API beatmap set object; `id`, `artist`, `artist_unicode`,
 *   `title`, `title_unicode` and `ranked` are read.
 */
async function ensureBeatmap(beatmap, beatmapSet) {
  try {
    const {
      id: setId,
      artist,
      artist_unicode: artistUnicode,
      title,
      title_unicode: titleUnicode,
      ranked,
    } = beatmapSet;
    const setFields = {
      artist,
      artistUnicode,
      title,
      titleUnicode,
      // Only the API status value 1 is stored as ranked.
      ranked: ranked === 1,
    };
    await prisma.beatmapSet.upsert({
      where: { id: setId },
      create: { ...setFields, id: setId },
      update: setFields,
    });

    const { id: beatmapId, version: difficulty } = beatmap;
    const beatmapFields = { difficulty, beatmapset_id: setId };
    await prisma.beatmap.upsert({
      where: { id: beatmapId },
      create: { ...beatmapFields, id: beatmapId },
      update: beatmapFields,
    });
  } catch (e) {
    // Optional chaining keeps the handler itself from throwing when an argument
    // is missing, which is one of the failures that can land here.
    const reason = e instanceof Error ? e.message : String(e);
    console.log("failed on", beatmapSet?.id, beatmap?.id, reason);
  }
}
/**
 * Fetches a user's recent osu!-mode scores (fails included, up to 50), stores
 * each one, and records a transition between every pair of scores that are
 * adjacent once sorted by `created_at`.
 *
 * Returns without doing anything when the API response is not an array.
 *
 * @param userId osu! user id interpolated into the request path.
 */
async function scrapeUser(userId) {
  const recent: Score[] = await fetchApi(
    `/users/${userId}/scores/recent?include_fails=1&limit=50&mode=osu`,
  );
  if (!Array.isArray(recent)) {
    return;
  }

  // Stores one API score, making sure its beatmap and set rows exist first.
  const persistScore = async (entry: Score) => {
    const uniqueKey = {
      user_id: entry.user_id,
      beatmap_id: entry.beatmap.id,
      created_at: entry.created_at,
      score: entry.score,
    };
    const details = {
      beatmapset_id: entry.beatmapset.id,
      accuracy: entry.accuracy,
      score_id: entry.id,
      best_id: entry.best_id,
    };
    await ensureBeatmap(entry.beatmap, entry.beatmapset);
    return await prisma.score.upsert({
      where: { user_id_beatmap_id_created_at_score: uniqueKey },
      create: { ...uniqueKey, ...details },
      update: { ...details },
    });
  };

  const stored = await Promise.all(recent.map((entry) => persistScore(entry)));
  stored.sort(
    (left, right) => left.created_at.getTime() - right.created_at.getTime(),
  );

  // Walk adjacent pairs oldest-first, writing one transition at a time.
  let earlier = stored[0];
  for (const later of stored.slice(1)) {
    const pairKey = { before_id: earlier.id, after_id: later.id };
    const pairData = {
      ms_between: later.created_at.getTime() - earlier.created_at.getTime(),
      user_id: later.user_id,
    };
    await prisma.transition.upsert({
      where: { before_id_after_id: pairKey },
      create: { ...pairKey, ...pairData },
      update: pairData,
    });
    earlier = later;
  }
}
/**
 * Scrapes every distinct user who sent one of the latest 50 messages in a
 * chat channel.
 *
 * Returns without doing anything when the API response is not an array.
 *
 * @param channelId Chat channel id interpolated into the request path.
 */
async function scrapeSingle(channelId) {
  const messages =
    (await fetchApi(`/chat/channels/${channelId}/messages?limit=50`)) ?? [];
  if (!Array.isArray(messages)) return;

  // A Set collapses repeat senders, so each user is scraped once per pass
  // rather than once per message, which would also race identical upserts.
  const userIds = new Set<number>();
  for (const msg of messages) {
    const senderId = msg?.sender_id;
    if (Number.isInteger(senderId)) userIds.add(senderId);
  }

  await Promise.all([...userIds].map((userId) => scrapeUser(userId)));
}
/**
 * Lists chat channels from `/chat/channels` and scrapes all of them concurrently.
 *
 * Returns without doing anything when the API response is not an array (for
 * example an error object), matching the guards in scrapeUser and scrapeSingle.
 */
async function scrapeChannels() {
  const channels: Channel[] = await fetchApi("/chat/channels");
  if (!Array.isArray(channels)) return;
  // // biome-ignore lint/style/noNonNullAssertion: <explanation>
  // const osuChannel = channels.find((channel) => channel.name === "#osu")!;
  // const { channel_id: osuChannelId } = osuChannel;
  await Promise.all(
    channels.map((channel) => scrapeSingle(channel.channel_id)),
  );
}
/**
 * Polls forever: one scrape pass over all channels, then a 10-second pause.
 *
 * A failed pass (network error, database error) is logged and the loop carries
 * on with the next pass instead of ending as an unhandled rejection.
 */
async function mainLoop() {
  while (true) {
    try {
      await scrapeChannels();
    } catch (err) {
      console.error("scrape pass failed", err);
    }
    await sleep(10000);
  }
}
mainLoop();

13
package.json Normal file
View file

@ -0,0 +1,13 @@
{
"dependencies": {
"@prisma/client": "^5.14.0",
"dotenv": "^16.4.5",
"nedb": "^1.8.0",
"osu-web.js": "^2.4.0"
},
"devDependencies": {
"@types/nedb": "^1.8.16",
"@types/ws": "^8.5.10",
"prisma": "^5.14.0"
}
}

View file

@ -0,0 +1,25 @@
-- CreateTable
CREATE TABLE "Score" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"accuracy" REAL NOT NULL,
"best_id" INTEGER,
"created_at" DATETIME NOT NULL,
"score_id" INTEGER,
"score" INTEGER NOT NULL,
"beatmap_id" INTEGER NOT NULL,
"beatmapset_id" INTEGER NOT NULL,
"user_id" INTEGER NOT NULL
);
-- CreateTable
CREATE TABLE "Transition" (
"before_id" INTEGER NOT NULL,
"after_id" INTEGER NOT NULL,
PRIMARY KEY ("before_id", "after_id"),
CONSTRAINT "Transition_before_id_fkey" FOREIGN KEY ("before_id") REFERENCES "Score" ("id") ON DELETE RESTRICT ON UPDATE CASCADE,
CONSTRAINT "Transition_after_id_fkey" FOREIGN KEY ("after_id") REFERENCES "Score" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
);
-- CreateIndex
CREATE UNIQUE INDEX "Score_user_id_beatmap_id_created_at_score_key" ON "Score"("user_id", "beatmap_id", "created_at", "score");

View file

@ -0,0 +1,26 @@
/*
Warnings:
- You are about to alter the column `best_id` on the `Score` table. The data in that column could be lost. The data in that column will be cast from `Int` to `BigInt`.
- You are about to alter the column `score_id` on the `Score` table. The data in that column could be lost. The data in that column will be cast from `Int` to `BigInt`.
*/
-- RedefineTables
PRAGMA foreign_keys=OFF;
CREATE TABLE "new_Score" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"accuracy" REAL NOT NULL,
"best_id" BIGINT,
"created_at" DATETIME NOT NULL,
"score_id" BIGINT,
"score" INTEGER NOT NULL,
"beatmap_id" INTEGER NOT NULL,
"beatmapset_id" INTEGER NOT NULL,
"user_id" INTEGER NOT NULL
);
INSERT INTO "new_Score" ("accuracy", "beatmap_id", "beatmapset_id", "best_id", "created_at", "id", "score", "score_id", "user_id") SELECT "accuracy", "beatmap_id", "beatmapset_id", "best_id", "created_at", "id", "score", "score_id", "user_id" FROM "Score";
DROP TABLE "Score";
ALTER TABLE "new_Score" RENAME TO "Score";
CREATE UNIQUE INDEX "Score_user_id_beatmap_id_created_at_score_key" ON "Score"("user_id", "beatmap_id", "created_at", "score");
PRAGMA foreign_key_check("Score");
PRAGMA foreign_keys=ON;

View file

@ -0,0 +1,22 @@
/*
Warnings:
- Added the required column `ms_between` to the `Transition` table without a default value. This is not possible if the table is not empty.
*/
-- RedefineTables
PRAGMA foreign_keys=OFF;
CREATE TABLE "new_Transition" (
"before_id" INTEGER NOT NULL,
"after_id" INTEGER NOT NULL,
"ms_between" BIGINT NOT NULL,
PRIMARY KEY ("before_id", "after_id"),
CONSTRAINT "Transition_before_id_fkey" FOREIGN KEY ("before_id") REFERENCES "Score" ("id") ON DELETE RESTRICT ON UPDATE CASCADE,
CONSTRAINT "Transition_after_id_fkey" FOREIGN KEY ("after_id") REFERENCES "Score" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
);
INSERT INTO "new_Transition" ("after_id", "before_id") SELECT "after_id", "before_id" FROM "Transition";
DROP TABLE "Transition";
ALTER TABLE "new_Transition" RENAME TO "Transition";
PRAGMA foreign_key_check("Transition");
PRAGMA foreign_keys=ON;

View file

@ -0,0 +1,58 @@
/*
Warnings:
- Added the required column `user_id` to the `Transition` table without a default value. This is not possible if the table is not empty.
*/
-- CreateTable
CREATE TABLE "Beatmap" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"difficulty" TEXT NOT NULL
);
-- CreateTable
CREATE TABLE "BeatmapSet" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"artist" TEXT NOT NULL,
"artistUnicode" TEXT NOT NULL,
"title" TEXT NOT NULL,
"titleUnicode" TEXT NOT NULL,
"genre" INTEGER NOT NULL,
"language" INTEGER NOT NULL
);
-- RedefineTables
PRAGMA foreign_keys=OFF;
CREATE TABLE "new_Transition" (
"before_id" INTEGER NOT NULL,
"after_id" INTEGER NOT NULL,
"user_id" INTEGER NOT NULL,
"ms_between" BIGINT NOT NULL,
PRIMARY KEY ("before_id", "after_id"),
CONSTRAINT "Transition_before_id_fkey" FOREIGN KEY ("before_id") REFERENCES "Score" ("id") ON DELETE RESTRICT ON UPDATE CASCADE,
CONSTRAINT "Transition_after_id_fkey" FOREIGN KEY ("after_id") REFERENCES "Score" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
);
INSERT INTO "new_Transition" ("after_id", "before_id", "ms_between") SELECT "after_id", "before_id", "ms_between" FROM "Transition";
DROP TABLE "Transition";
ALTER TABLE "new_Transition" RENAME TO "Transition";
CREATE TABLE "new_Score" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"accuracy" REAL NOT NULL,
"best_id" BIGINT,
"created_at" DATETIME NOT NULL,
"score_id" BIGINT,
"score" INTEGER NOT NULL,
"beatmap_id" INTEGER NOT NULL,
"beatmapset_id" INTEGER NOT NULL,
"user_id" INTEGER NOT NULL,
CONSTRAINT "Score_beatmap_id_fkey" FOREIGN KEY ("beatmap_id") REFERENCES "Beatmap" ("id") ON DELETE RESTRICT ON UPDATE CASCADE,
CONSTRAINT "Score_beatmapset_id_fkey" FOREIGN KEY ("beatmapset_id") REFERENCES "BeatmapSet" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
);
INSERT INTO "new_Score" ("accuracy", "beatmap_id", "beatmapset_id", "best_id", "created_at", "id", "score", "score_id", "user_id") SELECT "accuracy", "beatmap_id", "beatmapset_id", "best_id", "created_at", "id", "score", "score_id", "user_id" FROM "Score";
DROP TABLE "Score";
ALTER TABLE "new_Score" RENAME TO "Score";
CREATE UNIQUE INDEX "Score_user_id_beatmap_id_created_at_score_key" ON "Score"("user_id", "beatmap_id", "created_at", "score");
PRAGMA foreign_key_check("Transition");
PRAGMA foreign_key_check("Score");
PRAGMA foreign_keys=ON;

View file

@ -0,0 +1,34 @@
/*
Warnings:
- You are about to drop the column `genre` on the `BeatmapSet` table. All the data in the column will be lost.
- You are about to drop the column `language` on the `BeatmapSet` table. All the data in the column will be lost.
- Added the required column `beatmapset_id` to the `Beatmap` table without a default value. This is not possible if the table is not empty.
- Added the required column `ranked` to the `BeatmapSet` table without a default value. This is not possible if the table is not empty.
*/
-- RedefineTables
PRAGMA foreign_keys=OFF;
CREATE TABLE "new_Beatmap" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"difficulty" TEXT NOT NULL,
"beatmapset_id" INTEGER NOT NULL,
CONSTRAINT "Beatmap_beatmapset_id_fkey" FOREIGN KEY ("beatmapset_id") REFERENCES "BeatmapSet" ("id") ON DELETE RESTRICT ON UPDATE CASCADE
);
INSERT INTO "new_Beatmap" ("difficulty", "id") SELECT "difficulty", "id" FROM "Beatmap";
DROP TABLE "Beatmap";
ALTER TABLE "new_Beatmap" RENAME TO "Beatmap";
CREATE TABLE "new_BeatmapSet" (
"id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"artist" TEXT NOT NULL,
"artistUnicode" TEXT NOT NULL,
"title" TEXT NOT NULL,
"titleUnicode" TEXT NOT NULL,
"ranked" BOOLEAN NOT NULL
);
INSERT INTO "new_BeatmapSet" ("artist", "artistUnicode", "id", "title", "titleUnicode") SELECT "artist", "artistUnicode", "id", "title", "titleUnicode" FROM "BeatmapSet";
DROP TABLE "BeatmapSet";
ALTER TABLE "new_BeatmapSet" RENAME TO "BeatmapSet";
PRAGMA foreign_key_check("Beatmap");
PRAGMA foreign_key_check("BeatmapSet");
PRAGMA foreign_keys=ON;

View file

@ -0,0 +1,3 @@
# Please do not edit this file manually
# It should be added in your version-control system (i.e. Git)
provider = "sqlite"

64
prisma/schema.prisma Normal file
View file

@ -0,0 +1,64 @@
// This is your Prisma schema file,
// learn more about it in the docs: https://pris.ly/d/prisma-schema
// Client generator; main.ts imports the resulting PrismaClient from "@prisma/client".
generator client {
provider = "prisma-client-js"
}
// SQLite datasource; the connection URL is read from the DATABASE_URL environment variable.
datasource db {
provider = "sqlite"
url = env("DATABASE_URL")
}
// A single beatmap belonging to one BeatmapSet.
// `id` has no default, so the writer supplies it; main.ts passes the API beatmap id
// and stores the API `version` string as `difficulty`.
// NOTE(review): the migrations in this commit create "Beatmap"."id" with AUTOINCREMENT
// while this model declares no default — confirm whether a follow-up migration is pending.
model Beatmap {
id Int @id
difficulty String
// Owning set (foreign key: beatmapset_id -> BeatmapSet.id).
beatmapset BeatmapSet @relation(fields: [beatmapset_id], references: [id])
beatmapset_id Int
// Back-relation: scores recorded on this beatmap.
scores Score[]
}
// A beatmap set: artist/title metadata shared by its beatmaps.
// `id` has no default, so the writer supplies it; main.ts passes the API beatmap set id.
// NOTE(review): the migrations in this commit create "BeatmapSet"."id" with AUTOINCREMENT
// while this model declares no default — confirm whether a follow-up migration is pending.
model BeatmapSet {
id Int @id
artist String
artistUnicode String
title String
titleUnicode String
// main.ts stores `true` only when the API `ranked` value equals 1.
ranked Boolean
// Back-relations.
scores Score[]
beatmaps Beatmap[]
}
// One recorded play. Rows get a local autoincrement `id`; the natural key is the
// (user_id, beatmap_id, created_at, score) tuple declared unique below, which is
// what main.ts upserts on.
model Score {
id Int @id @default(autoincrement())
accuracy Float
// Optional; main.ts fills this from the API score's `best_id`.
best_id BigInt?
created_at DateTime
// Optional; main.ts fills this from the API score's `id`.
score_id BigInt?
score Int
beatmap Beatmap @relation(fields: [beatmap_id], references: [id])
beatmap_id Int
beatmapset BeatmapSet @relation(fields: [beatmapset_id], references: [id])
beatmapset_id Int
user_id Int
// Transitions in which this score is the `before` side.
transition_from Transition[] @relation("before")
// Transitions in which this score is the `after` side.
transition_to Transition[] @relation("after")
@@unique([user_id, beatmap_id, created_at, score])
}
// A directed link between two Score rows, identified by the (before_id, after_id) pair.
// main.ts writes one for each pair of a user's scores that are adjacent when sorted
// by created_at.
model Transition {
before Score @relation("before", fields: [before_id], references: [id])
before_id Int
after Score @relation("after", fields: [after_id], references: [id])
after_id Int
// main.ts copies this from the `after` score's user_id.
user_id Int
// main.ts stores after.created_at minus before.created_at, in milliseconds.
ms_between BigInt
@@id([before_id, after_id])
}

48
test.py Normal file
View file

@ -0,0 +1,48 @@
from surprise import Dataset, SVD
import numpy as np
import pandas as pd
import sqlite3
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
# Every recorded transition, joined to the beatmap played before and after it.
TRANSITIONS_QUERY = """
SELECT
s1.user_id as user_id,
s1.beatmap_id as before_beatmap_id,
s2.beatmap_id as after_beatmap_id,
ms_between
FROM Transition as t
JOIN Score as s1 ON s1.id = t.before_id
JOIN Score as s2 ON s2.id = t.after_id
"""

c = sqlite3.connect("./prisma/dev.db")
try:
    df = pd.read_sql_query(TRANSITIONS_QUERY, c)
finally:
    # Release the SQLite handle even if the query raises.
    c.close()
print(df)

# The bare strings below are not executed as SQL; they are reference queries
# kept next to the script.

# Beatmaps with most data:
"""
SELECT
beatmapset_id, artist, title, COUNT(*) as count
FROM Score
JOIN BeatmapSet ON Score.beatmapset_id = BeatmapSet.id
GROUP BY beatmapset_id
ORDER BY count DESC;
"""
# Given a specific beatmap, what maps do they go on to
"""
SELECT
bs1.artist, bs1.title, b1.difficulty, bs2.artist, bs2.title, b2.difficulty, COUNT(*) as count
FROM Transition
JOIN Score as s1 ON s1.id = Transition.before_id
JOIN Score as s2 ON s2.id = Transition.after_id
JOIN Beatmap as b1 on s1.beatmap_id = b1.id
JOIN BeatmapSet as bs1 on s1.beatmapset_id = bs1.id
JOIN Beatmap as b2 on s2.beatmap_id = b2.id
JOIN BeatmapSet as bs2 on s2.beatmapset_id = bs2.id
WHERE s1.beatmapset_id = 320118
GROUP BY s2.beatmap_id
ORDER BY count DESC;
"""