ddr-scores/turn-into-sqlite.py
Michael Zhang 0f8fb69e5f
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
delete existing rows
2024-05-20 00:00:48 -05:00

60 lines
1.7 KiB
Python

from git import Repo, RefLog
from io import BytesIO, StringIO, TextIOWrapper
import pandas as pd
from utils import to_jst_timestamp
import sqlite3
db = sqlite3.connect("./scores.db")
c = db.cursor()
c.execute("""
CREATE TABLE IF NOT EXISTS "scores" (
"Song ID" TEXT,
"Time Played" TIMESTAMP,
"Song Name" TEXT,
"Difficulty" TEXT,
"Rating" INTEGER,
"Score" INTEGER,
"Grade" TEXT,
"Lamp" TEXT,
"Time Uploaded" TEXT,
PRIMARY KEY ("Song ID", "Time Played")
);
""")
c.execute("""DELETE FROM "scores";""")
repo = Repo(".")
log_output = repo.git.log('--pretty=%H', '--follow', '--', 'data/output.csv').split('\n')
commits = [repo.rev_parse(commit_hash) for commit_hash in log_output]
for commit in commits:
files = commit.tree.traverse()
files = list(filter(lambda e: e.path == "data/output.csv", files))
if not files: continue
file = files[0]
data = BytesIO()
file.stream_data(data)
data.seek(0)
reader = TextIOWrapper(data, encoding='utf-8')
df = pd.read_csv(reader, delimiter='\t')
df = df[df['Time Played'].notna()]
df['Time Played'] = df['Time Played'].map(to_jst_timestamp)
df.set_index(['Song ID', 'Time Played'], inplace=True)
def insert(pd_table, conn, keys, data_iter):
data = list(data_iter)
# print(pd_table, keys, data)
query = ("""
INSERT
INTO scores
(%s)
VALUES
(%s)
ON CONFLICT("Song ID", "Time Played") DO NOTHING
""" % (
",".join(map(lambda x: repr(x), keys)),
",".join(map(lambda _: "?", keys))
))
# print(query)
conn.executemany(query, data)
df.to_sql("scores", con=db, if_exists="append", method=insert)