This commit is contained in:
Michael Zhang 2024-05-04 05:29:20 -05:00
parent 851c8d5690
commit e7ae46b7d0
3 changed files with 75 additions and 1 deletions

6
.gitignore vendored
View file

@@ -1 +1,5 @@
.venv
.venv
.direnv
scores.db
__pycache__
*.pyc

58
turn-into-sqlite.py Normal file
View file

@@ -0,0 +1,58 @@
from git import Repo, RefLog
from io import BytesIO, StringIO, TextIOWrapper
import pandas as pd
from utils import to_jst_timestamp
import sqlite3
# Open the SQLite file that accumulates the score rows.
db = sqlite3.connect("./scores.db")

# Schema for the "scores" table. A row is identified by the composite
# primary key ("Song ID", "Time Played").
SCORES_SCHEMA = """
CREATE TABLE IF NOT EXISTS "scores" (
"Song ID" TEXT,
"Time Played" TIMESTAMP,
"Song Name" TEXT,
"Difficulty" TEXT,
"Rating" INTEGER,
"Score" INTEGER,
"Grade" TEXT,
"Lamp" TEXT,
"Time Uploaded" TEXT,
PRIMARY KEY ("Song ID", "Time Played")
);
"""

# IF NOT EXISTS makes this a no-op when the table is already there.
c = db.cursor()
c.execute(SCORES_SCHEMA)
# Path, relative to the repository root, of the tab-separated score export.
CSV_PATH = "data/output.csv"


def _quote_identifier(name):
    """Return ``name`` as a double-quoted SQL identifier.

    Embedded double quotes are doubled, which is the SQL escape for them.
    """
    return '"' + str(name).replace('"', '""') + '"'


def insert(pd_table, conn, keys, data_iter):
    """Insert rows into ``scores``, skipping rows whose key already exists.

    Written to be passed as the ``method=`` callable of ``DataFrame.to_sql``.

    Args:
        pd_table: Table wrapper supplied by pandas (unused here).
        conn: Handle supplied by pandas; only ``executemany`` is called on it.
        keys: Column names, in the same order as the values of each row.
        data_iter: Iterable of row tuples to insert.
    """
    rows = list(data_iter)
    # Column names are identifiers, so they get double quotes; repr() would
    # emit single-quoted string-literal syntax instead.
    columns = ",".join(_quote_identifier(key) for key in keys)
    placeholders = ",".join("?" for _ in keys)
    query = (
        f"INSERT INTO scores ({columns}) "
        f"VALUES ({placeholders}) "
        'ON CONFLICT("Song ID", "Time Played") DO NOTHING'
    )
    conn.executemany(query, rows)


repo = Repo(".")

# Hashes of the commits that touched the CSV. Blank lines are dropped because
# an empty history makes git print nothing, and rev_parse("") would fail.
raw_log = repo.git.log("--pretty=%H", "--follow", "--", CSV_PATH)
log_output = [line.strip() for line in raw_log.splitlines() if line.strip()]
commits = [repo.rev_parse(commit_hash) for commit_hash in log_output]

for commit in commits:
    # Look the file up by path rather than walking the whole tree; commits
    # that do not contain it at this path are skipped.
    try:
        blob = commit.tree / CSV_PATH
    except KeyError:
        continue

    # Copy the blob into memory and expose it as UTF-8 text for pandas.
    buffer = BytesIO()
    blob.stream_data(buffer)
    buffer.seek(0)
    reader = TextIOWrapper(buffer, encoding="utf-8")

    df = pd.read_csv(reader, delimiter="\t")
    # Rows without a play time cannot form a primary key. The .copy() gives
    # an independent frame so the column assignment below is unambiguous.
    df = df[df["Time Played"].notna()].copy()
    df["Time Played"] = df["Time Played"].map(to_jst_timestamp)
    df.set_index(["Song ID", "Time Played"], inplace=True)
    df.to_sql("scores", con=db, if_exists="append", method=insert)

# Flush anything still pending and release the database file.
db.commit()
db.close()

12
utils.py Normal file
View file

@@ -0,0 +1,12 @@
from datetime import datetime
import pytz
# Zone in which to_jst_timestamp interprets the naive timestamps it parses.
JST = pytz.timezone("Asia/Tokyo")
# Zone into which to_jst_timestamp converts its result before returning it.
CST = pytz.timezone("America/Chicago")
def to_jst_timestamp(s: object, format_str="%Y-%m-%d %H:%M:%S") -> "datetime | None":
    """Parse a Japan-local timestamp string into an aware datetime.

    The text is parsed with ``format_str``, interpreted as Asia/Tokyo
    wall-clock time, and then converted to America/Chicago. Note that,
    despite the function's name, the returned value is expressed in the
    America/Chicago zone.

    Args:
        s: Timestamp text. Any non-string value (for example ``None`` or a
            float NaN from a missing cell) yields ``None``.
        format_str: ``datetime.strptime`` format describing ``s``.

    Returns:
        A timezone-aware ``datetime`` in America/Chicago, or ``None`` when
        ``s`` is not a string.

    Raises:
        ValueError: If ``s`` is a string that does not match ``format_str``.
    """
    # isinstance (rather than an exact type check) also accepts str subclasses.
    if not isinstance(s, str):
        return None
    naive_dt = datetime.strptime(s, format_str)
    jst_dt = JST.localize(naive_dt)
    return jst_dt.astimezone(CST)