"""Load every committed revision of ``data/output.csv`` into ``scores.db``.

The script asks git for all commits that touched ``data/output.csv``, reads
the tab-separated file as it existed in each of those commits, and appends
its rows to the ``scores`` table of a local SQLite database.  Rows are keyed
on ("Song ID", "Time Played"); a row whose key is already present is skipped.
"""
from git import Repo, RefLog
from io import BytesIO, StringIO, TextIOWrapper
import pandas as pd
from utils import to_jst_timestamp
import sqlite3
from contextlib import closing

DB_PATH = "./scores.db"
CSV_PATH = "data/output.csv"

CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS "scores" (
    "Song ID" TEXT,
    "Time Played" TIMESTAMP,
    "Song Name" TEXT,
    "Difficulty" TEXT,
    "Rating" INTEGER,
    "Score" INTEGER,
    "Grade" TEXT,
    "Lamp" TEXT,
    "Time Uploaded" TEXT,
    PRIMARY KEY ("Song ID", "Time Played")
);
"""


def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier.

    Embedded double quotes are doubled, which is the SQL escaping rule for
    quoted identifiers.  ``repr()`` is not a substitute: it emits
    single-quoted text, which SQL treats as a string literal.
    """
    return '"' + str(name).replace('"', '""') + '"'


def insert(pd_table, conn, keys, data_iter):
    """Insert rows into ``scores``, ignoring rows whose key already exists.

    This is the callable handed to ``DataFrame.to_sql(method=...)``.

    Args:
        pd_table: The pandas table wrapper supplied by ``to_sql`` (unused).
        conn: Object exposing ``executemany``; supplied by ``to_sql``.
        keys: Column names, in the same order as the values in each row.
        data_iter: Iterable of row tuples to insert.
    """
    columns = ",".join(_quote_identifier(key) for key in keys)
    placeholders = ",".join("?" for _ in keys)
    query = f"""
        INSERT
        INTO scores
            ({columns})
        VALUES
            ({placeholders})
        ON CONFLICT("Song ID", "Time Played") DO NOTHING
    """
    conn.executemany(query, list(data_iter))


def _read_scores(blob):
    """Parse one committed copy of the CSV into a frame indexed by the key.

    Rows without a "Time Played" value are dropped, because that column is
    part of the primary key.  The remaining values are converted with
    ``to_jst_timestamp``.
    """
    buffer = BytesIO()
    blob.stream_data(buffer)
    buffer.seek(0)
    frame = pd.read_csv(TextIOWrapper(buffer, encoding="utf-8"), delimiter="\t")
    # Copy so the column assignment below acts on an independent frame and
    # not on a view of the unfiltered one.
    frame = frame[frame["Time Played"].notna()].copy()
    frame["Time Played"] = frame["Time Played"].map(to_jst_timestamp)
    return frame.set_index(["Song ID", "Time Played"])


def main():
    """Create the table if needed and import every revision of the CSV."""
    with closing(sqlite3.connect(DB_PATH)) as db:
        with closing(db.cursor()) as cursor:
            cursor.execute(CREATE_TABLE_SQL)

        repo = Repo(".")
        log_output = repo.git.log("--pretty=%H", "--follow", "--", CSV_PATH)
        # splitlines() gives [] for an empty log; split("\n") would give [""]
        # and rev_parse("") would then fail.
        commits = [repo.rev_parse(commit_hash) for commit_hash in log_output.splitlines()]

        # NOTE: commits are processed in the order git log printed them.
        # Because conflicting rows are skipped, the first revision that
        # contains a given key is the one that gets stored.
        for commit in commits:
            try:
                # Direct path lookup instead of walking the whole tree.
                blob = commit.tree / CSV_PATH
            except KeyError:
                # --follow can report commits where the file lived at another
                # path; those have nothing at CSV_PATH, so skip them.
                continue
            _read_scores(blob).to_sql("scores", con=db, if_exists="append", method=insert)

        db.commit()


if __name__ == "__main__":
    main()
"""Timezone helpers for parsing score timestamps."""
from datetime import datetime
from typing import Optional

import pytz

JST = pytz.timezone("Asia/Tokyo")
CST = pytz.timezone("America/Chicago")


def to_jst_timestamp(s: str, format_str="%Y-%m-%d %H:%M:%S") -> Optional[datetime]:
    """Parse a JST wall-clock string and return it in America/Chicago time.

    Despite the name, the result is not expressed in JST: the text is
    interpreted as Asia/Tokyo local time and then converted to the
    America/Chicago zone.

    Args:
        s: Timestamp text in JST.  Any non-string value (``None``, NaN, ...)
            is treated as missing.
        format_str: ``strptime`` format that *s* must match.

    Returns:
        A timezone-aware ``datetime`` in America/Chicago, or ``None`` when
        *s* is not a string.

    Raises:
        ValueError: If *s* is a string that does not match *format_str*.
    """
    # isinstance (rather than an exact type comparison) so that str
    # subclasses are parsed instead of being silently dropped.
    if not isinstance(s, str):
        return None
    naive_dt = datetime.strptime(s, format_str)
    # localize() attaches the zone without shifting the wall-clock time;
    # astimezone() then performs the real conversion.
    return JST.localize(naive_dt).astimezone(CST)