from git import Repo, RefLog from io import BytesIO, StringIO, TextIOWrapper import pandas as pd from utils import to_jst_timestamp import sqlite3 db = sqlite3.connect("./scores.db") c = db.cursor() c.execute(""" CREATE TABLE IF NOT EXISTS "scores" ( "Song ID" TEXT, "Time Played" TIMESTAMP, "Song Name" TEXT, "Difficulty" TEXT, "Rating" INTEGER, "Score" INTEGER, "Grade" TEXT, "Lamp" TEXT, "Time Uploaded" TEXT, PRIMARY KEY ("Song ID", "Time Played") ); """) c.execute("""DELETE FROM "scores";""") repo = Repo(".") log_output = repo.git.log('--pretty=%H', '--follow', '--', 'data/output.csv').split('\n') commits = [repo.rev_parse(commit_hash) for commit_hash in log_output] for commit in commits: files = commit.tree.traverse() files = list(filter(lambda e: e.path == "data/output.csv", files)) if not files: continue file = files[0] data = BytesIO() file.stream_data(data) data.seek(0) reader = TextIOWrapper(data, encoding='utf-8') df = pd.read_csv(reader, delimiter='\t') df = df[df['Time Played'].notna()] df['Time Played'] = df['Time Played'].map(to_jst_timestamp) df.set_index(['Song ID', 'Time Played'], inplace=True) def insert(pd_table, conn, keys, data_iter): data = list(data_iter) # print(pd_table, keys, data) query = (""" INSERT INTO scores (%s) VALUES (%s) ON CONFLICT("Song ID", "Time Played") DO NOTHING """ % ( ",".join(map(lambda x: repr(x), keys)), ",".join(map(lambda _: "?", keys)) )) # print(query) conn.executemany(query, data) df.to_sql("scores", con=db, if_exists="append", method=insert)