59 lines
1.7 KiB
Python
59 lines
1.7 KiB
Python
|
from git import Repo, RefLog
|
||
|
from io import BytesIO, StringIO, TextIOWrapper
|
||
|
import pandas as pd
|
||
|
from utils import to_jst_timestamp
|
||
|
import sqlite3
|
||
|
|
||
|
db = sqlite3.connect("./scores.db")
|
||
|
|
||
|
c = db.cursor()
|
||
|
c.execute("""
|
||
|
CREATE TABLE IF NOT EXISTS "scores" (
|
||
|
"Song ID" TEXT,
|
||
|
"Time Played" TIMESTAMP,
|
||
|
"Song Name" TEXT,
|
||
|
"Difficulty" TEXT,
|
||
|
"Rating" INTEGER,
|
||
|
"Score" INTEGER,
|
||
|
"Grade" TEXT,
|
||
|
"Lamp" TEXT,
|
||
|
"Time Uploaded" TEXT,
|
||
|
PRIMARY KEY ("Song ID", "Time Played")
|
||
|
);
|
||
|
""")
|
||
|
|
||
|
repo = Repo(".")
|
||
|
log_output = repo.git.log('--pretty=%H', '--follow', '--', 'data/output.csv').split('\n')
|
||
|
commits = [repo.rev_parse(commit_hash) for commit_hash in log_output]
|
||
|
|
||
|
for commit in commits:
|
||
|
files = commit.tree.traverse()
|
||
|
files = list(filter(lambda e: e.path == "data/output.csv", files))
|
||
|
if not files: continue
|
||
|
file = files[0]
|
||
|
data = BytesIO()
|
||
|
file.stream_data(data)
|
||
|
data.seek(0)
|
||
|
reader = TextIOWrapper(data, encoding='utf-8')
|
||
|
df = pd.read_csv(reader, delimiter='\t')
|
||
|
df = df[df['Time Played'].notna()]
|
||
|
df['Time Played'] = df['Time Played'].map(to_jst_timestamp)
|
||
|
df.set_index(['Song ID', 'Time Played'], inplace=True)
|
||
|
def insert(pd_table, conn, keys, data_iter):
|
||
|
data = list(data_iter)
|
||
|
# print(pd_table, keys, data)
|
||
|
query = ("""
|
||
|
INSERT
|
||
|
INTO scores
|
||
|
(%s)
|
||
|
VALUES
|
||
|
(%s)
|
||
|
ON CONFLICT("Song ID", "Time Played") DO NOTHING
|
||
|
""" % (
|
||
|
",".join(map(lambda x: repr(x), keys)),
|
||
|
",".join(map(lambda _: "?", keys))
|
||
|
))
|
||
|
# print(query)
|
||
|
conn.executemany(query, data)
|
||
|
df.to_sql("scores", con=db, if_exists="append", method=insert)
|