sqlite
This commit is contained in:
parent
851c8d5690
commit
e7ae46b7d0
3 changed files with 75 additions and 1 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
@ -1 +1,5 @@
|
||||||
.venv
|
.venv
|
||||||
|
.direnv
|
||||||
|
scores.db
|
||||||
|
__pycache__
|
||||||
|
*.pyc
|
58
turn-into-sqlite.py
Normal file
58
turn-into-sqlite.py
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
from git import Repo, RefLog
|
||||||
|
from io import BytesIO, StringIO, TextIOWrapper
|
||||||
|
import pandas as pd
|
||||||
|
from utils import to_jst_timestamp
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
# Schema for the "scores" table. Each row is keyed by the pair
# ("Song ID", "Time Played"); IF NOT EXISTS makes re-running the script
# against an existing database file safe.
SCORES_SCHEMA = """
    CREATE TABLE IF NOT EXISTS "scores" (
        "Song ID" TEXT,
        "Time Played" TIMESTAMP,
        "Song Name" TEXT,
        "Difficulty" TEXT,
        "Rating" INTEGER,
        "Score" INTEGER,
        "Grade" TEXT,
        "Lamp" TEXT,
        "Time Uploaded" TEXT,
        PRIMARY KEY ("Song ID", "Time Played")
    );
"""

# Open (creating it if necessary) the database file in the current
# working directory.
db = sqlite3.connect("./scores.db")

# Make sure the destination table exists before any rows are written.
c = db.cursor()
c.execute(SCORES_SCHEMA)
|
||||||
|
|
||||||
|
# Open the git repository located in the current working directory.
repo = Repo(".")

# Ask git for the hash of every commit that touched data/output.csv, one
# hash per line. --follow keeps tracking the file across renames.
log_output = repo.git.log(
    '--pretty=%H', '--follow', '--', 'data/output.csv'
).splitlines()

# Resolve each hash to a commit object. Blank lines are skipped: when no
# commit matches, git prints nothing, and splitting that with
# str.split('\n') used to produce [''] and make rev_parse('') raise
# instead of yielding an empty list.
commits = [
    repo.rev_parse(commit_hash)
    for commit_hash in log_output
    if commit_hash.strip()
]
|
||||||
|
|
||||||
|
def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier.

    Embedded double quotes are doubled, which is the standard SQL escape,
    so any column name can be placed in the statement safely.
    """
    return '"' + str(name).replace('"', '""') + '"'


def insert(pd_table, conn, keys, data_iter):
    """Insertion callable handed to ``DataFrame.to_sql(method=...)``.

    Writes the rows into ``scores`` and silently skips any row whose
    ("Song ID", "Time Played") pair is already stored, so the same play
    appearing in several commits is recorded only once.

    Args:
        pd_table: pandas' table wrapper (unused).
        conn: handle supplied by pandas; only ``executemany`` is called.
        keys: column names, in the same order as the values of each row.
        data_iter: iterable of row tuples to insert.
    """
    columns = ",".join(_quote_identifier(key) for key in keys)
    # Values always travel as bound parameters; only the column
    # identifiers are interpolated into the statement text.
    placeholders = ",".join("?" for _ in keys)
    query = f"""
        INSERT
        INTO scores
        ({columns})
        VALUES
        ({placeholders})
        ON CONFLICT("Song ID", "Time Played") DO NOTHING
    """
    conn.executemany(query, data_iter)


for commit in commits:
    # Find the CSV blob inside this commit's tree. A commit may lack the
    # file at this exact path, in which case it is skipped.
    blob = next(
        (
            entry
            for entry in commit.tree.traverse()
            if entry.path == "data/output.csv"
        ),
        None,
    )
    if blob is None:
        continue

    # Pull the blob's bytes into memory and parse them as UTF-8 text.
    raw = BytesIO()
    blob.stream_data(raw)
    raw.seek(0)
    reader = TextIOWrapper(raw, encoding='utf-8')
    # Despite the .csv extension the file is parsed as tab-separated.
    df = pd.read_csv(reader, delimiter='\t')

    # Rows without a play time cannot form a primary key; drop them.
    # .copy() detaches the filtered frame so the column assignment below
    # does not write into a slice of the original frame.
    df = df[df['Time Played'].notna()].copy()
    df['Time Played'] = df['Time Played'].map(to_jst_timestamp)

    # The key columns become the index so that to_sql writes them as
    # ordinary columns alongside the remaining fields.
    df.set_index(['Song ID', 'Time Played'], inplace=True)
    df.to_sql("scores", con=db, if_exists="append", method=insert)

# All commits processed; release the database file handle.
db.close()
|
12
utils.py
Normal file
12
utils.py
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
from datetime import datetime
|
||||||
|
import pytz
|
||||||
|
|
||||||
|
JST = pytz.timezone("Asia/Tokyo")
|
||||||
|
CST = pytz.timezone("America/Chicago")
|
||||||
|
|
||||||
|
def to_jst_timestamp(s: str, format_str="%Y-%m-%d %H:%M:%S"):
    """Parse a Japan-time timestamp string into an aware datetime.

    The string is read as wall-clock time in the ``JST`` zone
    (Asia/Tokyo) and the result is converted to the ``CST`` zone
    (America/Chicago). Note that, despite the function's name, the
    returned datetime is expressed in the Chicago zone.

    Args:
        s: timestamp text to parse; any non-string value is treated as
            "no timestamp".
        format_str: ``strptime`` format describing ``s``.

    Returns:
        A timezone-aware ``datetime`` in America/Chicago, or ``None``
        when ``s`` is not a string.

    Raises:
        ValueError: if ``s`` is a string that does not match
            ``format_str``.
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses; anything else, such as a missing-value marker, yields None.
    if not isinstance(s, str):
        return None

    naive_dt = datetime.strptime(s, format_str)
    # pytz zones are attached with localize() so the correct UTC offset
    # is chosen for the given wall-clock time.
    jst_dt = JST.localize(naive_dt)
    return jst_dt.astimezone(CST)
|
Loading…
Reference in a new issue