sqlite
This commit is contained in:
parent
851c8d5690
commit
e7ae46b7d0
3 changed files with 75 additions and 1 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -1 +1,5 @@
|
|||
.venv
|
||||
.direnv
|
||||
scores.db
|
||||
__pycache__
|
||||
*.pyc
|
58
turn-into-sqlite.py
Normal file
58
turn-into-sqlite.py
Normal file
|
@ -0,0 +1,58 @@
|
|||
from git import Repo, RefLog
|
||||
from io import BytesIO, StringIO, TextIOWrapper
|
||||
import pandas as pd
|
||||
from utils import to_jst_timestamp
|
||||
import sqlite3
|
||||
|
||||
# DDL for the single table this script maintains. The composite primary key
# ("Song ID", "Time Played") is what the later upsert's ON CONFLICT targets.
_SCORES_DDL = """
CREATE TABLE IF NOT EXISTS "scores" (
"Song ID" TEXT,
"Time Played" TIMESTAMP,
"Song Name" TEXT,
"Difficulty" TEXT,
"Rating" INTEGER,
"Score" INTEGER,
"Grade" TEXT,
"Lamp" TEXT,
"Time Uploaded" TEXT,
PRIMARY KEY ("Song ID", "Time Played")
);
"""

# Open (or create) the database file in the current working directory and
# make sure the table exists before any rows are imported.
db = sqlite3.connect("./scores.db")
c = db.cursor()
c.execute(_SCORES_DDL)
|
||||
|
||||
# Repository rooted at the current working directory.
repo = Repo(".")

# One commit hash per line for every commit that touched the CSV, in the
# order `git log` prints them. `splitlines()` (rather than `split('\n')`)
# yields an empty list when the log is empty, so we never try to resolve an
# empty string as a revision.
# NOTE(review): `--follow` also reports commits from before a rename, but the
# import loop only looks for the file under its current path.
log_output = repo.git.log(
    "--pretty=%H", "--follow", "--", "data/output.csv"
).splitlines()

# Resolve each hash to a commit object, skipping any blank lines.
commits = [
    repo.rev_parse(commit_hash) for commit_hash in log_output if commit_hash
]
|
||||
|
||||
def insert(pd_table, conn, keys, data_iter):
    """Insert rows into ``scores``, silently skipping already-known plays.

    This is passed to ``DataFrame.to_sql`` as its ``method`` callable, so the
    signature is dictated by pandas.

    Args:
        pd_table: The pandas table wrapper (unused).
        conn: Object exposing ``executemany``, supplied by pandas.
        keys: Column names, in the same order as the values in each row.
        data_iter: Iterable of row tuples to insert.
    """
    # Quote column names as SQL identifiers (double quotes, with embedded
    # quotes doubled). repr() would emit single-quoted *string literals*,
    # which SQLite only tolerates as identifiers through a legacy quirk.
    columns = ", ".join('"' + str(key).replace('"', '""') + '"' for key in keys)
    placeholders = ", ".join("?" for _ in keys)
    query = f"""
        INSERT INTO scores ({columns})
        VALUES ({placeholders})
        ON CONFLICT("Song ID", "Time Played") DO NOTHING
    """
    conn.executemany(query, list(data_iter))


# Replay every historical version of the CSV into the database. Rows whose
# ("Song ID", "Time Played") key is already present are left untouched, so
# the first version processed for a given play is the one that is kept.
for commit in commits:
    # Look the file up directly instead of walking the whole tree; a commit
    # that does not contain it at this path is skipped.
    try:
        blob = commit.tree / "data/output.csv"
    except KeyError:
        continue

    # Pull the blob contents into memory and decode them as UTF-8 text.
    buffer = BytesIO()
    blob.stream_data(buffer)
    buffer.seek(0)
    reader = TextIOWrapper(buffer, encoding="utf-8")

    # The file is tab-separated despite its .csv extension.
    df = pd.read_csv(reader, delimiter="\t")

    # Drop rows without a play time; copy so the column assignment below
    # operates on an independent frame rather than a filtered view.
    df = df[df["Time Played"].notna()].copy()
    df["Time Played"] = df["Time Played"].map(to_jst_timestamp)

    # The index columns are written by to_sql and form the primary key.
    df = df.set_index(["Song ID", "Time Played"])
    df.to_sql("scores", con=db, if_exists="append", method=insert)
|
12
utils.py
Normal file
12
utils.py
Normal file
|
@ -0,0 +1,12 @@
|
|||
from datetime import datetime
|
||||
import pytz
|
||||
|
||||
# Zone in which to_jst_timestamp interprets the naive timestamp strings.
JST = pytz.timezone("Asia/Tokyo")
# Zone that to_jst_timestamp converts the parsed timestamp into before
# returning it.
CST = pytz.timezone("America/Chicago")
|
||||
|
||||
def to_jst_timestamp(s: str, format_str="%Y-%m-%d %H:%M:%S"):
    """Parse a Tokyo wall-clock string into an aware datetime.

    The string is interpreted as local time in ``JST`` (Asia/Tokyo) and the
    resulting instant is returned expressed in ``CST`` (America/Chicago).
    Note that, despite the function's name, the returned object carries the
    Chicago zone, not the Tokyo one.

    Args:
        s: Timestamp text matching ``format_str``. Any non-string value
            (for example a NaN from a missing CSV cell) yields ``None``.
        format_str: ``datetime.strptime`` format used to parse ``s``.

    Returns:
        A timezone-aware ``datetime`` in the ``CST`` zone, or ``None`` when
        ``s`` is not a string.

    Raises:
        ValueError: If ``s`` is a string that does not match ``format_str``.
    """
    if not isinstance(s, str):
        return None

    naive_dt = datetime.strptime(s, format_str)
    # localize() attaches the zone without shifting the wall-clock value;
    # astimezone() then re-expresses the same instant in the target zone.
    jst_dt = JST.localize(naive_dt)
    return jst_dt.astimezone(CST)
|
Loading…
Reference in a new issue