68 lines
2 KiB
Python
68 lines
2 KiB
Python
import numpy as np
|
|
import random
|
|
import click
|
|
import pathlib
|
|
|
|
def evaluate(w, p):
    """Return the dot product of ``w`` and ``p``.

    The two iterables are walked in lockstep with ``zip``, so any surplus
    elements of the longer one are ignored. Empty input yields ``0``.
    """
    return sum(weight * value for weight, value in zip(w, p))
|
|
|
|
# Fraction of the rows assigned to the training split; the rest form the test split.
_TRAIN_FRACTION = 0.7


def _read_matrix(path):
    """Read a data file: a "<rows> <dimensions>" header line, then the values.

    Returns ``(matrix, declared_rows, declared_dimensions)``.
    """
    with open(path, "r") as f:
        declared_rows, declared_dimensions = map(int, f.readline().split())
        # ndmin=2 keeps a single-row file two-dimensional, so row selection
        # with ``matrix[indices, :]`` keeps working.
        matrix = np.loadtxt(f, ndmin=2)
    return matrix, declared_rows, declared_dimensions


def _read_labels(path):
    """Read a label file: a "<rows>" header line, then the label values.

    Returns ``(labels, declared_rows)``.
    """
    with open(path, "r") as f:
        declared_rows = int(f.readline().strip())
        # ndmin=1 keeps a single-label file indexable (loadtxt would
        # otherwise return a 0-d array).
        labels = np.loadtxt(f, ndmin=1)
    return labels, declared_rows


def _write_matrix(path, matrix, dimensions):
    """Write ``matrix`` with a "<rows> <dimensions>" header, one row per line."""
    with open(path, "w") as f:
        f.write(f"{len(matrix)} {dimensions}\n")
        f.writelines(" ".join(map(str, row)) + "\n" for row in matrix)


def _write_labels(path, labels):
    """Write ``labels`` with a "<rows>" header, one label per line."""
    with open(path, "w") as f:
        f.write(f"{len(labels)}\n")
        f.writelines(f"{label}\n" for label in labels)


@click.command()
@click.argument('data_path')
@click.argument('label_path')
@click.argument('out_path')
def generate_test_data(data_path: str, label_path: str, out_path: str):
    """Randomly split a data/label file pair into train and test sets.

    DATA_PATH starts with a "<rows> <dimensions>" header line and LABEL_PATH
    with a "<rows>" header line; the remainder of each file is parsed with
    numpy.loadtxt. Rows are shuffled and the first 70% become the training
    set, the rest the test set. Four files are written into OUT_PATH (created
    if missing): train_data.txt, test_data.txt, train_label.txt and
    test_label.txt, each using the same header convention as its input.

    Fails with a click error if the two headers disagree on the row count or
    if either file holds fewer rows than declared.
    """
    data, data_rows, dimensions = _read_matrix(data_path)
    print("loaded data")

    labels, rows = _read_labels(label_path)
    print("loaded labels")

    # Data rows and labels are paired by position, so their counts must agree.
    if data_rows != rows:
        raise click.ClickException(
            f"row count mismatch: data header declares {data_rows} rows, "
            f"label header declares {rows}"
        )
    if len(data) < rows or len(labels) < rows:
        raise click.ClickException(
            f"headers declare {rows} rows but only {len(data)} data rows "
            f"and {len(labels)} labels were read"
        )

    # Shuffle row indices once and reuse them for both arrays so that each
    # data row stays paired with its label.
    indices = list(range(rows))
    random.shuffle(indices)
    split_at = int(_TRAIN_FRACTION * rows)
    train_indices = indices[:split_at]
    test_indices = indices[split_at:]

    out_dir = pathlib.Path(out_path)
    out_dir.mkdir(exist_ok=True, parents=True)

    _write_matrix(out_dir / "train_data.txt", data[train_indices, :], dimensions)
    _write_matrix(out_dir / "test_data.txt", data[test_indices, :], dimensions)
    _write_labels(out_dir / "train_label.txt", labels[train_indices])
    _write_labels(out_dir / "test_label.txt", labels[test_indices])
|
|
|
|
# Script entry point: run the command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    generate_test_data()
|