Training in < 15 Lines
import sys, os, torch, urllib.request
from torch.utils.data import DataLoader
from tempfile import TemporaryDirectory
from olm import Dataset, HFTokenizer, Trainer, LM
with TemporaryDirectory() as tmp:
    # Everything that touches the downloaded corpus must stay inside this
    # block: the temporary directory and its contents are removed on exit.
    # NOTE(review): the URL below is elided ("...") in the source; substitute
    # the real location of input.txt before running.
    urllib.request.urlretrieve(
        "https://github.com/.../input.txt",
        os.path.join(tmp, "i.txt"),
    )
    tokenizer = HFTokenizer("gpt2")
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Define Model
    # NOTE(review): the positional values (64, 4, 2, 33) are model
    # hyperparameters whose meaning is defined by olm.LM's signature, which
    # is not visible here -- confirm against the olm documentation.
    model = LM(tokenizer.vocab_size, 64, 4, 2, 33)
    optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

    # Data & Training
    # The dataset is pointed at the temporary directory holding i.txt.
    # NOTE(review): 32 is passed to both Dataset and Trainer; presumably a
    # sequence/context length -- confirm against olm.
    dataset = Dataset(tmp, tokenizer, 32)
    dataloader = DataLoader(dataset, batch_size=4)
    trainer = Trainer(model, optimizer, dataloader, device, 32, use_amp=False)
    # NOTE(review): the meaning of (1, 10, 100) is defined by Trainer.train;
    # the result is indexed below as a sequence of loss values.
    losses = trainer.train(1, 10, 100)

    # Report the first and last recorded loss and whether the loss decreased.
    print(f"S:{losses[0]:.4f} E:{losses[-1]:.4f} OK:{losses[-1]<losses[0]}")
Models come from olm.models. Data pipelines come from olm.data. Training orchestration lives in olm.train. Start with this structure and gradually customize any part of it.