Page MenuHomePhabricator
Paste P86473

Train 'jhu-clsp/mmBERT-base' using Trainer on GPU
Active · Public

Authored by gkyziridis on Dec 9 2025, 1:22 PM.
# Launch the ROCm-enabled PyTorch container with direct GPU access:
#   --device=/dev/kfd --device=/dev/dri  expose the AMD GPU nodes to the container
#   --group-add video/render             grant the container the device-owning group IDs
#   --ipc=host / seccomp=unconfined      NOTE(review): presumably required for ROCm
#                                        shared-memory use — confirm for your setup
$ docker run --rm --network=host -it \
--device=/dev/kfd --device=/dev/dri \
--group-add=$(getent group video | cut -d: -f3) \
--group-add=$(getent group render | cut -d: -f3) \
--ipc=host \
--security-opt seccomp=unconfined \
torch_rocm_gpu
# Python 3.11.2 (main, Apr 28 2025, 14:11:48) [GCC 12.2.0] on linux
# Type "help", "copyright", "credits" or "license" for more information.
# Disable every torch JIT/compile path via environment variables.
# These must be set BEFORE torch is imported to take effect.
import os
os.environ["TORCH_DISABLE_JIT"] = "1"
os.environ["TORCHINDUCTOR_DISABLE"] = "1"
os.environ["TORCH_COMPILE_DISABLE"] = "1"
# NOTE(review): importing torch._dynamo implicitly imports torch itself,
# so torch is already loaded by the time the explicit `import torch` runs.
import torch._dynamo
# Fall back to eager execution instead of raising on dynamo compile errors.
torch._dynamo.config.suppress_errors = True
import torch
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
Trainer,
TrainingArguments
)
# Environment sanity report: torch build, ROCm visibility, GPU count/name, HIP version.
# Observed in the original session: torch 2.6.0+rocm6.1, 2 GPUs ("AMD Radeon Graphics"),
# HIP 6.1.40091-a8dbc0c19, with benign "amdgpu.ids: No such file" warnings.
gpu_available = torch.cuda.is_available()
print("PyTorch version:", torch.__version__)
print("CUDA/ROCm available:", gpu_available)
print("Number of GPUs:", torch.cuda.device_count())
print("Current GPU name:", torch.cuda.get_device_name(0) if gpu_available else "None")
print(torch.version.hip)
# Model under test. An earlier revision of this paste used
# "answerdotai/ModernBERT-base"; the title was later changed to mmBERT.
MODEL = "jhu-clsp/mmBERT-base"

# Prefer the GPU when ROCm reports one, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Running on {device}")
# Download tokenizer and encoder, then attach a fresh 2-way classification head.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

# NOTE(review): the transcript's warning mentions "answerdotai/ModernBERT-base" —
# stale output from an earlier run of this paste. Either way, classifier.weight
# and classifier.bias are newly initialized, so the head is untrained.
model = AutoModelForSequenceClassification.from_pretrained(MODEL, num_labels=2)
model = model.to(device)
# Build a toy binary-classification dataset: 2050 synthetic sentences with
# strictly alternating 0/1 labels.
texts = [f"example sentence {i}" for i in range(2050)]
labels = [i % 2 for i in range(2050)]

# Tokenize the whole corpus in one call; padding=True pads every sample to the
# longest sequence in the batch, truncation=True caps at the model maximum.
batch = tokenizer(
    texts,
    padding=True,
    truncation=True,
    return_tensors="pt"
)
batch["labels"] = torch.tensor(labels)

# Trainer expects a sequence of per-example feature dicts. Slice the batched
# tensors row-wise with zip instead of the original index loop
# (for i in range(len(labels)): ... batch[...][i]) — same elements, idiomatic form.
train_data = [
    {"input_ids": ids, "attention_mask": mask, "labels": label}
    for ids, mask, label in zip(
        batch["input_ids"], batch["attention_mask"], batch["labels"]
    )
]
# Minimal Trainer configuration: a single optimizer step, no logging backends.
args = TrainingArguments(
    output_dir="./out",
    per_device_train_batch_size=2,
    max_steps=1,  # just 1 training step
    report_to="none",
    no_cuda=not torch.cuda.is_available(),  # auto disable if no GPU
)
# NOTE(review): the original session also evaluated the bare expressions
# `not torch.cuda.is_available()` (-> False, i.e. a GPU is visible) and
# `not False` (-> True) interactively. Those are no-op statements in a
# script and have been removed here.
# Smoke-test a single forward pass over the full padded batch with autograd off.
# (This is the step after which the original session crashed with an HSA
# hardware exception on ROCm.)
input_ids = batch["input_ids"].to(device)
attention_mask = batch["attention_mask"].to(device)
with torch.no_grad():
    outputs = model(input_ids, attention_mask=attention_mask)
logits = outputs.logits.cpu()
# Runtime crash during the forward pass (HSA hardware exception on ROCm):
# :0:rocdevice.cpp :2881: 15722138585593 us: [pid:1 tid:0x7faccf3ff6c0] Callback: Queue 0x7faa34100000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016

Event Timeline

gkyziridis edited the content of this paste. (Show Details)
gkyziridis edited the content of this paste. (Show Details)
gkyziridis edited the content of this paste. (Show Details)
gkyziridis changed the title of this paste from "Train 'answerdotai/ModernBERT-base' using Trainer on GPU" to "Train 'jhu-clsp/mmBERT-base' using Trainer on GPU". — Dec 10 2025, 11:14 AM