# Patch summary (commentary only: it precedes the first "diff --git" header
# and is not part of any hunk).
#
#  1. Adds .github/workflows/nvidia-smoke.yml, the "gpu-smoke" workflow. It is
#     triggered by pushes to the nvidia-gpu-runners branch or by manual
#     dispatch, runs on the self-hosted runner labelled
#     nvidia-docker-b200-8-x86-64, prints nvidia-smi output (tolerating its
#     absence), and then runs one 4096x4096 PyTorch matmul on cuda:0. The
#     Python step raises SystemExit when CUDA is not available.
#  2. In src/kernelbot/cogs/admin_cog.py (leaderboard_create_impl), limits the
#     description passed to _leaderboard_opening_message to its first 1500
#     characters.
#
# NOTE(review): the line structure of this patch was restored from a
# whitespace-collapsed copy. Leading indentation (especially on the context
# lines of the admin_cog.py hunk) and the spacing before inline comments were
# reconstructed from syntax -- confirm against the target file before applying.
# NOTE(review): `definition.description[:1500]` on its own is equivalent to the
# conditional expression below, because a slice that extends past the end of a
# string returns the whole string.
diff --git a/.github/workflows/nvidia-smoke.yml b/.github/workflows/nvidia-smoke.yml
new file mode 100644
index 00000000..1cfdbec6
--- /dev/null
+++ b/.github/workflows/nvidia-smoke.yml
@@ -0,0 +1,50 @@
+name: gpu-smoke
+on:
+  push:
+    branches: [ nvidia-gpu-runners ]
+  workflow_dispatch: {}
+jobs:
+  gpu-test:
+    runs-on: [self-hosted, nvidia-docker-b200-8-x86-64]
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - name: Show GPU info
+        run: |
+          echo "===== nvidia-smi ====="
+          nvidia-smi || echo "nvidia-smi not available"
+          echo "======================"
+
+      - name: Run CUDA sanity test with PyTorch
+        run: |
+          python3 - << 'EOF'
+          import torch, time
+
+          print("PyTorch version:", torch.__version__)
+          print("CUDA available:", torch.cuda.is_available())
+          print("CUDA device count:", torch.cuda.device_count())
+
+          if not torch.cuda.is_available():
+              raise SystemExit("ERROR: CUDA not available on this runner ❌")
+
+          # list all visible GPUs
+          for i in range(torch.cuda.device_count()):
+              print(f"Device {i}: {torch.cuda.get_device_name(i)}")
+
+          # simple GPU compute test on cuda:0
+          device = torch.device("cuda:0")
+          a = torch.randn(4096, 4096, device=device)
+          b = torch.randn(4096, 4096, device=device)
+
+          torch.cuda.synchronize()
+          t0 = time.time()
+          c = a @ b
+          torch.cuda.synchronize()
+          t1 = time.time()
+
+          print("Matmul result shape:", tuple(c.shape))
+          print(f"Matmul took {t1 - t0:.3f} sec on GPU")
+          print("All good ✅")
+          EOF
diff --git a/src/kernelbot/cogs/admin_cog.py b/src/kernelbot/cogs/admin_cog.py
index 5fe8aeba..238364bb 100644
--- a/src/kernelbot/cogs/admin_cog.py
+++ b/src/kernelbot/cogs/admin_cog.py
@@ -276,7 +276,11 @@ async def leaderboard_create_impl(  # noqa: C901
             forum_thread = await forum_channel.create_thread(
                 name=leaderboard_name,
                 content=self._leaderboard_opening_message(
-                    leaderboard_name, date_value, definition.description
+                    leaderboard_name,
+                    date_value,
+                    definition.description[:1500]
+                    if len(definition.description) > 1500
+                    else definition.description,
                 ),
                 auto_archive_duration=10080,  # 7 days
             )