File tree Expand file tree Collapse file tree 1 file changed +9
-0
lines changed
Filter options
Expand file tree Collapse file tree 1 file changed +9
-0
lines changed
Original file line number Diff line number Diff line change @@ -1512,6 +1512,14 @@ static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggm
1512
1512
i01_high = row_high % ne01;
1513
1513
}
1514
1514
}
1515
+
1516
+ // There is possibly a bug in the Windows nvcc compiler regarding instruction reordering or optimizing out local variables.
1517
+ // Removing the first assert or changing the order of the arguments causes the second assert to fail.
1518
+ // Removing both asserts results in i01_high becoming 0, which in turn results in garbage output.
1519
+ // The root cause seems to be a problem with i0_offset_high becoming 0 when it should always be > 0 (for single GPU).
1520
+ GGML_ASSERT (i01_low == 0 || g_device_count > 1 );
1521
+ GGML_ASSERT (i01_high == ne01 || g_device_count > 1 );
1522
+
1515
1523
const int64_t i01_diff = i01_high - i01_low;
1516
1524
if (i01_diff == 0 ) {
1517
1525
continue ;
@@ -1727,6 +1735,7 @@ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensor, const
1727
1735
row_low -= row_low % GGML_CUDA_DMMV_Y;
1728
1736
row_high = id == g_device_count - 1 ? nrows : nrows*g_tensor_split[id + 1 ];
1729
1737
row_high -= row_high % GGML_CUDA_DMMV_Y;
1738
+ GGML_ASSERT (nrows % GGML_CUDA_DMMV_Y == 0 );
1730
1739
} else {
1731
1740
GGML_ASSERT (false );
1732
1741
}
You can’t perform that action at this time.
0 commit comments