Commit b3824a8

[BE] Import CUDATemplateCaller non-lazily in select_algorithm.py
Pull Request resolved: #153765
ghstack-source-id: 284959940
Differential Revision: [D74911280](https://our.internmc.facebook.com/intern/diff/D74911280/)
1 parent bb3cd46
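Before this change, select_algorithm.py re-imported CUDATemplateCaller lazily inside each function that used it; the commit hoists those repeated imports into a single module-level import. A minimal sketch of the two patterns (the benchmark() helper below is hypothetical, only the import path comes from the diff):

# before: lazy, function-scoped import repeated at every use site
def benchmark(choice):
    from torch._inductor.codegen.cuda.cuda_kernel import CUDATemplateCaller
    return isinstance(choice, CUDATemplateCaller)

# after: one eager import at module scope, shared by all use sites
from torch._inductor.codegen.cuda.cuda_kernel import CUDATemplateCaller

def benchmark(choice):
    return isinstance(choice, CUDATemplateCaller)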

1 file changed: torch/_inductor/select_algorithm.py (+1 −10 lines)
@@ -29,6 +29,7 @@
 from torch._dynamo.device_interface import get_interface_for_device
 from torch._dynamo.testing import rand_strided
 from torch._dynamo.utils import counters, dynamo_timed, identity, preserve_rng_state
+from torch._inductor.codegen.cuda.cuda_kernel import CUDATemplateCaller
 from torch._inductor.utils import clear_on_fresh_inductor_cache
 from torch.utils._filelock import FileLock
 from torch.utils._ordered_set import OrderedSet
@@ -1834,8 +1835,6 @@ def __call__(
         precompilation_timeout_seconds: int = 60 * 60,
         return_multi_template=False,
     ):
-        from .codegen.cuda.cuda_kernel import CUDATemplateCaller
-
         # Templates selected with input_gen_fns require specific input data to avoid IMA
         # Passing custom input gen fns to benchmark_fusion NYI, so skip deferred template selection
         # TODO(jgong5): support multi-template on CPU
@@ -2141,10 +2140,6 @@ def wait_on_futures():
                 timeout=precompilation_timeout_seconds,
             ):
                 if e := future.exception():
-                    from torch._inductor.codegen.cuda.cuda_kernel import (
-                        CUDATemplateCaller,
-                    )
-
                     if isinstance(e, CUDACompileError) and isinstance(
                         futures[future], CUDATemplateCaller
                     ):
@@ -2263,8 +2258,6 @@ def benchmark_choices(
             try:
                 timing = cls.benchmark_choice(choice, autotune_args)
             except CUDACompileError as e:
-                from torch._inductor.codegen.cuda.cuda_kernel import CUDATemplateCaller
-
                 if not isinstance(choice, CUDATemplateCaller):
                     log.error(
                         "CUDA compilation error during autotuning: \n%s. \nIgnoring this choice.",
@@ -2275,8 +2268,6 @@ def benchmark_choices(
                 log.warning("Not yet implemented: %s", e)
                 timing = float("inf")
             except RuntimeError as e:
-                from torch._inductor.codegen.cuda.cuda_kernel import CUDATemplateCaller
-
                 msg = str(e)
                 if "invalid argument" in msg:
                     msg += "\n\nThis may mean this GPU is too small for max_autotune mode.\n\n"
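Function-local imports like the ones removed here are typically kept lazy to dodge an import cycle or defer an expensive module load; a quick smoke check that the hoisted import keeps the module importable (assuming a PyTorch build with Inductor available):

# fails at import time if the module-level import introduced a cycle
import torch._inductor.select_algorithm  # noqa: F401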
