Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Big performance problem with closed intervals looping #45222

Copy link
Copy link
@leonardo-m

Description

@leonardo-m
Issue body actions

In my code I've essentially stopped using loops with ... (intervals closed on the right, recently written with the syntax ..=) because they give performance problems. This is a simple example that shows the problem:

#![feature(inclusive_range_syntax)]
#![allow(private_no_mangle_fns)]

#[inline(never)]
#[no_mangle]
fn foo1(n: u64) -> u64 {
    let mut count = 0;
    for _ in 0 .. n {
        for j in (0 .. n + 1).rev() {
            count += j;
        }
    }
    count
}

#[inline(never)]
#[no_mangle]
fn foo2(n: u64) -> u64 {
    let mut count = 0;
    for _ in 0 .. n {
        for j in (0 ..= n).rev() {
            count += j;
        }
    }
    count
}

fn main() {
    let n: u64 = std::env::args().nth(1).unwrap().parse().unwrap();
    let what: u32 = std::env::args().nth(2).unwrap().parse().unwrap();

    match what {
        1 => println!("{}", foo1(n)),
        2 => println!("{}", foo2(n)),
        _ => panic!(),
    }
}

Compiled with the last Nightly:

rustc 1.22.0-nightly (d6d711dd8 2017-10-10)
binary: rustc
commit-hash: d6d711dd8f7ad5885294b8e1f0009a23dc1f8b1f
commit-date: 2017-10-10
host: x86_64-pc-windows-gnu
release: 1.22.0-nightly
LLVM version: 4.0

Compiled with:
rustc -O test.rs

Running it calling foo1 takes 0.02 seconds:

...>elaps test 100000 1
500005000000000

Running it calling foo2 takes about 13.65 seconds:


...>elaps test 100000 2
500005000000000
The asm I am seeing using "--emit asm"
foo1:
	testq	%rcx, %rcx
	je	.LBB5_1
	movq	%rcx, %r8
	imulq	%r8, %r8
	leaq	-1(%rcx), %rdx
	movq	%rcx, %rax
	mulq	%rdx
	shldq	$63, %rax, %rdx
	subq	%rdx, %r8
	cmpq	$3, %rcx
	jbe	.LBB5_3
	movq	%rcx, %rdx
	andq	$-4, %rdx
	je	.LBB5_3
	movd	%r8, %xmm0
	pshufd	$68, %xmm0, %xmm2
	leaq	-4(%rdx), %r9
	movl	%r9d, %eax
	shrl	$2, %eax
	incl	%eax
	andq	$3, %rax
	je	.LBB5_8
	cmpq	$-1, %rcx
	pxor	%xmm0, %xmm0
	pxor	%xmm3, %xmm3
	je	.LBB5_11
	movdqa	%xmm2, %xmm3
.LBB5_11:
	negq	%rax
	xorl	%r10d, %r10d
	pxor	%xmm1, %xmm1
	.p2align	4, 0x90
.LBB5_12:
	paddq	%xmm3, %xmm0
	paddq	%xmm3, %xmm1
	addq	$4, %r10
	incq	%rax
	jne	.LBB5_12
	jmp	.LBB5_13
.LBB5_3:
	xorl	%eax, %eax
	xorl	%edx, %edx
.LBB5_4:
	xorl	%r9d, %r9d
	cmpq	$-1, %rcx
	cmoveq	%r9, %r8
	.p2align	4, 0x90
.LBB5_5:
	incq	%rdx
	addq	%r8, %rax
	cmpq	%rcx, %rdx
	jb	.LBB5_5
.LBB5_19:
	retq
.LBB5_1:
	xorl	%eax, %eax
	retq
.LBB5_8:
	xorl	%r10d, %r10d
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
.LBB5_13:
	cmpq	$12, %r9
	jb	.LBB5_18
	cmpq	$-1, %rcx
	pxor	%xmm3, %xmm3
	je	.LBB5_16
	movdqa	%xmm2, %xmm3
.LBB5_16:
	movq	%rdx, %rax
	subq	%r10, %rax
	.p2align	4, 0x90
.LBB5_17:
	paddq	%xmm3, %xmm0
	paddq	%xmm3, %xmm1
	paddq	%xmm3, %xmm0
	paddq	%xmm3, %xmm1
	paddq	%xmm3, %xmm0
	paddq	%xmm3, %xmm1
	paddq	%xmm3, %xmm0
	paddq	%xmm3, %xmm1
	addq	$-16, %rax
	jne	.LBB5_17
.LBB5_18:
	paddq	%xmm1, %xmm0
	pshufd	$78, %xmm0, %xmm1
	paddq	%xmm0, %xmm1
	movd	%xmm1, %rax
	cmpq	%rcx, %rdx
	jne	.LBB5_4
	jmp	.LBB5_19



foo2:
	pushq	%rsi
	pushq	%rdi
	pushq	%rbx
	testq	%rcx, %rcx
	je	.LBB6_1
	testb	$1, %cl
	jne	.LBB6_4
	xorl	%eax, %eax
	xorl	%r8d, %r8d
	cmpq	$1, %rcx
	jne	.LBB6_11
	jmp	.LBB6_23
.LBB6_1:
	xorl	%eax, %eax
	jmp	.LBB6_23
.LBB6_4:
	xorl	%r8d, %r8d
	movq	$-1, %r9
	xorl	%r10d, %r10d
	movq	%rcx, %r11
	xorl	%eax, %eax
	jmp	.LBB6_5
	.p2align	4, 0x90
.LBB6_8:
	addq	%r11, %rax
	movq	%rdi, %r10
	movq	%rdx, %r11
.LBB6_5:
	cmpq	%r11, %r10
	movl	$1, %esi
	cmovbq	%r9, %rsi
	cmoveq	%r8, %rsi
	testq	%rsi, %rsi
	movl	$1, %edi
	movl	$0, %edx
	je	.LBB6_8
	cmpq	$-1, %rsi
	jne	.LBB6_9
	leaq	-1(%r11), %rdx
	movq	%r10, %rdi
	jmp	.LBB6_8
.LBB6_9:
	movl	$1, %r8d
	cmpq	$1, %rcx
	je	.LBB6_23
.LBB6_11:
	xorl	%r9d, %r9d
	movq	$-1, %r10
	.p2align	4, 0x90
.LBB6_12:
	xorl	%r11d, %r11d
	movq	%rcx, %rdx
	jmp	.LBB6_13
	.p2align	4, 0x90
.LBB6_16:
	addq	%rdx, %rax
	movq	%rbx, %r11
	movq	%rsi, %rdx
.LBB6_13:
	cmpq	%rdx, %r11
	movl	$1, %edi
	cmovbq	%r10, %rdi
	cmoveq	%r9, %rdi
	testq	%rdi, %rdi
	movl	$1, %ebx
	movl	$0, %esi
	je	.LBB6_16
	cmpq	$-1, %rdi
	jne	.LBB6_17
	leaq	-1(%rdx), %rsi
	movq	%r11, %rbx
	jmp	.LBB6_16
	.p2align	4, 0x90
.LBB6_17:
	addq	$2, %r8
	xorl	%r11d, %r11d
	movq	%rcx, %rdx
	jmp	.LBB6_18
	.p2align	4, 0x90
.LBB6_21:
	addq	%rdx, %rax
	movq	%rbx, %r11
	movq	%rsi, %rdx
.LBB6_18:
	cmpq	%rdx, %r11
	movl	$1, %edi
	cmovbq	%r10, %rdi
	cmoveq	%r9, %rdi
	testq	%rdi, %rdi
	movl	$1, %ebx
	movl	$0, %esi
	je	.LBB6_21
	cmpq	$-1, %rdi
	jne	.LBB6_22
	leaq	-1(%rdx), %rsi
	movq	%r11, %rbx
	jmp	.LBB6_21
	.p2align	4, 0x90
.LBB6_22:
	cmpq	%rcx, %r8
	jb	.LBB6_12
.LBB6_23:
	popq	%rbx
	popq	%rdi
	popq	%rsi
	retq
RagnarGrootKoerkamp, reneleonhardt and danielkrashbluss

Metadata

Metadata

Assignees

No one assigned

    Labels

    C-enhancementCategory: An issue proposing an enhancement or a PR with one.Category: An issue proposing an enhancement or a PR with one.C-optimizationCategory: An issue highlighting optimization opportunities or PRs implementing suchCategory: An issue highlighting optimization opportunities or PRs implementing suchI-slowIssue: Problems and improvements with respect to performance of generated code.Issue: Problems and improvements with respect to performance of generated code.T-compilerRelevant to the compiler team, which will review and decide on the PR/issue.Relevant to the compiler team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions

      Morty Proxy This is a proxified and sanitized view of the page, visit original site.