Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Merged
2,536 changes: 1,280 additions & 1,256 deletions 2,536 Include/internal/pycore_uop_ids.h

Large diffs are not rendered by default.

114 changes: 114 additions & 0 deletions 114 Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

148 changes: 148 additions & 0 deletions 148 Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3390,6 +3390,154 @@ def testfunc(args):
uops = get_opnames(ex)
self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)

# Checks that `a * b + c` in a hot loop is compiled to the LHS in-place
# integer add uop, and that the loop still computes the right total.
# NOTE(review): _run_with_optimizer, get_opnames and TIER2_THRESHOLD are
# defined outside this view; presumably they return (result, executor),
# the executor's uop names, and the Tier 2 warm-up count -- confirm.
def test_int_add_inplace_unique_lhs(self):
# a * b produces a unique compact int; adding c reuses it in place
def testfunc(args):
a, b, c, n = args
total = 0
for _ in range(n):
total += a * b + c
return total

# Each iteration adds 2000 * 3 + 4000 == 10000 to the running total.
res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, TIER2_THRESHOLD))
self.assertEqual(res, TIER2_THRESHOLD * 10000)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The product is the left operand of +, so the LHS variant is expected.
self.assertIn("_BINARY_OP_ADD_INT_INPLACE", uops)

# Checks that `c + a * b` in a hot loop is compiled to the RHS in-place
# integer add uop, and that the loop still computes the right total.
# NOTE(review): _run_with_optimizer, get_opnames and TIER2_THRESHOLD are
# defined outside this view -- confirm their contracts there.
def test_int_add_inplace_unique_rhs(self):
# a * b produces a unique compact int on the right side of +
def testfunc(args):
a, b, c, n = args
total = 0
for _ in range(n):
total += c + a * b
return total

# Each iteration adds 4000 + 2000 * 3 == 10000 to the running total.
res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, TIER2_THRESHOLD))
self.assertEqual(res, TIER2_THRESHOLD * 10000)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The product is the right operand of +, so the _RIGHT variant is expected.
self.assertIn("_BINARY_OP_ADD_INT_INPLACE_RIGHT", uops)

# Checks which add uops are chosen for `total += a + b`: a plain add for
# `a + b`, an RHS in-place add for the `+=`, and no LHS in-place add.
# NOTE(review): _run_with_optimizer, get_opnames and TIER2_THRESHOLD are
# defined outside this view -- confirm their contracts there.
def test_int_add_no_inplace_non_unique(self):
# Both operands of a + b are locals — neither is unique,
# so the first add uses the regular op. But total += (a+b)
# has a unique RHS (result of a+b), so it uses _INPLACE_RIGHT.
def testfunc(args):
a, b, n = args
total = 0
for _ in range(n):
total += a + b
return total

# Each iteration adds 2000 + 3000 == 5000 to the running total.
res, ex = self._run_with_optimizer(testfunc, (2000, 3000, TIER2_THRESHOLD))
self.assertEqual(res, TIER2_THRESHOLD * 5000)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# a + b: both are locals, no inplace
self.assertIn("_BINARY_OP_ADD_INT", uops)
# total += result: result is unique RHS
self.assertIn("_BINARY_OP_ADD_INT_INPLACE_RIGHT", uops)
# The LHS inplace variant must not appear anywhere in the trace. This
# only works as an exact-name membership test: the name is a prefix of
# the _RIGHT variant asserted above, so uops has to be a collection of
# whole names rather than a single string.
self.assertNotIn("_BINARY_OP_ADD_INT_INPLACE", uops)

def test_int_add_inplace_small_int_result(self):
    # When the result is a small int, the inplace path falls back
    # to _PyCompactLong_Add. Verify correctness (no singleton corruption).
    #
    # Here a * b == 6 and a * b + 1 == 7, so both the operand the
    # optimizer would like to reuse and the result are cached small ints.
    def testfunc(args):
        a, b, n = args
        total = 0
        for _ in range(n):
            total += a * b + 1  # a*b=6, +1=7, small int
        return total

    res, ex = self._run_with_optimizer(testfunc, (2, 3, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 7)
    # Verify small int singletons are not corrupted.
    # Comparing ``7`` with ``3 + 4`` cannot detect corruption: the sum is
    # constant-folded, so both sides are the very same cached object and
    # compare equal whatever digits it holds.  str() instead reads the
    # stored value, so a singleton mutated in place would be noticed.
    for value, text in ((6, "6"), (7, "7")):
        self.assertEqual(str(value), text)

# Checks that `a * b - c` in a hot loop is compiled to the LHS in-place
# integer subtract uop, and that the loop still computes the right total.
# NOTE(review): _run_with_optimizer, get_opnames and TIER2_THRESHOLD are
# defined outside this view -- confirm their contracts there.
def test_int_subtract_inplace_unique_lhs(self):
# a * b produces a unique compact int; subtracting c reuses it
def testfunc(args):
a, b, c, n = args
total = 0
for _ in range(n):
total += a * b - c
return total

# Each iteration adds 2000 * 3 - 1000 == 5000 to the running total.
res, ex = self._run_with_optimizer(testfunc, (2000, 3, 1000, TIER2_THRESHOLD))
self.assertEqual(res, TIER2_THRESHOLD * 5000)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The product is the left operand of -, so the LHS variant is expected.
self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE", uops)

# Checks that `c - a * b` in a hot loop is compiled to the RHS in-place
# integer subtract uop, and that the loop still computes the right total.
# NOTE(review): _run_with_optimizer, get_opnames and TIER2_THRESHOLD are
# defined outside this view -- confirm their contracts there.
def test_int_subtract_inplace_unique_rhs(self):
# a * b produces a unique compact int on the right of -
def testfunc(args):
a, b, c, n = args
total = 0
for _ in range(n):
total += c - a * b
return total

# Each iteration adds 10000 - 2000 * 3 == 4000 to the running total.
res, ex = self._run_with_optimizer(testfunc, (2000, 3, 10000, TIER2_THRESHOLD))
self.assertEqual(res, TIER2_THRESHOLD * 4000)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The product is the right operand of -, so the _RIGHT variant is expected.
self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT", uops)

# Checks that `(a + b) * c` in a hot loop is compiled to the LHS in-place
# integer multiply uop, and that the loop still computes the right total.
# NOTE(review): _run_with_optimizer, get_opnames and TIER2_THRESHOLD are
# defined outside this view -- confirm their contracts there.
def test_int_multiply_inplace_unique_lhs(self):
# (a + b) produces a unique compact int; multiplying by c reuses it
def testfunc(args):
a, b, c, n = args
total = 0
for _ in range(n):
total += (a + b) * c
return total

# Each iteration adds (2000 + 3000) * 4 == 20000 to the running total.
res, ex = self._run_with_optimizer(testfunc, (2000, 3000, 4, TIER2_THRESHOLD))
self.assertEqual(res, TIER2_THRESHOLD * 20000)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The sum is the left operand of *, so the LHS variant is expected.
self.assertIn("_BINARY_OP_MULTIPLY_INT_INPLACE", uops)

# Checks that `c * (a + b)` in a hot loop is compiled to the RHS in-place
# integer multiply uop, and that the loop still computes the right total.
# NOTE(review): _run_with_optimizer, get_opnames and TIER2_THRESHOLD are
# defined outside this view -- confirm their contracts there.
def test_int_multiply_inplace_unique_rhs(self):
# (a + b) produces a unique compact int on the right side of *
def testfunc(args):
a, b, c, n = args
total = 0
for _ in range(n):
total += c * (a + b)
return total

# Each iteration adds 4 * (2000 + 3000) == 20000 to the running total.
res, ex = self._run_with_optimizer(testfunc, (2000, 3000, 4, TIER2_THRESHOLD))
self.assertEqual(res, TIER2_THRESHOLD * 20000)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The sum is the right operand of *, so the _RIGHT variant is expected.
self.assertIn("_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT", uops)

# Checks that an add whose operands are both freshly computed products
# (`a * b + c * d`) is compiled to one of the in-place add uops.
# NOTE(review): _run_with_optimizer, get_opnames and TIER2_THRESHOLD are
# defined outside this view -- confirm their contracts there.
def test_int_inplace_chain_propagation(self):
# a * b + c * d: both products are unique, the + reuses one;
# result of + is also unique for the subsequent +=
def testfunc(args):
a, b, c, d, n = args
total = 0
for _ in range(n):
total += a * b + c * d
return total

# Each iteration adds 2000 * 3 + 4000 * 5 == 26000 to the running total.
res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, 5, TIER2_THRESHOLD))
self.assertEqual(res, TIER2_THRESHOLD * 26000)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# Either operand may be the one reused, so accept the LHS or the RHS
# in-place add variant; the test does not pin down which one is picked.
inplace_add = (
"_BINARY_OP_ADD_INT_INPLACE" in uops
or "_BINARY_OP_ADD_INT_INPLACE_RIGHT" in uops
)
self.assertTrue(inplace_add,
"Expected an inplace add for unique intermediate results")

def test_load_attr_instance_value(self):
def testfunc(n):
class C():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Optimize compact integer arithmetic in the JIT by mutating
uniquely-referenced operands in place, avoiding allocation of a new int
object. Speeds up the pyperformance ``spectral_norm`` benchmark by ~10%.
57 changes: 57 additions & 0 deletions 57 Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,63 @@ dummy_func(
macro(BINARY_OP_SUBTRACT_INT) =
_GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_SUBTRACT_INT + _POP_TOP_INT + _POP_TOP_INT;

// Inplace compact int ops: mutate the uniquely-referenced operand
// when possible. The op handles decref of TARGET internally so
// the following _POP_TOP_INT becomes _POP_TOP_NOP. Tier 2 only.
// Tier 2-only uop: int addition, LHS in-place variant.
// Stack effect (left, right -- res, l, r): the result is pushed and both
// inputs are handed back unchanged as `l` and `r`.
// INT_INPLACE_OP is invoked with `left` as its third argument, the `+`
// operator and _PyCompactLong_Add. NOTE(review): the macro is defined
// outside this view; presumably it declares `_int_inplace_res` and tries
// to reuse the third-argument operand for the result -- confirm.
// EXIT_IF (not ERROR_IF) is used on a null result: per the review thread
// pasted into this block, _PyCompactLong_Add can fail both on OOM and when
// the sum is not a compact int, and the latter is not an error.
tier2 op(_BINARY_OP_ADD_INT_INPLACE, (left, right -- res, l, r)) {
INT_INPLACE_OP(left, right, left, +, _PyCompactLong_Add);
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be ERROR_IF instead? The only way this can be null after the compactlong_add operation is that it fails?

Same for below.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The non-inplace _BINARY_OP_ADD_INT uses EXIT_IF as well. The _PyCompactLong_Add can error for two reasons: OOM and the result of the add being non-compact (e.g. requiring more than one digit). I think for the latter we want the EXIT_IF?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh yeah that's fine then ok. thanks!

res = _int_inplace_res;
l = left;
r = right;
INPUTS_DEAD();
}

// Tier 2-only uop: int subtraction, LHS in-place variant.
// Stack effect (left, right -- res, l, r).
tier2 op(_BINARY_OP_SUBTRACT_INT_INPLACE, (left, right -- res, l, r)) {
// `left` is passed as the third argument. NOTE(review): INT_INPLACE_OP is
// defined outside this view; presumably it declares `_int_inplace_res`
// and tries to reuse the third-argument operand -- confirm.
INT_INPLACE_OP(left, right, left, -, _PyCompactLong_Subtract);
// Bail out of the uop when no result was produced. NOTE(review):
// presumably covers both allocation failure and a non-compact
// result -- confirm.
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
res = _int_inplace_res;
// Both inputs are passed through unchanged as outputs.
l = left;
r = right;
INPUTS_DEAD();
}

// Tier 2-only uop: int multiplication, LHS in-place variant.
// Stack effect (left, right -- res, l, r).
tier2 op(_BINARY_OP_MULTIPLY_INT_INPLACE, (left, right -- res, l, r)) {
// `left` is passed as the third argument. NOTE(review): INT_INPLACE_OP is
// defined outside this view; presumably it declares `_int_inplace_res`
// and tries to reuse the third-argument operand -- confirm.
INT_INPLACE_OP(left, right, left, *, _PyCompactLong_Multiply);
// Bail out of the uop when no result was produced. NOTE(review):
// presumably covers both allocation failure and a non-compact
// result -- confirm.
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
res = _int_inplace_res;
// Both inputs are passed through unchanged as outputs.
l = left;
r = right;
INPUTS_DEAD();
}

// Tier 2-only uop: int addition, RHS in-place variant.
// Stack effect (left, right -- res, l, r).
tier2 op(_BINARY_OP_ADD_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
// `right` is passed as the third argument. NOTE(review): INT_INPLACE_OP is
// defined outside this view; presumably it declares `_int_inplace_res`
// and tries to reuse the third-argument operand -- confirm.
INT_INPLACE_OP(left, right, right, +, _PyCompactLong_Add);
// Bail out of the uop when no result was produced. NOTE(review):
// presumably covers both allocation failure and a non-compact
// result -- confirm.
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
res = _int_inplace_res;
// Both inputs are passed through unchanged as outputs.
l = left;
r = right;
INPUTS_DEAD();
}

// Tier 2-only uop: int subtraction, RHS in-place variant.
// Stack effect (left, right -- res, l, r).
tier2 op(_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
// `right` is passed as the third argument; operand order (left - right)
// is unchanged from the LHS variant. NOTE(review): INT_INPLACE_OP is
// defined outside this view; presumably it declares `_int_inplace_res`
// and tries to reuse the third-argument operand -- confirm.
INT_INPLACE_OP(left, right, right, -, _PyCompactLong_Subtract);
// Bail out of the uop when no result was produced. NOTE(review):
// presumably covers both allocation failure and a non-compact
// result -- confirm.
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
res = _int_inplace_res;
// Both inputs are passed through unchanged as outputs.
l = left;
r = right;
INPUTS_DEAD();
}

// Tier 2-only uop: int multiplication, RHS in-place variant.
// Stack effect (left, right -- res, l, r).
tier2 op(_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
// `right` is passed as the third argument. NOTE(review): INT_INPLACE_OP is
// defined outside this view; presumably it declares `_int_inplace_res`
// and tries to reuse the third-argument operand -- confirm.
INT_INPLACE_OP(left, right, right, *, _PyCompactLong_Multiply);
// Bail out of the uop when no result was produced. NOTE(review):
// presumably covers both allocation failure and a non-compact
// result -- confirm.
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
res = _int_inplace_res;
// Both inputs are passed through unchanged as outputs.
l = left;
r = right;
INPUTS_DEAD();
}

op(_GUARD_NOS_FLOAT, (left, unused -- left, unused)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
EXIT_IF(!PyFloat_CheckExact(left_o));
Expand Down
Loading
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.