python · brandtbucher · Jan 11, 2022 · Jan 12, 2022 · Jan 12, 2022 · Jan 12, 2022
diff --git a/Include/opcode.h b/Include/opcode.h
diff --git a/Lib/opcode.py b/Lib/opcode.py
@@ -232,6 +232,8 @@ def jabs_op(name, op):
    "BINARY_OP_ADD_FLOAT",
    "BINARY_OP_ADD_UNICODE",
    "BINARY_OP_INPLACE_ADD_UNICODE",
+    "BINARY_OP_INPLACE_ADD_FLOAT",
+    "BINARY_OP_INPLACE_SUBTRACT_FLOAT",
    "BINARY_OP_MULTIPLY_INT",
    "BINARY_OP_MULTIPLY_FLOAT",
    "BINARY_OP_SUBTRACT_INT",

diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-13-22-50-49.bpo-46372.b00Vdn.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-13-22-50-49.bpo-46372.b00Vdn.rst
@@ -0,0 +1,2 @@
+Improve the performance of specialized :class:`float`
+operations by mutating the left operand in-place when it is safe to do so.
@@ -857,6 +857,43 @@ static const binaryfunc binary_ops[] = {
    [NB_INPLACE_XOR] = PyNumber_InPlaceXor,
 };

+// NOTE: INPLACE must be a compile-time constant to avoid runtime branching!
+#define BINARY_OP_FAST_FLOAT(OP, INPLACE)                                 \
+    do {                                                                  \
+        assert(cframe.use_tracing == 0);                                  \
+        PyObject *lhs = SECOND();                                         \
+        PyObject *rhs = TOP();                                            \
+        DEOPT_IF(!PyFloat_CheckExact(lhs), BINARY_OP);                    \
+        DEOPT_IF(!PyFloat_CheckExact(rhs), BINARY_OP);                    \
+        if (INPLACE) {                                                    \
+            assert(_Py_OPCODE(*next_instr) == STORE_FAST ||               \
+                   _Py_OPCODE(*next_instr) == STORE_FAST__LOAD_FAST);     \
+            DEOPT_IF(GETLOCAL(_Py_OPARG(*next_instr)) != lhs, BINARY_OP); \
+        }                                                                 \
+        STAT_INC(BINARY_OP, hit);                                         \
+        double l = PyFloat_AS_DOUBLE(lhs);                                \
+        double r = PyFloat_AS_DOUBLE(rhs);                                \
+        double d = l OP r;                                                \
+        STACK_SHRINK(1);                                                  \
+        Py_DECREF(rhs);                                                   \
+        if (Py_REFCNT(lhs) == 1 + !!(INPLACE)) {                          \
+            if (INPLACE) {                                                \
+                Py_DECREF(lhs);                                           \
+                STACK_SHRINK(1);                                          \
+                next_instr++;                                             \
+            }                                                             \
+            PyFloat_AS_DOUBLE(lhs) = d;                                   \
+            NOTRACE_DISPATCH();                                           \
+        }                                                                 \
+        Py_DECREF(lhs);                                                   \
+        PyObject *res = PyFloat_FromDouble(d);                            \
+        SET_TOP(res);                                                     \
+        if (res == NULL) {                                                \
+            goto error;                                                   \
+        }                                                                 \
+        NOTRACE_DISPATCH();                                               \
+    } while (0)
+

 // PEP 634: Structural Pattern Matching

@@ -1986,23 +2023,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
        }

        TARGET(BINARY_OP_MULTIPLY_FLOAT) {
-            assert(cframe.use_tracing == 0);
-            PyObject *left = SECOND();
-            PyObject *right = TOP();
-            DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
-            DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP);
-            STAT_INC(BINARY_OP, hit);
-            double dprod = ((PyFloatObject *)left)->ob_fval *
-                ((PyFloatObject *)right)->ob_fval;
-            PyObject *prod = PyFloat_FromDouble(dprod);
-            SET_SECOND(prod);
-            Py_DECREF(right);
-            Py_DECREF(left);
-            STACK_SHRINK(1);
-            if (prod == NULL) {
-                goto error;
-            }
-            NOTRACE_DISPATCH();
+            BINARY_OP_FAST_FLOAT(*, false);
        }

        TARGET(BINARY_OP_SUBTRACT_INT) {
@@ -2024,22 +2045,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
        }

        TARGET(BINARY_OP_SUBTRACT_FLOAT) {
-            assert(cframe.use_tracing == 0);
-            PyObject *left = SECOND();
-            PyObject *right = TOP();
-            DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
-            DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP);
-            STAT_INC(BINARY_OP, hit);
-            double dsub = ((PyFloatObject *)left)->ob_fval - ((PyFloatObject *)right)->ob_fval;
-            PyObject *sub = PyFloat_FromDouble(dsub);
-            SET_SECOND(sub);
-            Py_DECREF(right);
-            Py_DECREF(left);
-            STACK_SHRINK(1);
-            if (sub == NULL) {
-                goto error;
-            }
-            NOTRACE_DISPATCH();
+            BINARY_OP_FAST_FLOAT(-, false);
+        }
+
+        TARGET(BINARY_OP_INPLACE_SUBTRACT_FLOAT) {
+            BINARY_OP_FAST_FLOAT(-, true);
        }

        TARGET(BINARY_OP_ADD_UNICODE) {
@@ -2090,23 +2100,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
        }

        TARGET(BINARY_OP_ADD_FLOAT) {
-            assert(cframe.use_tracing == 0);
-            PyObject *left = SECOND();
-            PyObject *right = TOP();
-            DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
-            DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP);
-            STAT_INC(BINARY_OP, hit);
-            double dsum = ((PyFloatObject *)left)->ob_fval +
-                ((PyFloatObject *)right)->ob_fval;
-            PyObject *sum = PyFloat_FromDouble(dsum);
-            SET_SECOND(sum);
-            Py_DECREF(right);
-            Py_DECREF(left);
-            STACK_SHRINK(1);
-            if (sum == NULL) {
-                goto error;
-            }
-            NOTRACE_DISPATCH();
+            BINARY_OP_FAST_FLOAT(+, false);
+        }
+
+        TARGET(BINARY_OP_INPLACE_ADD_FLOAT) {
+            BINARY_OP_FAST_FLOAT(+, true);
        }

        TARGET(BINARY_OP_ADD_INT) {

diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Improve the performance of specialized :class:`float`
		operations by mutating the left operand in-place when it is safe to do so.
-Original file line number
+Diff line change
@@ -857,6 +857,43 @@ static const binaryfunc binary_ops[] = {
         [NB_INPLACE_XOR] = PyNumber_InPlaceXor,
     };
+    // NOTE: INPLACE must be a compile-time constant to avoid runtime branching!
+    #define BINARY_OP_FAST_FLOAT(OP, INPLACE)                                 \
         Comment thread


        

        
      

        
          


  
    
        
        
  
    
      
          
      

      
            markshannon
  

      

      

      


        Feb 10, 2022



      
    

  

  
    
      
          
    
  


        
            
  
      
              Copy link

  
              
  
      
                Copy Markdown

  
        
      
    

    
        

  
  Member


        

    
  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      I'm not a fan of giant macros, and I don't like the hidden compile-time control flow.

Given that the inplace and non-inplace specialized forms differ in a fundamental way, I'd like that to be explicit in the instructions themselves.
Maybe have two different macros, one for the inplace form and one for the non-inplace form.

You should be able to factor out some of the common code into helper macros.

That might be clearer. Up to you.
    
  
  



    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
              
  
    
    
      
        
            
    All reactions
  


          
          
        
      
    


      



    
        
  
    
      
          
      

      
            brandtbucher
  

      

      

      


        Feb 11, 2022



      
    

  

  
    
      
          
    
  


        
            
  
      
              Copy link

  
              
  
      
                Copy Markdown

  
        
      
    

    
        

  
  Member


        

  Author


    
  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      Makes sense. I'll go ahead and split it up.
    
  
  



    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
              
  
    
    
      
        
            
    All reactions
+        do {                                                                  \
+            assert(cframe.use_tracing == 0);                                  \
+            PyObject *lhs = SECOND();                                         \
+            PyObject *rhs = TOP();                                            \
+            DEOPT_IF(!PyFloat_CheckExact(lhs), BINARY_OP);                    \
+            DEOPT_IF(!PyFloat_CheckExact(rhs), BINARY_OP);                    \
+            if (INPLACE) {                                                    \
+                assert(_Py_OPCODE(*next_instr) == STORE_FAST ||               \
+                       _Py_OPCODE(*next_instr) == STORE_FAST__LOAD_FAST);     \
+                DEOPT_IF(GETLOCAL(_Py_OPARG(*next_instr)) != lhs, BINARY_OP); \
+            }                                                                 \
+            STAT_INC(BINARY_OP, hit);                                         \
+            double l = PyFloat_AS_DOUBLE(lhs);                                \
+            double r = PyFloat_AS_DOUBLE(rhs);                                \
+            double d = l OP r;                                                \
+            STACK_SHRINK(1);                                                  \
+            Py_DECREF(rhs);                                                   \
+            if (Py_REFCNT(lhs) == 1 + !!(INPLACE)) {                          \
+                if (INPLACE) {                                                \
+                    Py_DECREF(lhs);                                           \
+                    STACK_SHRINK(1);                                          \
+                    next_instr++;                                             \
+                }                                                             \
+                PyFloat_AS_DOUBLE(lhs) = d;                                   \
+                NOTRACE_DISPATCH();                                           \
+            }                                                                 \
+            Py_DECREF(lhs);                                                   \
+            PyObject *res = PyFloat_FromDouble(d);                            \
+            SET_TOP(res);                                                     \
+            if (res == NULL) {                                                \
+                goto error;                                                   \
+            }                                                                 \
+            NOTRACE_DISPATCH();                                               \
+        } while (0)
     // PEP 634: Structural Pattern Matching
@@ -1986,23 +2023,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
             }
             TARGET(BINARY_OP_MULTIPLY_FLOAT) {
-                assert(cframe.use_tracing == 0);
-                PyObject *left = SECOND();
-                PyObject *right = TOP();
-                DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
-                DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP);
-                STAT_INC(BINARY_OP, hit);
-                double dprod = ((PyFloatObject *)left)->ob_fval *
-                    ((PyFloatObject *)right)->ob_fval;
-                PyObject *prod = PyFloat_FromDouble(dprod);
-                SET_SECOND(prod);
-                Py_DECREF(right);
-                Py_DECREF(left);
-                STACK_SHRINK(1);
-                if (prod == NULL) {
-                    goto error;
-                }
-                NOTRACE_DISPATCH();
+                BINARY_OP_FAST_FLOAT(*, false);
             }
             TARGET(BINARY_OP_SUBTRACT_INT) {
@@ -2024,22 +2045,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
             }
             TARGET(BINARY_OP_SUBTRACT_FLOAT) {
-                assert(cframe.use_tracing == 0);
-                PyObject *left = SECOND();
-                PyObject *right = TOP();
-                DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
-                DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP);
-                STAT_INC(BINARY_OP, hit);
-                double dsub = ((PyFloatObject *)left)->ob_fval - ((PyFloatObject *)right)->ob_fval;
-                PyObject *sub = PyFloat_FromDouble(dsub);
-                SET_SECOND(sub);
-                Py_DECREF(right);
-                Py_DECREF(left);
-                STACK_SHRINK(1);
-                if (sub == NULL) {
-                    goto error;
-                }
-                NOTRACE_DISPATCH();
+                BINARY_OP_FAST_FLOAT(-, false);
+            }
+            TARGET(BINARY_OP_INPLACE_SUBTRACT_FLOAT) {
+                BINARY_OP_FAST_FLOAT(-, true);
             }
             TARGET(BINARY_OP_ADD_UNICODE) {
@@ -2090,23 +2100,11 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
             }
             TARGET(BINARY_OP_ADD_FLOAT) {
-                assert(cframe.use_tracing == 0);
-                PyObject *left = SECOND();
-                PyObject *right = TOP();
-                DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);
-                DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP);
-                STAT_INC(BINARY_OP, hit);
-                double dsum = ((PyFloatObject *)left)->ob_fval +
-                    ((PyFloatObject *)right)->ob_fval;
-                PyObject *sum = PyFloat_FromDouble(dsum);
-                SET_SECOND(sum);
-                Py_DECREF(right);
-                Py_DECREF(left);
-                STACK_SHRINK(1);
-                if (sum == NULL) {
-                    goto error;
-                }
-                NOTRACE_DISPATCH();
+                BINARY_OP_FAST_FLOAT(+, false);
+            }
+            TARGET(BINARY_OP_INPLACE_ADD_FLOAT) {
+                BINARY_OP_FAST_FLOAT(+, true);
             }
             TARGET(BINARY_OP_ADD_INT) {
-          Expand Down