diff --git a/Cargo.lock b/Cargo.lock
index 9a017ab9c15..c8d03427088 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -328,18 +328,6 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "cpython"
-version = "0.7.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3052106c29da7390237bc2310c1928335733b286287754ea85e6093d2495280e"
-dependencies = [
- "libc",
- "num-traits",
- "paste",
- "python3-sys",
-]
-
 [[package]]
 name = "cranelift"
 version = "0.88.2"
@@ -978,6 +966,12 @@ dependencies = [
  "hashbrown",
 ]
 
+[[package]]
+name = "indoc"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8"
+
 [[package]]
 name = "insta"
 version = "1.33.0"
@@ -1686,13 +1680,64 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fe7765e19fb2ba6fd4373b8d90399f5321683ea7c11b598c6bbaa3a72e9c83b8"
 
 [[package]]
-name = "python3-sys"
-version = "0.7.1"
+name = "pyo3"
+version = "0.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49f8b50d72fb3015735aa403eebf19bbd72c093bfeeae24ee798be5f2f1aab52"
+checksum = "9a89dc7a5850d0e983be1ec2a463a171d20990487c3cfcd68b5363f1ee3d6fe0"
 dependencies = [
+ "cfg-if",
+ "indoc",
  "libc",
- "regex",
+ "memoffset 0.9.0",
+ "parking_lot",
+ "pyo3-build-config",
+ "pyo3-ffi",
+ "pyo3-macros",
+ "unindent",
+]
+
+[[package]]
+name = "pyo3-build-config"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07426f0d8fe5a601f26293f300afd1a7b1ed5e78b2a705870c5f30893c5163be"
+dependencies = [
+ "once_cell",
+ "target-lexicon",
+]
+
+[[package]]
+name = "pyo3-ffi"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbb7dec17e17766b46bca4f1a4215a85006b4c2ecde122076c562dd058da6cf1"
+dependencies = [
+ "libc",
+ "pyo3-build-config",
+]
+
+[[package]]
+name = "pyo3-macros"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f738b4e40d50b5711957f142878cfa0f28e054aa0ebdfc3fd137a843f74ed3"
+dependencies = [
+ "proc-macro2",
+ "pyo3-macros-backend",
+ "quote",
+ "syn 2.0.32",
+]
+
+[[package]]
+name = "pyo3-macros-backend"
+version = "0.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fc910d4851847827daf9d6cdd4a823fbdaab5b8818325c5e97a86da79e8881f"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -1902,7 +1947,6 @@ dependencies = [
  "atty",
  "cfg-if",
  "clap",
- "cpython",
  "criterion",
  "dirs-next",
  "env_logger",
@@ -1910,7 +1954,7 @@ dependencies = [
  "flamescope",
  "libc",
  "log",
- "python3-sys",
+ "pyo3",
  "rustpython-compiler",
  "rustpython-parser",
  "rustpython-pylib",
@@ -2932,6 +2976,12 @@ dependencies = [
  "time",
 ]
 
+[[package]]
+name = "unindent"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"
+
 [[package]]
 name = "utf8parse"
 version = "0.2.0"
diff --git a/Cargo.toml b/Cargo.toml
index b7b3db3dc87..541a5aa3f37 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -41,7 +41,6 @@ rustpython-format = { git = "https://github.com/RustPython/Parser.git", rev = "2
 # rustpython-format = { path = "../RustPython-parser/format" }
 
 ahash = "0.8.3"
-anyhow = "1.0.45"
 ascii = "1.0"
 atty = "0.2.14"
 bitflags = "2.4.0"
@@ -118,9 +117,8 @@ libc = { workspace = true }
 rustyline = { workspace = true }
 
 [dev-dependencies]
-cpython = "0.7.0"
-criterion = "0.3.5"
-python3-sys = "0.7.1"
+criterion = { version = "0.3.5", features = ["html_reports"] }
+pyo3 = { version = "0.20.2", features = ["auto-initialize"] }
 
 [[bench]]
 name = "execution"
diff --git a/benches/execution.rs b/benches/execution.rs
index 14fadfc2a58..de81cd08098 100644
--- a/benches/execution.rs
+++ b/benches/execution.rs
@@ -1,30 +1,32 @@
 use criterion::measurement::WallTime;
 use criterion::{
-    criterion_group, criterion_main, Bencher, BenchmarkGroup, BenchmarkId, Criterion, Throughput,
+    black_box, criterion_group, criterion_main, Bencher, BenchmarkGroup, BenchmarkId, Criterion,
+    Throughput,
 };
 use rustpython_compiler::Mode;
 use rustpython_parser::ast;
 use rustpython_parser::Parse;
-use rustpython_vm::{Interpreter, PyResult};
+use rustpython_vm::{Interpreter, PyResult, Settings};
 use std::collections::HashMap;
 use std::path::Path;
 
 fn bench_cpython_code(b: &mut Bencher, source: &str) {
-    let gil = cpython::Python::acquire_gil();
-    let python = gil.python();
-
-    b.iter(|| {
-        let res: cpython::PyResult<()> = python.run(source, None, None);
-        if let Err(e) = res {
-            e.print(python);
-            panic!("Error running source")
-        }
-    });
+    pyo3::Python::with_gil(|py| {
+        b.iter(|| {
+            let module =
+                pyo3::types::PyModule::from_code(py, source, "", "").expect("Error running source");
+            black_box(module);
+        })
+    })
 }
 
 fn bench_rustpy_code(b: &mut Bencher, name: &str, source: &str) {
     // NOTE: Take long time.
-    Interpreter::without_stdlib(Default::default()).enter(|vm| {
+    let mut settings = Settings::default();
+    settings.path_list.push("Lib/".to_string());
+    settings.dont_write_bytecode = true;
+    settings.no_user_site = true;
+    Interpreter::without_stdlib(settings).enter(|vm| {
         // Note: bench_cpython is both compiling and executing the code.
         // As such we compile the code in the benchmark loop as well.
         b.iter(|| {
@@ -36,16 +38,12 @@ fn bench_rustpy_code(b: &mut Bencher, name: &str, source: &str) {
     })
 }
 
-pub fn benchmark_file_execution(
-    group: &mut BenchmarkGroup<WallTime>,
-    name: &str,
-    contents: &String,
-) {
+pub fn benchmark_file_execution(group: &mut BenchmarkGroup<WallTime>, name: &str, contents: &str) {
     group.bench_function(BenchmarkId::new(name, "cpython"), |b| {
-        bench_cpython_code(b, &contents)
+        bench_cpython_code(b, contents)
     });
     group.bench_function(BenchmarkId::new(name, "rustpython"), |b| {
-        bench_rustpy_code(b, name, &contents)
+        bench_rustpy_code(b, name, contents)
     });
 }
 
@@ -55,44 +53,20 @@ pub fn benchmark_file_parsing(group: &mut BenchmarkGroup<WallTime>, name: &str,
         b.iter(|| ast::Suite::parse(contents, name).unwrap())
     });
     group.bench_function(BenchmarkId::new("cpython", name), |b| {
-        let gil = cpython::Python::acquire_gil();
-        let py = gil.python();
-
-        let code = std::ffi::CString::new(contents).unwrap();
-        let fname = cpython::PyString::new(py, name);
-
-        b.iter(|| parse_program_cpython(py, &code, &fname))
+        pyo3::Python::with_gil(|py| {
+            let builtins =
+                pyo3::types::PyModule::import(py, "builtins").expect("Failed to import builtins");
+            let compile = builtins.getattr("compile").expect("no compile in builtins");
+            b.iter(|| {
+                let x = compile
+                    .call1((contents, name, "exec"))
+                    .expect("Failed to parse code");
+                black_box(x);
+            })
+        })
     });
 }
 
-fn parse_program_cpython(
-    py: cpython::Python<'_>,
-    code: &std::ffi::CStr,
-    fname: &cpython::PyString,
-) {
-    extern "C" {
-        fn PyArena_New() -> *mut python3_sys::PyArena;
-        fn PyArena_Free(arena: *mut python3_sys::PyArena);
-    }
-    use cpython::PythonObject;
-    let fname = fname.as_object();
-    unsafe {
-        let arena = PyArena_New();
-        assert!(!arena.is_null());
-        let ret = python3_sys::PyParser_ASTFromStringObject(
-            code.as_ptr() as _,
-            fname.as_ptr(),
-            python3_sys::Py_file_input,
-            std::ptr::null_mut(),
-            arena,
-        );
-        if ret.is_null() {
-            cpython::PyErr::fetch(py).print(py);
-        }
-        PyArena_Free(arena);
-    }
-}
-
 pub fn benchmark_pystone(group: &mut BenchmarkGroup<WallTime>, contents: String) {
     // Default is 50_000. This takes a while, so reduce it to 30k.
     for idx in (10_000..=30_000).step_by(10_000) {
diff --git a/benches/microbenchmarks.rs b/benches/microbenchmarks.rs
index c30d86722af..befdc63fd1a 100644
--- a/benches/microbenchmarks.rs
+++ b/benches/microbenchmarks.rs
@@ -5,7 +5,7 @@ use criterion::{
 use rustpython_compiler::Mode;
 use rustpython_vm::{AsObject, Interpreter, PyResult, Settings};
 use std::{
-    ffi, fs, io,
+    fs, io,
     path::{Path, PathBuf},
 };
 
@@ -36,95 +36,68 @@ pub struct MicroBenchmark {
 }
 
 fn bench_cpython_code(group: &mut BenchmarkGroup<WallTime>, bench: &MicroBenchmark) {
-    let gil = cpython::Python::acquire_gil();
-    let py = gil.python();
-
-    let setup_code = ffi::CString::new(&*bench.setup).unwrap();
-    let setup_name = ffi::CString::new(format!("{}_setup", bench.name)).unwrap();
-    let setup_code = cpy_compile_code(py, &setup_code, &setup_name).unwrap();
-
-    let code = ffi::CString::new(&*bench.code).unwrap();
-    let name = ffi::CString::new(&*bench.name).unwrap();
-    let code = cpy_compile_code(py, &code, &name).unwrap();
-
-    let bench_func = |(globals, locals): &mut (cpython::PyDict, cpython::PyDict)| {
-        let res = cpy_run_code(py, &code, globals, locals);
-        if let Err(e) = res {
-            e.print(py);
-            panic!("Error running microbenchmark")
-        }
-    };
-
-    let bench_setup = |iterations| {
-        let globals = cpython::PyDict::new(py);
-        // setup the __builtins__ attribute - no other way to do this (other than manually) as far
-        // as I can tell
-        let _ = py.run("", Some(&globals), None);
-        let locals = cpython::PyDict::new(py);
-        if let Some(idx) = iterations {
-            globals.set_item(py, "ITERATIONS", idx).unwrap();
-        }
+    pyo3::Python::with_gil(|py| {
+        let setup_name = format!("{}_setup", bench.name);
+        let setup_code = cpy_compile_code(py, &bench.setup, &setup_name).unwrap();
+
+        let code = cpy_compile_code(py, &bench.code, &bench.name).unwrap();
+
+        // Grab the exec function in advance so we don't have lookups in the hot code
+        let builtins =
+            pyo3::types::PyModule::import(py, "builtins").expect("Failed to import builtins");
+        let exec = builtins.getattr("exec").expect("no exec in builtins");
+
+        let bench_func = |(globals, locals): &mut (&pyo3::types::PyDict, &pyo3::types::PyDict)| {
+            let res = exec.call((code, &*globals, &*locals), None);
+            if let Err(e) = res {
+                e.print(py);
+                panic!("Error running microbenchmark")
+            }
+        };
 
-        let res = cpy_run_code(py, &setup_code, &globals, &locals);
-        if let Err(e) = res {
-            e.print(py);
-            panic!("Error running microbenchmark setup code")
-        }
-        (globals, locals)
-    };
-
-    if bench.iterate {
-        for idx in (100..=1_000).step_by(200) {
-            group.throughput(Throughput::Elements(idx as u64));
-            group.bench_with_input(BenchmarkId::new("cpython", &bench.name), &idx, |b, idx| {
-                b.iter_batched_ref(
-                    || bench_setup(Some(*idx)),
-                    bench_func,
-                    BatchSize::LargeInput,
-                );
-            });
-        }
-    } else {
-        group.bench_function(BenchmarkId::new("cpython", &bench.name), move |b| {
-            b.iter_batched_ref(|| bench_setup(None), bench_func, BatchSize::LargeInput);
-        });
-    }
-}
+        let bench_setup = |iterations| {
+            let globals = pyo3::types::PyDict::new(py);
+            let locals = pyo3::types::PyDict::new(py);
+            if let Some(idx) = iterations {
+                globals.set_item("ITERATIONS", idx).unwrap();
+            }
 
-unsafe fn cpy_res(
-    py: cpython::Python<'_>,
-    x: *mut python3_sys::PyObject,
-) -> cpython::PyResult<cpython::PyObject> {
-    cpython::PyObject::from_owned_ptr_opt(py, x).ok_or_else(|| cpython::PyErr::fetch(py))
-}
+            let res = exec.call((setup_code, &globals, &locals), None);
+            if let Err(e) = res {
+                e.print(py);
+                panic!("Error running microbenchmark setup code")
+            }
+            (globals, locals)
+        };
 
-fn cpy_compile_code(
-    py: cpython::Python<'_>,
-    s: &ffi::CStr,
-    fname: &ffi::CStr,
-) -> cpython::PyResult<cpython::PyObject> {
-    unsafe {
-        let res =
-            python3_sys::Py_CompileString(s.as_ptr(), fname.as_ptr(), python3_sys::Py_file_input);
-        cpy_res(py, res)
-    }
+        if bench.iterate {
+            for idx in (100..=1_000).step_by(200) {
+                group.throughput(Throughput::Elements(idx as u64));
+                group.bench_with_input(BenchmarkId::new("cpython", &bench.name), &idx, |b, idx| {
+                    b.iter_batched_ref(
+                        || bench_setup(Some(*idx)),
+                        bench_func,
+                        BatchSize::LargeInput,
+                    );
+                });
+            }
+        } else {
+            group.bench_function(BenchmarkId::new("cpython", &bench.name), move |b| {
+                b.iter_batched_ref(|| bench_setup(None), bench_func, BatchSize::LargeInput);
+            });
+        }
+    })
 }
 
-fn cpy_run_code(
-    py: cpython::Python<'_>,
-    code: &cpython::PyObject,
-    locals: &cpython::PyDict,
-    globals: &cpython::PyDict,
-) -> cpython::PyResult<cpython::PyObject> {
-    use cpython::PythonObject;
-    unsafe {
-        let res = python3_sys::PyEval_EvalCode(
-            code.as_ptr(),
-            locals.as_object().as_ptr(),
-            globals.as_object().as_ptr(),
-        );
-        cpy_res(py, res)
-    }
+fn cpy_compile_code<'a>(
+    py: pyo3::Python<'a>,
+    code: &str,
+    name: &str,
+) -> pyo3::PyResult<&'a pyo3::types::PyCode> {
+    let builtins =
+        pyo3::types::PyModule::import(py, "builtins").expect("Failed to import builtins");
+    let compile = builtins.getattr("compile").expect("no compile in builtins");
+    compile.call1((code, name, "exec"))?.extract()
 }
 
 fn bench_rustpy_code(group: &mut BenchmarkGroup<WallTime>, bench: &MicroBenchmark) {