diff --git a/crates/vm/src/builtins/code.rs b/crates/vm/src/builtins/code.rs index 111702eac6c..85816aabb7c 100644 --- a/crates/vm/src/builtins/code.rs +++ b/crates/vm/src/builtins/code.rs @@ -2,11 +2,11 @@ use super::{PyBytesRef, PyStrRef, PyTupleRef, PyType}; use crate::{ - AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, VirtualMachine, + AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, builtins::PyStrInterned, bytecode::{self, AsBag, BorrowedConstant, CodeFlags, Constant, ConstantBag}, class::{PyClassImpl, StaticType}, - convert::ToPyObject, + convert::{ToPyException, ToPyObject}, frozen, function::OptionalArg, types::{Constructor, Representable}, @@ -336,6 +336,44 @@ impl PyCode { pub const fn new(code: CodeObject) -> Self { Self { code } } + pub fn from_pyc_path(path: &std::path::Path, vm: &VirtualMachine) -> PyResult> { + let name = match path.file_stem() { + Some(stem) => stem.display().to_string(), + None => "".to_owned(), + }; + let content = std::fs::read(path).map_err(|e| e.to_pyexception(vm))?; + Self::from_pyc( + &content, + Some(&name), + Some(&path.display().to_string()), + Some(""), + vm, + ) + } + pub fn from_pyc( + pyc_bytes: &[u8], + name: Option<&str>, + bytecode_path: Option<&str>, + source_path: Option<&str>, + vm: &VirtualMachine, + ) -> PyResult> { + if !crate::import::check_pyc_magic_number_bytes(pyc_bytes) { + return Err(vm.new_value_error("pyc bytes has wrong MAGIC")); + } + let bootstrap_external = vm.import("_frozen_importlib_external", 0)?; + let compile_bytecode = bootstrap_external.get_attr("_compile_bytecode", vm)?; + // 16 is the pyc header length + let Some((_, code_bytes)) = pyc_bytes.split_at_checked(16) else { + return Err(vm.new_value_error(format!( + "pyc_bytes header is broken. 16 bytes expected but {} bytes given.", + pyc_bytes.len() + ))); + }; + let code_bytes_obj = vm.ctx.new_bytes(code_bytes.to_vec()); + let compiled = + compile_bytecode.call((code_bytes_obj, name, bytecode_path, source_path), vm)?; + compiled.try_downcast(vm) + } } impl fmt::Debug for PyCode { diff --git a/crates/vm/src/import.rs b/crates/vm/src/import.rs index d67bf59b35f..80ccbe30def 100644 --- a/crates/vm/src/import.rs +++ b/crates/vm/src/import.rs @@ -8,6 +8,10 @@ use crate::{ vm::{VirtualMachine, resolve_frozen_alias, thread}, }; +pub(crate) fn check_pyc_magic_number_bytes(buf: &[u8]) -> bool { + buf.starts_with(&crate::version::PYC_MAGIC_NUMBER_BYTES[..2]) +} + pub(crate) fn init_importlib_base(vm: &mut VirtualMachine) -> PyResult { flame_guard!("init importlib"); diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index d3f18bf9f46..d2172a43a00 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -19,8 +19,8 @@ mod vm_ops; use crate::{ AsObject, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, builtins::{ - PyBaseExceptionRef, PyDictRef, PyInt, PyList, PyModule, PyStr, PyStrInterned, PyStrRef, - PyTypeRef, code::PyCode, pystr::AsPyStr, tuple::PyTuple, + PyBaseExceptionRef, PyDict, PyDictRef, PyInt, PyList, PyModule, PyStr, PyStrInterned, + PyStrRef, PyTypeRef, code::PyCode, pystr::AsPyStr, tuple::PyTuple, }, codecs::CodecsRegistry, common::{hash::HashSecret, lock::PyMutex, rc::PyRc}, @@ -460,6 +460,42 @@ impl VirtualMachine { self.signal_rx = Some(signal_rx); } + /// Execute Python bytecode (`.pyc`) from an in-memory buffer. + /// + /// When the RustPython CLI is available, `.pyc` files are normally executed by + /// invoking `rustpython .pyc`. This method provides an alternative for + /// environments where the binary is unavailable or file I/O is restricted + /// (e.g. WASM). + /// + /// ## Preparing a `.pyc` file + /// + /// First, compile a Python source file into bytecode: + /// + /// ```sh + /// # Generate a .pyc file + /// $ rustpython -m py_compile .py + /// ``` + /// + /// ## Running the bytecode + /// + /// Load the resulting `.pyc` file into memory and execute it using the VM: + /// + /// ```no_run + /// use rustpython_vm::Interpreter; + /// Interpreter::without_stdlib(Default::default()).enter(|vm| { + /// let bytes = std::fs::read("__pycache__/.rustpython-313.pyc").unwrap(); + /// let main_scope = vm.new_scope_with_main().unwrap(); + /// vm.run_pyc_bytes(&bytes, main_scope); + /// }); + /// ``` + pub fn run_pyc_bytes(&self, pyc_bytes: &[u8], scope: Scope) -> PyResult<()> { + let code = PyCode::from_pyc(pyc_bytes, Some(""), None, None, self)?; + self.with_simple_run("", |_module_dict| { + self.run_code_obj(code, scope)?; + Ok(()) + }) + } + pub fn run_code_obj(&self, code: PyRef, scope: Scope) -> PyResult { use crate::builtins::PyFunction; @@ -500,6 +536,52 @@ impl VirtualMachine { } } + /// Run `run` with main scope. + fn with_simple_run( + &self, + path: &str, + run: impl FnOnce(&Py) -> PyResult<()>, + ) -> PyResult<()> { + let sys_modules = self.sys_module.get_attr(identifier!(self, modules), self)?; + let main_module = sys_modules.get_item(identifier!(self, __main__), self)?; + let module_dict = main_module.dict().expect("main module must have __dict__"); + + // Track whether we set __file__ (for cleanup) + let set_file_name = !module_dict.contains_key(identifier!(self, __file__), self); + if set_file_name { + module_dict.set_item( + identifier!(self, __file__), + self.ctx.new_str(path).into(), + self, + )?; + module_dict.set_item(identifier!(self, __cached__), self.ctx.none(), self)?; + } + + let result = run(&module_dict); + + self.flush_io(); + + // Cleanup __file__ and __cached__ after execution + if set_file_name { + let _ = module_dict.del_item(identifier!(self, __file__), self); + let _ = module_dict.del_item(identifier!(self, __cached__), self); + } + + result + } + + /// flush_io + /// + /// Flush stdout and stderr. Errors are silently ignored. + fn flush_io(&self) { + if let Ok(stdout) = self.sys_module.get_attr("stdout", self) { + let _ = self.call_method(&stdout, identifier!(self, flush).as_str(), ()); + } + if let Ok(stderr) = self.sys_module.get_attr("stderr", self) { + let _ = self.call_method(&stderr, identifier!(self, flush).as_str(), ()); + } + } + pub fn current_recursion_depth(&self) -> usize { self.recursion_depth.get() } diff --git a/crates/vm/src/vm/python_run.rs b/crates/vm/src/vm/python_run.rs index 31e8c7be45e..1c712279f8b 100644 --- a/crates/vm/src/vm/python_run.rs +++ b/crates/vm/src/vm/python_run.rs @@ -1,8 +1,8 @@ //! Python code execution functions. use crate::{ - PyResult, VirtualMachine, - builtins::{PyCode, PyDictRef}, + Py, PyResult, VirtualMachine, + builtins::{PyCode, PyDict}, compiler::{self}, scope::Scope, }; @@ -25,37 +25,14 @@ impl VirtualMachine { /// Execute a Python file with __main__ module setup. /// Sets __file__ and __cached__ before execution, removes them after. fn run_simple_file(&self, scope: Scope, path: &str) -> PyResult<()> { - let sys_modules = self.sys_module.get_attr(identifier!(self, modules), self)?; - let main_module = sys_modules.get_item(identifier!(self, __main__), self)?; - let module_dict = main_module.dict().expect("main module must have __dict__"); - - // Track whether we set __file__ (for cleanup) - let set_file_name = !module_dict.contains_key(identifier!(self, __file__), self); - if set_file_name { - module_dict.set_item( - identifier!(self, __file__), - self.ctx.new_str(path).into(), - self, - )?; - module_dict.set_item(identifier!(self, __cached__), self.ctx.none(), self)?; - } - - let result = self.run_simple_file_inner(&module_dict, scope, path); - - self.flush_io(); - - // Cleanup __file__ and __cached__ after execution - if set_file_name { - let _ = module_dict.del_item(identifier!(self, __file__), self); - let _ = module_dict.del_item(identifier!(self, __cached__), self); - } - - result + self.with_simple_run(path, |module_dict| { + self.run_simple_file_inner(module_dict, scope, path) + }) } fn run_simple_file_inner( &self, - module_dict: &PyDictRef, + module_dict: &Py, scope: Scope, path: &str, ) -> PyResult<()> { @@ -123,22 +100,10 @@ impl VirtualMachine { .map_err(|err| self.new_syntax_error(&err, Some(source)))?; self.run_code_obj(code_obj, scope) } - - /// flush_io - /// - /// Flush stdout and stderr. Errors are silently ignored. - fn flush_io(&self) { - if let Ok(stdout) = self.sys_module.get_attr("stdout", self) { - let _ = self.call_method(&stdout, identifier!(self, flush).as_str(), ()); - } - if let Ok(stderr) = self.sys_module.get_attr("stderr", self) { - let _ = self.call_method(&stderr, identifier!(self, flush).as_str(), ()); - } - } } fn set_main_loader( - module_dict: &PyDictRef, + module_dict: &Py, filename: &str, loader_name: &str, vm: &VirtualMachine, @@ -162,10 +127,10 @@ fn maybe_pyc_file(path: &str) -> bool { if path.ends_with(".pyc") { return true; } - maybe_pyc_file_with_magic(path, &crate::version::PYC_MAGIC_NUMBER_BYTES).unwrap_or(false) + maybe_pyc_file_with_magic(path).unwrap_or(false) } -fn maybe_pyc_file_with_magic(path: &str, magic_number: &[u8]) -> std::io::Result { +fn maybe_pyc_file_with_magic(path: &str) -> std::io::Result { let path_obj = std::path::Path::new(path); if !path_obj.is_file() { return Ok(false); @@ -175,12 +140,12 @@ fn maybe_pyc_file_with_magic(path: &str, magic_number: &[u8]) -> std::io::Result let mut buf = [0u8; 2]; use std::io::Read; - if file.read(&mut buf)? != 2 || magic_number.len() < 2 { + if file.read(&mut buf)? != 2 { return Ok(false); } // Read only two bytes of the magic. If the file was opened in // text mode, the bytes 3 and 4 of the magic (\r\n) might not // be read as they are on disk. - Ok(buf == magic_number[..2]) + Ok(crate::import::check_pyc_magic_number_bytes(&buf)) } diff --git a/crates/vm/src/vm/vm_new.rs b/crates/vm/src/vm/vm_new.rs index 4f25d6e2036..c74fb922a82 100644 --- a/crates/vm/src/vm/vm_new.rs +++ b/crates/vm/src/vm/vm_new.rs @@ -1,5 +1,5 @@ use crate::{ - AsObject, Py, PyObject, PyObjectRef, PyRef, + AsObject, Py, PyObject, PyObjectRef, PyRef, PyResult, builtins::{ PyBaseException, PyBaseExceptionRef, PyBytesRef, PyDictRef, PyModule, PyOSError, PyStrRef, PyType, PyTypeRef, @@ -62,6 +62,23 @@ impl VirtualMachine { Scope::with_builtins(None, self.ctx.new_dict(), self) } + pub fn new_scope_with_main(&self) -> PyResult { + let scope = self.new_scope_with_builtins(); + let main_module = self.new_module("__main__", scope.globals.clone(), None); + main_module + .dict() + .set_item("__annotations__", self.ctx.new_dict().into(), self) + .expect("Failed to initialize __main__.__annotations__"); + + self.sys_module.get_attr("modules", self)?.set_item( + "__main__", + main_module.into(), + self, + )?; + + Ok(scope) + } + pub fn new_function(&self, name: &'static str, f: F) -> PyRef where F: IntoPyNativeFn, diff --git a/src/lib.rs b/src/lib.rs index ad5894860d2..c6de6520de8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,21 +114,6 @@ pub fn run(init: impl FnOnce(&mut VirtualMachine) + 'static) -> ExitCode { rustpython_vm::common::os::exit_code(exitcode) } -fn setup_main_module(vm: &VirtualMachine) -> PyResult { - let scope = vm.new_scope_with_builtins(); - let main_module = vm.new_module("__main__", scope.globals.clone(), None); - main_module - .dict() - .set_item("__annotations__", vm.ctx.new_dict().into(), vm) - .expect("Failed to initialize __main__.__annotations__"); - - vm.sys_module - .get_attr("modules", vm)? - .set_item("__main__", main_module.into(), vm)?; - - Ok(scope) -} - fn get_pip(scope: Scope, vm: &VirtualMachine) -> PyResult<()> { let get_getpip = rustpython_vm::py_compile!( source = r#"\ @@ -221,7 +206,7 @@ fn run_rustpython(vm: &VirtualMachine, run_mode: RunMode) -> PyResult<()> { #[cfg(feature = "flame-it")] let main_guard = flame::start_guard("RustPython main"); - let scope = setup_main_module(vm)?; + let scope = vm.new_scope_with_main()?; // Import site first, before setting sys.path[0] // This matches CPython's behavior where site.removeduppaths() runs @@ -366,11 +351,11 @@ mod tests { fn test_run_script() { interpreter().enter(|vm| { vm.unwrap_pyresult((|| { - let scope = setup_main_module(vm)?; + let scope = vm.new_scope_with_main()?; // test file run vm.run_any_file(scope, "extra_tests/snippets/dir_main/__main__.py")?; - let scope = setup_main_module(vm)?; + let scope = vm.new_scope_with_main()?; // test module run (directory with __main__.py) run_file(vm, scope, "extra_tests/snippets/dir_main")?;