From 49597034459cdb4cfdf6e08f7c5ce12235ae8726 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 30 Mar 2026 02:01:04 +0900 Subject: [PATCH 1/4] Add CFG block splitting, jump threading, backward jump normalization, genexpr StopIteration wrapper - split_blocks_at_jumps: split blocks at branch points so each has one exit - jump_threading: thread jumps through single-jump blocks (flowgraph.c jump_thread) - Backward conditional jump normalization: invert and create NOT_TAKEN+JUMP block - Follow empty blocks in jump-to-return optimization (next_nonempty_block) - Add PEP 479 StopIteration handler to compile_comprehension for generators --- crates/codegen/src/compile.rs | 35 +++ crates/codegen/src/ir.rs | 216 ++++++++++++++++-- ...pile__tests__nested_double_async_with.snap | 45 ++-- 3 files changed, 251 insertions(+), 45 deletions(-) diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index bdc7fb7f1e..0cda77f983 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -8407,6 +8407,24 @@ impl Compiler { let arg0 = self.varname(".0")?; let return_none = init_collection.is_none(); + + // PEP 479: Wrap generator/coroutine body with StopIteration handler + let is_gen_scope = self.current_symbol_table().is_generator || is_async; + let stop_iteration_block = if is_gen_scope { + let handler_block = self.new_block(); + emit!( + self, + PseudoInstruction::SetupCleanup { + delta: handler_block + } + ); + self.set_no_location(); + self.push_fblock(FBlockType::StopIteration, handler_block, handler_block)?; + Some(handler_block) + } else { + None + }; + // Create empty object of proper type: if let Some(init_collection) = init_collection { self._emit(init_collection, OpArg::new(0), BlockIdx::NULL) @@ -8496,6 +8514,23 @@ impl Compiler { self.emit_return_value(); + // Close StopIteration handler and emit handler code + if let Some(handler_block) = stop_iteration_block { + emit!(self, PseudoInstruction::PopBlock); + self.set_no_location(); + self.pop_fblock(FBlockType::StopIteration); + self.switch_to_block(handler_block); + emit!( + self, + Instruction::CallIntrinsic1 { + func: oparg::IntrinsicFunction1::StopIterationError + } + ); + self.set_no_location(); + emit!(self, Instruction::Reraise { depth: 1u32 }); + self.set_no_location(); + } + let code = self.exit_scope(); self.ctx = prev_ctx; diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index 52aa9b0c3e..84ac89ce00 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -210,8 +210,14 @@ impl CodeInfo { self.peephole_optimize(); // Phase 1: _PyCfg_OptimizeCodeUnit (flowgraph.c) + // Split blocks so each block has at most one branch as its last instruction + split_blocks_at_jumps(&mut self.blocks); mark_except_handlers(&mut self.blocks); label_exception_targets(&mut self.blocks); + // optimize_cfg: jump threading (before push_cold_blocks_to_end) + jump_threading(&mut self.blocks); + self.eliminate_unreachable_blocks(); + self.remove_nops(); // TODO: insert_superinstructions disabled pending StoreFastLoadFast VM fix push_cold_blocks_to_end(&mut self.blocks); @@ -2169,6 +2175,84 @@ fn push_cold_blocks_to_end(blocks: &mut Vec) { } } +/// Split blocks at branch points so each block has at most one branch +/// (conditional/unconditional jump) as its last instruction. +/// This matches CPython's CFG structure where each basic block has one exit. +fn split_blocks_at_jumps(blocks: &mut Vec) { + let mut bi = 0; + while bi < blocks.len() { + // Find the first jump/branch instruction in the block + let split_at = { + let block = &blocks[bi]; + let mut found = None; + for (i, ins) in block.instructions.iter().enumerate() { + if is_conditional_jump(&ins.instr) + || ins.instr.is_unconditional_jump() + || ins.instr.is_scope_exit() + { + if i + 1 < block.instructions.len() { + found = Some(i + 1); + } + break; + } + } + found + }; + if let Some(pos) = split_at { + let new_block_idx = BlockIdx(blocks.len() as u32); + let tail: Vec = blocks[bi].instructions.drain(pos..).collect(); + let old_next = blocks[bi].next; + let cold = blocks[bi].cold; + blocks[bi].next = new_block_idx; + blocks.push(Block { + instructions: tail, + next: old_next, + cold, + ..Block::default() + }); + // Don't increment bi - re-check current block (it might still have issues) + } else { + bi += 1; + } + } +} + +/// Jump threading: when a block's last jump targets a block whose first +/// instruction is an unconditional jump, redirect to the final target. +/// flowgraph.c optimize_basic_block + jump_thread +fn jump_threading(blocks: &mut [Block]) { + let mut changed = true; + while changed { + changed = false; + for bi in 0..blocks.len() { + let last_idx = match blocks[bi].instructions.len().checked_sub(1) { + Some(i) => i, + None => continue, + }; + let ins = &blocks[bi].instructions[last_idx]; + let target = ins.target; + if target == BlockIdx::NULL { + continue; + } + if !ins.instr.is_unconditional_jump() && !is_conditional_jump(&ins.instr) { + continue; + } + // Check if target block's first instruction is an unconditional jump + let target_block = &blocks[target.idx()]; + if let Some(target_ins) = target_block.instructions.first() { + if target_ins.instr.is_unconditional_jump() + && target_ins.target != BlockIdx::NULL + && target_ins.target != target + { + let final_target = target_ins.target; + blocks[bi].instructions[last_idx].target = final_target; + changed = true; + } + } + } + } +} + fn is_conditional_jump(instr: &AnyInstruction) -> bool { matches!( instr.real(), @@ -2181,8 +2265,31 @@ fn is_conditional_jump(instr: &AnyInstruction) -> bool { ) } +/// Invert a conditional jump opcode. +fn reversed_conditional(instr: &AnyInstruction) -> Option { + Some(match instr.real()? { + Instruction::PopJumpIfFalse { .. } => Instruction::PopJumpIfTrue { + delta: Arg::marker(), + } + .into(), + Instruction::PopJumpIfTrue { .. } => Instruction::PopJumpIfFalse { + delta: Arg::marker(), + } + .into(), + Instruction::PopJumpIfNone { .. } => Instruction::PopJumpIfNotNone { + delta: Arg::marker(), + } + .into(), + Instruction::PopJumpIfNotNone { .. } => Instruction::PopJumpIfNone { + delta: Arg::marker(), + } + .into(), + _ => return None, + }) +} + /// flowgraph.c normalize_jumps + remove_redundant_jumps -fn normalize_jumps(blocks: &mut [Block]) { +fn normalize_jumps(blocks: &mut Vec) { let mut visit_order = Vec::new(); let mut visited = vec![false; blocks.len()]; let mut current = BlockIdx(0); @@ -2194,7 +2301,8 @@ fn normalize_jumps(blocks: &mut [Block]) { visited.fill(false); - for &block_idx in &visit_order { + for vi in 0..visit_order.len() { + let block_idx = visit_order[vi]; let idx = block_idx.idx(); visited[idx] = true; @@ -2213,32 +2321,89 @@ fn normalize_jumps(blocks: &mut [Block]) { } } - // Insert NOT_TAKEN after forward conditional jumps - let mut insert_positions: Vec<(usize, InstructionInfo)> = Vec::new(); - for (i, ins) in blocks[idx].instructions.iter().enumerate() { - if is_conditional_jump(&ins.instr) - && ins.target != BlockIdx::NULL - && !visited[ins.target.idx()] - { - insert_positions.push(( - i + 1, - InstructionInfo { + // Normalize conditional jumps: forward gets NOT_TAKEN, backward gets inverted + let last = blocks[idx].instructions.last(); + if let Some(last_ins) = last { + if is_conditional_jump(&last_ins.instr) && last_ins.target != BlockIdx::NULL { + let target = last_ins.target; + let is_forward = !visited[target.idx()]; + + if is_forward { + // Insert NOT_TAKEN after forward conditional jump + let not_taken = InstructionInfo { instr: Instruction::NotTaken.into(), arg: OpArg::new(0), target: BlockIdx::NULL, - location: ins.location, - end_location: ins.end_location, - except_handler: ins.except_handler, + location: last_ins.location, + end_location: last_ins.end_location, + except_handler: last_ins.except_handler, lineno_override: None, cache_entries: 0, - }, - )); + }; + blocks[idx].instructions.push(not_taken); + } else { + // Backward conditional jump: invert and create new block + // Transform: `cond_jump T` (backward) + // Into: `reversed_cond_jump b_next` + new block [NOT_TAKEN, JUMP T] + let loc = last_ins.location; + let end_loc = last_ins.end_location; + let exc_handler = last_ins.except_handler; + + if let Some(reversed) = reversed_conditional(&last_ins.instr) { + let old_next = blocks[idx].next; + let is_cold = blocks[idx].cold; + + // Create new block with NOT_TAKEN + JUMP to original backward target + let new_block_idx = BlockIdx(blocks.len() as u32); + let mut new_block = Block { + cold: is_cold, + ..Block::default() + }; + new_block.instructions.push(InstructionInfo { + instr: Instruction::NotTaken.into(), + arg: OpArg::new(0), + target: BlockIdx::NULL, + location: loc, + end_location: end_loc, + except_handler: exc_handler, + lineno_override: None, + cache_entries: 0, + }); + new_block.instructions.push(InstructionInfo { + instr: PseudoInstruction::Jump { delta: Arg::marker() }.into(), + arg: OpArg::new(0), + target, + location: loc, + end_location: end_loc, + except_handler: exc_handler, + lineno_override: None, + cache_entries: 0, + }); + new_block.next = old_next; + + // Update the conditional jump: invert opcode, target = old next block + let last_mut = blocks[idx].instructions.last_mut().unwrap(); + last_mut.instr = reversed; + last_mut.target = old_next; + + // Splice new block between current and old next + blocks[idx].next = new_block_idx; + blocks.push(new_block); + + // Extend visited array and update visit order + visited.push(true); + } + } } } + } - for (pos, info) in insert_positions.into_iter().rev() { - blocks[idx].instructions.insert(pos, info); - } + // Rebuild visit_order since backward normalization may have added new blocks + let mut visit_order = Vec::new(); + let mut current = BlockIdx(0); + while current != BlockIdx::NULL { + visit_order.push(current); + current = blocks[current.idx()].next; } // Replace JUMP → LOAD_CONST + RETURN_VALUE with inline return. @@ -2250,8 +2415,15 @@ fn normalize_jumps(blocks: &mut [Block]) { if !ins.instr.is_unconditional_jump() || ins.target == BlockIdx::NULL { continue; } - let target_block = &blocks[ins.target.idx()]; - // Target must be exactly LOAD_CONST + RETURN_VALUE (2 instructions) + // Follow through empty blocks (next_nonempty_block) + let mut target_idx = ins.target.idx(); + while blocks[target_idx].instructions.is_empty() + && blocks[target_idx].next != BlockIdx::NULL + { + target_idx = blocks[target_idx].next.idx(); + } + let target_block = &blocks[target_idx]; + // Target must start with LOAD_CONST + RETURN_VALUE if target_block.instructions.len() >= 2 { let t0 = &target_block.instructions[0]; let t1 = &target_block.instructions[1]; diff --git a/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap b/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap index 88f81321cd..02af6cad00 100644 --- a/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap +++ b/crates/codegen/src/snapshots/rustpython_codegen__compile__tests__nested_double_async_with.snap @@ -1,6 +1,5 @@ --- source: crates/codegen/src/compile.rs -assertion_line: 9962 expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')):\n with self.subTest(type=type(stop_exc)):\n try:\n async with egg():\n raise stop_exc\n except Exception as ex:\n self.assertIs(ex, stop_exc)\n else:\n self.fail(f'{stop_exc} was suppressed')\n\")" --- 1 0 RESUME (0) @@ -24,7 +23,7 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter 16 CACHE 17 CACHE 18 LOAD_CONST ("ham") - >> 19 CALL (1) + 19 CALL (1) 20 CACHE 21 CACHE 22 CACHE @@ -203,7 +202,7 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter 186 CACHE 187 CACHE 188 CACHE - 189 CACHE + >> 189 CACHE 190 CACHE 191 CACHE 192 CACHE @@ -221,34 +220,34 @@ expression: "compile_exec(\"\\\nasync def test():\n for stop_exc in (StopIter 3 204 LOAD_CONST (None) 205 LOAD_CONST (None) - 206 LOAD_CONST (None) + >> 206 LOAD_CONST (None) 207 CALL (3) 208 CACHE - >> 209 CACHE + 209 CACHE 210 CACHE 211 POP_TOP - 212 JUMP_FORWARD (19) - 213 PUSH_EXC_INFO - 214 WITH_EXCEPT_START - 215 TO_BOOL - 216 CACHE + 212 JUMP_BACKWARD (189) + 213 CACHE + 214 PUSH_EXC_INFO + 215 WITH_EXCEPT_START + 216 TO_BOOL 217 CACHE 218 CACHE - 219 POP_JUMP_IF_TRUE (2) - 220 CACHE - 221 NOT_TAKEN - 222 RERAISE (2) - 223 POP_TOP - 224 POP_EXCEPT - 225 POP_TOP + 219 CACHE + 220 POP_JUMP_IF_TRUE (2) + 221 CACHE + 222 NOT_TAKEN + 223 RERAISE (2) + 224 POP_TOP + 225 POP_EXCEPT 226 POP_TOP 227 POP_TOP - 228 JUMP_FORWARD (3) - 229 COPY (3) - 230 POP_EXCEPT - 231 RERAISE (1) - 232 JUMP_BACKWARD (209) - 233 CACHE + 228 POP_TOP + 229 JUMP_BACKWARD (206) + 230 CACHE + 231 COPY (3) + 232 POP_EXCEPT + 233 RERAISE (1) 2 234 CALL_INTRINSIC_1 (StopIterationError) 235 RERAISE (1) From 385b719876ad0b81fa77d3ae4d7464d125e117eb Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 30 Mar 2026 09:39:53 +0900 Subject: [PATCH 2/4] Add ConstantData::Slice and constant slice folding - Add Slice variant to ConstantData and BorrowedConstant - Fold constant slices (x[:3], x[1:4]) into LOAD_CONST(slice(...)) - Marshal serialization/deserialization for Slice type - Box::leak in borrow_obj_constant for PySlice roundtrip --- crates/codegen/src/compile.rs | 45 ++++++++++++++++++++++++++++ crates/compiler-core/src/bytecode.rs | 18 +++++++++++ crates/compiler-core/src/marshal.rs | 13 ++++++++ crates/vm/src/builtins/code.rs | 26 ++++++++++++++++ 4 files changed, 102 insertions(+) diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index 0cda77f983..2fa38d76e6 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -7574,6 +7574,10 @@ impl Compiler { ast::Expr::Slice(ast::ExprSlice { lower, upper, step, .. }) => { + // Try constant slice folding first + if self.try_fold_constant_slice(lower.as_deref(), upper.as_deref(), step.as_deref()) { + return Ok(()); + } let mut compile_bound = |bound: Option<&ast::Expr>| match bound { Some(exp) => self.compile_expression(exp), None => { @@ -8984,6 +8988,47 @@ impl Compiler { self.emit_arg(idx, |consti| Instruction::LoadConst { consti }) } + /// Fold constant slice: if all parts are compile-time constants, emit LOAD_CONST(slice). + fn try_fold_constant_slice( + &mut self, + lower: Option<&ast::Expr>, + upper: Option<&ast::Expr>, + step: Option<&ast::Expr>, + ) -> bool { + fn to_const(expr: Option<&ast::Expr>) -> Option { + match expr { + None | Some(ast::Expr::NoneLiteral(_)) => Some(ConstantData::None), + Some(ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value, .. })) => { + match value { + ast::Number::Int(i) => Some(ConstantData::Integer { + value: i.as_i64()?.into(), + }), + ast::Number::Float(f) => Some(ConstantData::Float { value: *f }), + ast::Number::Complex { real, imag } => Some(ConstantData::Complex { + value: num_complex::Complex64::new(*real, *imag), + }), + } + } + Some(ast::Expr::BooleanLiteral(ast::ExprBooleanLiteral { value, .. })) => { + Some(ConstantData::Boolean { value: *value }) + } + // Only match Constant_kind nodes (no UnaryOp) + _ => None, + } + } + + let (Some(start), Some(stop), Some(step_val)) = + (to_const(lower), to_const(upper), to_const(step)) + else { + return false; + }; + + self.emit_load_const(ConstantData::Slice { + elements: Box::new([start, stop, step_val]), + }); + true + } + fn emit_return_const(&mut self, constant: ConstantData) { self.emit_load_const(constant); emit!(self, Instruction::ReturnValue) diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index 4e35408571..fd6481edd4 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -292,6 +292,7 @@ impl Constant for ConstantData { Self::Bytes { value } => Bytes { value }, Self::Code { code } => Code { code }, Self::Tuple { elements } => Tuple { elements }, + Self::Slice { elements } => Slice { elements }, Self::None => None, Self::Ellipsis => Ellipsis, } @@ -857,6 +858,8 @@ pub enum ConstantData { Str { value: Wtf8Buf }, Bytes { value: Vec }, Code { code: Box }, + /// Constant slice(start, stop, step) + Slice { elements: Box<[ConstantData; 3]> }, None, Ellipsis, } @@ -878,6 +881,7 @@ impl PartialEq for ConstantData { (Bytes { value: a }, Bytes { value: b }) => a == b, (Code { code: a }, Code { code: b }) => core::ptr::eq(a.as_ref(), b.as_ref()), (Tuple { elements: a }, Tuple { elements: b }) => a == b, + (Slice { elements: a }, Slice { elements: b }) => a == b, (None, None) => true, (Ellipsis, Ellipsis) => true, _ => false, @@ -904,6 +908,7 @@ impl hash::Hash for ConstantData { Bytes { value } => value.hash(state), Code { code } => core::ptr::hash(code.as_ref(), state), Tuple { elements } => elements.hash(state), + Slice { elements } => elements.hash(state), None => {} Ellipsis => {} } @@ -920,6 +925,7 @@ pub enum BorrowedConstant<'a, C: Constant> { Bytes { value: &'a [u8] }, Code { code: &'a CodeObject }, Tuple { elements: &'a [C] }, + Slice { elements: &'a [C; 3] }, None, Ellipsis, } @@ -957,6 +963,15 @@ impl BorrowedConstant<'_, C> { } write!(f, ")") } + BorrowedConstant::Slice { elements } => { + write!(f, "slice(")?; + elements[0].borrow_constant().fmt_display(f)?; + write!(f, ", ")?; + elements[1].borrow_constant().fmt_display(f)?; + write!(f, ", ")?; + elements[2].borrow_constant().fmt_display(f)?; + write!(f, ")") + } BorrowedConstant::None => write!(f, "None"), BorrowedConstant::Ellipsis => write!(f, "..."), } @@ -987,6 +1002,9 @@ impl BorrowedConstant<'_, C> { .map(|c| c.borrow_constant().to_owned()) .collect(), }, + BorrowedConstant::Slice { elements } => Slice { + elements: Box::new(elements.each_ref().map(|c| c.borrow_constant().to_owned())), + }, BorrowedConstant::None => None, BorrowedConstant::Ellipsis => Ellipsis, } diff --git a/crates/compiler-core/src/marshal.rs b/crates/compiler-core/src/marshal.rs index 0031fba29e..53b6370408 100644 --- a/crates/compiler-core/src/marshal.rs +++ b/crates/compiler-core/src/marshal.rs @@ -435,6 +435,16 @@ impl MarshalBag for Bag { self.make_tuple(elements) } + fn make_slice( + &self, + start: Self::Value, + stop: Self::Value, + step: Self::Value, + ) -> Result { + let elements = [start, stop, step]; + Ok(self.make_constant::(BorrowedConstant::Slice { elements: &elements })) + } + fn make_code( &self, code: CodeObject<::Constant>, @@ -697,6 +707,9 @@ impl<'a, C: Constant> From> for DumpableValue<'a, C> { BorrowedConstant::Bytes { value } => Self::Bytes(value), BorrowedConstant::Code { code } => Self::Code(code), BorrowedConstant::Tuple { elements } => Self::Tuple(elements), + BorrowedConstant::Slice { elements } => { + Self::Slice(&elements[0], &elements[1], &elements[2]) + } BorrowedConstant::None => Self::None, BorrowedConstant::Ellipsis => Self::Ellipsis, } diff --git a/crates/vm/src/builtins/code.rs b/crates/vm/src/builtins/code.rs index 9d1083b106..c521806082 100644 --- a/crates/vm/src/builtins/code.rs +++ b/crates/vm/src/builtins/code.rs @@ -232,6 +232,16 @@ fn borrow_obj_constant(obj: &PyObject) -> BorrowedConstant<'_, Literal> { } super::singletons::PyNone => BorrowedConstant::None, super::slice::PyEllipsis => BorrowedConstant::Ellipsis, + ref s @ super::slice::PySlice => { + // Constant pool slices always store Some() for start/step (even for None). + // Box::leak the array so it outlives the borrow. Leak is acceptable since + // constant pool objects live for the program's lifetime. + let start = s.start.clone().unwrap(); + let stop = s.stop.clone(); + let step = s.step.clone().unwrap(); + let arr = Box::leak(Box::new([Literal(start), Literal(stop), Literal(step)])); + BorrowedConstant::Slice { elements: arr } + } _ => panic!("unexpected payload for constant python value"), }) } @@ -283,6 +293,22 @@ impl ConstantBag for PyObjBag<'_> { .collect(); ctx.new_tuple(elements).into() } + BorrowedConstant::Slice { elements } => { + let [start, stop, step] = elements; + let start_obj = self.make_constant(start.borrow_constant()).0; + let stop_obj = self.make_constant(stop.borrow_constant()).0; + let step_obj = self.make_constant(step.borrow_constant()).0; + // Store as PySlice with Some() for all fields (even None values) + // so borrow_obj_constant can reference them. + use crate::builtins::PySlice; + PySlice { + start: Some(start_obj), + stop: stop_obj, + step: Some(step_obj), + } + .into_ref(ctx) + .into() + } BorrowedConstant::None => ctx.none(), BorrowedConstant::Ellipsis => ctx.ellipsis.clone().into(), }; From f8927e11225c10453dbba486c38ead4ed15bbc2e Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 30 Mar 2026 09:58:57 +0900 Subject: [PATCH 3/4] Add ConstantData::Frozenset variant (type only, folding deferred) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Frozenset to ConstantData, BorrowedConstant, and marshal support. Actual frozenset folding (BUILD_SET + CONTAINS_OP → LOAD_CONST frozenset) requires VirtualMachine for element hashing and is deferred. --- crates/compiler-core/src/bytecode.rs | 24 ++++++++++++++++++++++++ crates/compiler-core/src/marshal.rs | 6 ++++-- crates/vm/src/builtins/code.rs | 15 +++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index fd6481edd4..0e2f49f1f9 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -293,6 +293,7 @@ impl Constant for ConstantData { Self::Code { code } => Code { code }, Self::Tuple { elements } => Tuple { elements }, Self::Slice { elements } => Slice { elements }, + Self::Frozenset { elements } => Frozenset { elements }, Self::None => None, Self::Ellipsis => Ellipsis, } @@ -860,6 +861,7 @@ pub enum ConstantData { Code { code: Box }, /// Constant slice(start, stop, step) Slice { elements: Box<[ConstantData; 3]> }, + Frozenset { elements: Vec }, None, Ellipsis, } @@ -882,6 +884,7 @@ impl PartialEq for ConstantData { (Code { code: a }, Code { code: b }) => core::ptr::eq(a.as_ref(), b.as_ref()), (Tuple { elements: a }, Tuple { elements: b }) => a == b, (Slice { elements: a }, Slice { elements: b }) => a == b, + (Frozenset { elements: a }, Frozenset { elements: b }) => a == b, (None, None) => true, (Ellipsis, Ellipsis) => true, _ => false, @@ -909,6 +912,7 @@ impl hash::Hash for ConstantData { Code { code } => core::ptr::hash(code.as_ref(), state), Tuple { elements } => elements.hash(state), Slice { elements } => elements.hash(state), + Frozenset { elements } => elements.hash(state), None => {} Ellipsis => {} } @@ -926,6 +930,7 @@ pub enum BorrowedConstant<'a, C: Constant> { Code { code: &'a CodeObject }, Tuple { elements: &'a [C] }, Slice { elements: &'a [C; 3] }, + Frozenset { elements: &'a [C] }, None, Ellipsis, } @@ -972,6 +977,19 @@ impl BorrowedConstant<'_, C> { elements[2].borrow_constant().fmt_display(f)?; write!(f, ")") } + BorrowedConstant::Frozenset { elements } => { + write!(f, "frozenset({{")?; + let mut first = true; + for c in *elements { + if first { + first = false + } else { + write!(f, ", ")?; + } + c.borrow_constant().fmt_display(f)?; + } + write!(f, "}})") + } BorrowedConstant::None => write!(f, "None"), BorrowedConstant::Ellipsis => write!(f, "..."), } @@ -1005,6 +1023,12 @@ impl BorrowedConstant<'_, C> { BorrowedConstant::Slice { elements } => Slice { elements: Box::new(elements.each_ref().map(|c| c.borrow_constant().to_owned())), }, + BorrowedConstant::Frozenset { elements } => Frozenset { + elements: elements + .iter() + .map(|c| c.borrow_constant().to_owned()) + .collect(), + }, BorrowedConstant::None => None, BorrowedConstant::Ellipsis => Ellipsis, } diff --git a/crates/compiler-core/src/marshal.rs b/crates/compiler-core/src/marshal.rs index 53b6370408..72d2334875 100644 --- a/crates/compiler-core/src/marshal.rs +++ b/crates/compiler-core/src/marshal.rs @@ -464,8 +464,9 @@ impl MarshalBag for Bag { Err(MarshalError::BadType) } - fn make_frozenset(&self, _: impl Iterator) -> Result { - Err(MarshalError::BadType) + fn make_frozenset(&self, it: impl Iterator) -> Result { + let elements: Vec = it.collect(); + Ok(self.make_constant::(BorrowedConstant::Frozenset { elements: &elements })) } fn make_dict( @@ -710,6 +711,7 @@ impl<'a, C: Constant> From> for DumpableValue<'a, C> { BorrowedConstant::Slice { elements } => { Self::Slice(&elements[0], &elements[1], &elements[2]) } + BorrowedConstant::Frozenset { elements } => Self::Frozenset(elements), BorrowedConstant::None => Self::None, BorrowedConstant::Ellipsis => Self::Ellipsis, } diff --git a/crates/vm/src/builtins/code.rs b/crates/vm/src/builtins/code.rs index c521806082..0ddbd9b851 100644 --- a/crates/vm/src/builtins/code.rs +++ b/crates/vm/src/builtins/code.rs @@ -242,6 +242,13 @@ fn borrow_obj_constant(obj: &PyObject) -> BorrowedConstant<'_, Literal> { let arr = Box::leak(Box::new([Literal(start), Literal(stop), Literal(step)])); BorrowedConstant::Slice { elements: arr } } + ref fs @ super::set::PyFrozenSet => { + // Box::leak the elements so they outlive the borrow. Leak is acceptable since + // constant pool objects live for the program's lifetime. + let elems: Vec = fs.elements().into_iter().map(Literal).collect(); + let elements = Box::leak(elems.into_boxed_slice()); + BorrowedConstant::Frozenset { elements } + } _ => panic!("unexpected payload for constant python value"), }) } @@ -309,6 +316,14 @@ impl ConstantBag for PyObjBag<'_> { .into_ref(ctx) .into() } + BorrowedConstant::Frozenset { elements: _ } => { + // Creating a frozenset requires VirtualMachine for element hashing. + // PyObjBag only has Context, so we cannot construct PyFrozenSet here. + // Frozenset constants from .pyc are handled by PyMarshalBag which has VM access. + unimplemented!( + "frozenset constant in PyObjBag::make_constant requires VirtualMachine" + ) + } BorrowedConstant::None => ctx.none(), BorrowedConstant::Ellipsis => ctx.ellipsis.clone().into(), }; From 42b24cbe40053f8b5763e448154b555fbd638fc7 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Mon, 30 Mar 2026 10:23:11 +0900 Subject: [PATCH 4/4] Add duplicate_exits_without_lineno (disabled) and Block: Clone Prepare infrastructure for exit block duplication optimization. Currently disabled pending stackdepth integration. --- crates/codegen/src/compile.rs | 24 +-- crates/codegen/src/ir.rs | 265 +++++++++++++++++++-------- crates/compiler-core/src/bytecode.rs | 40 +++- crates/compiler-core/src/marshal.rs | 12 +- 4 files changed, 237 insertions(+), 104 deletions(-) diff --git a/crates/codegen/src/compile.rs b/crates/codegen/src/compile.rs index 2fa38d76e6..e3f1d18737 100644 --- a/crates/codegen/src/compile.rs +++ b/crates/codegen/src/compile.rs @@ -7575,7 +7575,8 @@ impl Compiler { lower, upper, step, .. }) => { // Try constant slice folding first - if self.try_fold_constant_slice(lower.as_deref(), upper.as_deref(), step.as_deref()) { + if self.try_fold_constant_slice(lower.as_deref(), upper.as_deref(), step.as_deref()) + { return Ok(()); } let mut compile_bound = |bound: Option<&ast::Expr>| match bound { @@ -8998,17 +8999,16 @@ impl Compiler { fn to_const(expr: Option<&ast::Expr>) -> Option { match expr { None | Some(ast::Expr::NoneLiteral(_)) => Some(ConstantData::None), - Some(ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value, .. })) => { - match value { - ast::Number::Int(i) => Some(ConstantData::Integer { - value: i.as_i64()?.into(), - }), - ast::Number::Float(f) => Some(ConstantData::Float { value: *f }), - ast::Number::Complex { real, imag } => Some(ConstantData::Complex { - value: num_complex::Complex64::new(*real, *imag), - }), - } - } + Some(ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value, .. })) => match value + { + ast::Number::Int(i) => Some(ConstantData::Integer { + value: i.as_i64()?.into(), + }), + ast::Number::Float(f) => Some(ConstantData::Float { value: *f }), + ast::Number::Complex { real, imag } => Some(ConstantData::Complex { + value: num_complex::Complex64::new(*real, *imag), + }), + }, Some(ast::Expr::BooleanLiteral(ast::ExprBooleanLiteral { value, .. })) => { Some(ConstantData::Boolean { value: *value }) } diff --git a/crates/codegen/src/ir.rs b/crates/codegen/src/ir.rs index 84ac89ce00..63e31a5b1f 100644 --- a/crates/codegen/src/ir.rs +++ b/crates/codegen/src/ir.rs @@ -125,7 +125,7 @@ pub struct ExceptHandlerInfo { // spell-checker:ignore petgraph // TODO: look into using petgraph for handling blocks and stuff? it's heavier than this, but it // might enable more analysis/optimizations -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Block { pub instructions: Vec, pub next: BlockIdx, @@ -225,6 +225,8 @@ impl CodeInfo { normalize_jumps(&mut self.blocks); self.dce(); // re-run within-block DCE after normalize_jumps creates new instructions self.eliminate_unreachable_blocks(); + // TODO: duplicate_exits_without_lineno disabled pending stackdepth fix + // duplicate_exits_without_lineno(&mut self.blocks); duplicate_end_returns(&mut self.blocks); self.dce(); // truncate after terminal in blocks that got return duplicated self.eliminate_unreachable_blocks(); // remove now-unreachable last block @@ -2239,15 +2241,14 @@ fn jump_threading(blocks: &mut [Block]) { } // Check if target block's first instruction is an unconditional jump let target_block = &blocks[target.idx()]; - if let Some(target_ins) = target_block.instructions.first() { - if target_ins.instr.is_unconditional_jump() - && target_ins.target != BlockIdx::NULL - && target_ins.target != target - { - let final_target = target_ins.target; - blocks[bi].instructions[last_idx].target = final_target; - changed = true; - } + if let Some(target_ins) = target_block.instructions.first() + && target_ins.instr.is_unconditional_jump() + && target_ins.target != BlockIdx::NULL + && target_ins.target != target + { + let final_target = target_ins.target; + blocks[bi].instructions[last_idx].target = final_target; + changed = true; } } } @@ -2301,8 +2302,7 @@ fn normalize_jumps(blocks: &mut Vec) { visited.fill(false); - for vi in 0..visit_order.len() { - let block_idx = visit_order[vi]; + for &block_idx in &visit_order { let idx = block_idx.idx(); visited[idx] = true; @@ -2323,76 +2323,80 @@ fn normalize_jumps(blocks: &mut Vec) { // Normalize conditional jumps: forward gets NOT_TAKEN, backward gets inverted let last = blocks[idx].instructions.last(); - if let Some(last_ins) = last { - if is_conditional_jump(&last_ins.instr) && last_ins.target != BlockIdx::NULL { - let target = last_ins.target; - let is_forward = !visited[target.idx()]; - - if is_forward { - // Insert NOT_TAKEN after forward conditional jump - let not_taken = InstructionInfo { + if let Some(last_ins) = last + && is_conditional_jump(&last_ins.instr) + && last_ins.target != BlockIdx::NULL + { + let target = last_ins.target; + let is_forward = !visited[target.idx()]; + + if is_forward { + // Insert NOT_TAKEN after forward conditional jump + let not_taken = InstructionInfo { + instr: Instruction::NotTaken.into(), + arg: OpArg::new(0), + target: BlockIdx::NULL, + location: last_ins.location, + end_location: last_ins.end_location, + except_handler: last_ins.except_handler, + lineno_override: None, + cache_entries: 0, + }; + blocks[idx].instructions.push(not_taken); + } else { + // Backward conditional jump: invert and create new block + // Transform: `cond_jump T` (backward) + // Into: `reversed_cond_jump b_next` + new block [NOT_TAKEN, JUMP T] + let loc = last_ins.location; + let end_loc = last_ins.end_location; + let exc_handler = last_ins.except_handler; + + if let Some(reversed) = reversed_conditional(&last_ins.instr) { + let old_next = blocks[idx].next; + let is_cold = blocks[idx].cold; + + // Create new block with NOT_TAKEN + JUMP to original backward target + let new_block_idx = BlockIdx(blocks.len() as u32); + let mut new_block = Block { + cold: is_cold, + ..Block::default() + }; + new_block.instructions.push(InstructionInfo { instr: Instruction::NotTaken.into(), arg: OpArg::new(0), target: BlockIdx::NULL, - location: last_ins.location, - end_location: last_ins.end_location, - except_handler: last_ins.except_handler, + location: loc, + end_location: end_loc, + except_handler: exc_handler, lineno_override: None, cache_entries: 0, - }; - blocks[idx].instructions.push(not_taken); - } else { - // Backward conditional jump: invert and create new block - // Transform: `cond_jump T` (backward) - // Into: `reversed_cond_jump b_next` + new block [NOT_TAKEN, JUMP T] - let loc = last_ins.location; - let end_loc = last_ins.end_location; - let exc_handler = last_ins.except_handler; - - if let Some(reversed) = reversed_conditional(&last_ins.instr) { - let old_next = blocks[idx].next; - let is_cold = blocks[idx].cold; - - // Create new block with NOT_TAKEN + JUMP to original backward target - let new_block_idx = BlockIdx(blocks.len() as u32); - let mut new_block = Block { - cold: is_cold, - ..Block::default() - }; - new_block.instructions.push(InstructionInfo { - instr: Instruction::NotTaken.into(), - arg: OpArg::new(0), - target: BlockIdx::NULL, - location: loc, - end_location: end_loc, - except_handler: exc_handler, - lineno_override: None, - cache_entries: 0, - }); - new_block.instructions.push(InstructionInfo { - instr: PseudoInstruction::Jump { delta: Arg::marker() }.into(), - arg: OpArg::new(0), - target, - location: loc, - end_location: end_loc, - except_handler: exc_handler, - lineno_override: None, - cache_entries: 0, - }); - new_block.next = old_next; - - // Update the conditional jump: invert opcode, target = old next block - let last_mut = blocks[idx].instructions.last_mut().unwrap(); - last_mut.instr = reversed; - last_mut.target = old_next; - - // Splice new block between current and old next - blocks[idx].next = new_block_idx; - blocks.push(new_block); - - // Extend visited array and update visit order - visited.push(true); - } + }); + new_block.instructions.push(InstructionInfo { + instr: PseudoInstruction::Jump { + delta: Arg::marker(), + } + .into(), + arg: OpArg::new(0), + target, + location: loc, + end_location: end_loc, + except_handler: exc_handler, + lineno_override: None, + cache_entries: 0, + }); + new_block.next = old_next; + + // Update the conditional jump: invert opcode, target = old next block + let last_mut = blocks[idx].instructions.last_mut().unwrap(); + last_mut.instr = reversed; + last_mut.target = old_next; + + // Splice new block between current and old next + blocks[idx].next = new_block_idx; + blocks.push(new_block); + + // Extend visited array and update visit order + visited.push(true); } } } @@ -2496,10 +2500,111 @@ fn normalize_jumps(blocks: &mut Vec) { } } +#[allow(dead_code)] +fn is_simple_return_block(block: &Block) -> bool { + block.instructions.len() == 1 + && matches!( + block.instructions[0].instr.real(), + Some(Instruction::ReturnValue) + ) +} + +/// Follow chain of empty blocks to find first non-empty block. +fn next_nonempty_block(blocks: &[Block], mut idx: BlockIdx) -> BlockIdx { + while idx != BlockIdx::NULL + && blocks[idx.idx()].instructions.is_empty() + && blocks[idx.idx()].next != BlockIdx::NULL + { + idx = blocks[idx.idx()].next; + } + idx +} + +#[allow(dead_code)] +fn duplicate_exits_without_lineno(blocks: &mut Vec) { + // Count predecessors for each block + let mut predecessors = vec![0u32; blocks.len()]; + let mut current = BlockIdx(0); + while current != BlockIdx::NULL { + let block = &blocks[current.idx()]; + // Fall-through predecessor + let next = next_nonempty_block(blocks, block.next); + if next != BlockIdx::NULL { + let has_fallthrough = block + .instructions + .last() + .is_none_or(|ins| !ins.instr.is_scope_exit() && !ins.instr.is_unconditional_jump()); + if has_fallthrough { + predecessors[next.idx()] += 1; + } + } + // Jump target predecessor + for ins in &block.instructions { + if ins.target != BlockIdx::NULL { + let target = next_nonempty_block(blocks, ins.target); + if target != BlockIdx::NULL { + predecessors[target.idx()] += 1; + } + } + } + current = block.next; + } + + // For each block, if its last instruction jumps to an exit block without lineno + // that has >1 predecessor, copy that exit block. + current = BlockIdx(0); + while current != BlockIdx::NULL { + let block = &blocks[current.idx()]; + let last = match block.instructions.last() { + Some(ins) if ins.target != BlockIdx::NULL => ins, + _ => { + current = blocks[current.idx()].next; + continue; + } + }; + if !last.instr.is_unconditional_jump() && !is_conditional_jump(&last.instr) { + current = blocks[current.idx()].next; + continue; + } + + let target = next_nonempty_block(blocks, last.target); + if target == BlockIdx::NULL || !is_simple_return_block(&blocks[target.idx()]) { + current = blocks[current.idx()].next; + continue; + } + if predecessors[target.idx()] <= 1 { + current = blocks[current.idx()].next; + continue; + } + + // Copy the exit block + let new_idx = BlockIdx(blocks.len() as u32); + let mut new_block = blocks[target.idx()].clone(); + // Set location from the jump instruction + let jump_loc = blocks[current.idx()].instructions.last().unwrap().location; + let jump_end_loc = blocks[current.idx()] + .instructions + .last() + .unwrap() + .end_location; + if let Some(first) = new_block.instructions.first_mut() { + first.location = jump_loc; + first.end_location = jump_end_loc; + } + new_block.next = blocks[target.idx()].next; + blocks.push(new_block); + + // Update the jump target + let last_mut = blocks[current.idx()].instructions.last_mut().unwrap(); + last_mut.target = new_idx; + predecessors[target.idx()] -= 1; + + current = blocks[current.idx()].next; + } +} + /// Duplicate `LOAD_CONST None + RETURN_VALUE` for blocks that fall through -/// to the final return block. Matches CPython's behavior of ensuring every -/// code path that reaches the end of a function/module has its own explicit -/// return instruction. +/// to the final return block. fn duplicate_end_returns(blocks: &mut [Block]) { // Walk the block chain to find the last block let mut last_block = BlockIdx(0); diff --git a/crates/compiler-core/src/bytecode.rs b/crates/compiler-core/src/bytecode.rs index 0e2f49f1f9..a17fe7945d 100644 --- a/crates/compiler-core/src/bytecode.rs +++ b/crates/compiler-core/src/bytecode.rs @@ -851,17 +851,37 @@ impl CodeUnits { /// ``` #[derive(Debug, Clone)] pub enum ConstantData { - Tuple { elements: Vec }, - Integer { value: BigInt }, - Float { value: f64 }, - Complex { value: Complex64 }, - Boolean { value: bool }, - Str { value: Wtf8Buf }, - Bytes { value: Vec }, - Code { code: Box }, + Tuple { + elements: Vec, + }, + Integer { + value: BigInt, + }, + Float { + value: f64, + }, + Complex { + value: Complex64, + }, + Boolean { + value: bool, + }, + Str { + value: Wtf8Buf, + }, + Bytes { + value: Vec, + }, + Code { + code: Box, + }, /// Constant slice(start, stop, step) - Slice { elements: Box<[ConstantData; 3]> }, - Frozenset { elements: Vec }, + Slice { + elements: Box<[ConstantData; 3]>, + }, + Frozenset { + elements: Vec, + }, None, Ellipsis, } diff --git a/crates/compiler-core/src/marshal.rs b/crates/compiler-core/src/marshal.rs index 72d2334875..5594d0f4c4 100644 --- a/crates/compiler-core/src/marshal.rs +++ b/crates/compiler-core/src/marshal.rs @@ -442,7 +442,11 @@ impl MarshalBag for Bag { step: Self::Value, ) -> Result { let elements = [start, stop, step]; - Ok(self.make_constant::(BorrowedConstant::Slice { elements: &elements })) + Ok( + self.make_constant::(BorrowedConstant::Slice { + elements: &elements, + }), + ) } fn make_code( @@ -466,7 +470,11 @@ impl MarshalBag for Bag { fn make_frozenset(&self, it: impl Iterator) -> Result { let elements: Vec = it.collect(); - Ok(self.make_constant::(BorrowedConstant::Frozenset { elements: &elements })) + Ok( + self.make_constant::(BorrowedConstant::Frozenset { + elements: &elements, + }), + ) } fn make_dict(