From 23d886f2d379b0bf90ffeeaed639a92606494515 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Tue, 20 Jan 2026 22:03:17 +0900 Subject: [PATCH 1/3] show_deps --- scripts/update_lib/__main__.py | 10 +++ scripts/update_lib/deps.py | 71 ++++++++++++++++ scripts/update_lib/show_deps.py | 78 ++++++++++++++++++ scripts/update_lib/tests/test_deps.py | 111 ++++++++++++++++++++++++++ 4 files changed, 270 insertions(+) create mode 100644 scripts/update_lib/show_deps.py diff --git a/scripts/update_lib/__main__.py b/scripts/update_lib/__main__.py index f20f41c84e0..2a4c03919fb 100644 --- a/scripts/update_lib/__main__.py +++ b/scripts/update_lib/__main__.py @@ -49,6 +49,11 @@ def main(argv: list[str] | None = None) -> int: help="Copy library file/directory from CPython (delete existing first)", add_help=False, ) + subparsers.add_parser( + "deps", + help="Show dependency information for a module", + add_help=False, + ) args, remaining = parser.parse_known_args(argv) @@ -77,6 +82,11 @@ def main(argv: list[str] | None = None) -> int: return auto_mark_main(remaining) + if args.command == "deps": + from update_lib.show_deps import main as show_deps_main + + return show_deps_main(remaining) + return 0 diff --git a/scripts/update_lib/deps.py b/scripts/update_lib/deps.py index a0b6d121f59..dec64494efc 100644 --- a/scripts/update_lib/deps.py +++ b/scripts/update_lib/deps.py @@ -258,6 +258,77 @@ def parse_test_imports(content: str) -> set[str]: return imports +def parse_lib_imports(content: str) -> set[str]: + """Parse library file and extract all imported module names. 
+ + Args: + content: Python file content + + Returns: + Set of imported module names (top-level only) + """ + try: + tree = ast.parse(content) + except SyntaxError: + return set() + + imports = set() + for node in ast.walk(tree): + if isinstance(node, ast.Import): + # import foo, bar + for alias in node.names: + imports.add(alias.name.split(".")[0]) + elif isinstance(node, ast.ImportFrom): + # from foo import bar + if node.module: + imports.add(node.module.split(".")[0]) + + return imports + + +def get_soft_deps(name: str, cpython_prefix: str = "cpython") -> set[str]: + """Get soft dependencies by parsing imports from library file. + + Args: + name: Module name + cpython_prefix: CPython directory prefix + + Returns: + Set of imported stdlib module names + """ + lib_paths = get_lib_paths(name, cpython_prefix) + + all_imports = set() + for lib_path in lib_paths: + if lib_path.exists(): + if lib_path.is_file(): + try: + content = lib_path.read_text(encoding="utf-8") + all_imports.update(parse_lib_imports(content)) + except (OSError, UnicodeDecodeError): + continue + else: + # Directory - parse all .py files + for py_file in lib_path.glob("**/*.py"): + try: + content = py_file.read_text(encoding="utf-8") + all_imports.update(parse_lib_imports(content)) + except (OSError, UnicodeDecodeError): + continue + + # Filter: only include modules that exist in cpython/Lib/ + stdlib_deps = set() + for imp in all_imports: + if imp == name: + continue # Skip self + file_path = pathlib.Path(f"{cpython_prefix}/Lib/{imp}.py") + dir_path = pathlib.Path(f"{cpython_prefix}/Lib/{imp}") + if file_path.exists() or dir_path.exists(): + stdlib_deps.add(imp) + + return stdlib_deps + + def get_test_dependencies( test_path: pathlib.Path, ) -> dict[str, list[pathlib.Path]]: diff --git a/scripts/update_lib/show_deps.py b/scripts/update_lib/show_deps.py new file mode 100644 index 00000000000..c4b91ff9644 --- /dev/null +++ b/scripts/update_lib/show_deps.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python 
+""" +Show dependency information for a module. + +Usage: + python scripts/update_lib deps dis + python scripts/update_lib deps dataclasses +""" + +import argparse +import pathlib +import sys + +sys.path.insert(0, str(pathlib.Path(__file__).parent.parent)) + + +def show_deps(name: str, cpython_prefix: str = "cpython") -> None: + """Show all dependency information for a module.""" + from update_lib.deps import ( + DEPENDENCIES, + get_lib_paths, + get_soft_deps, + get_test_paths, + ) + + print(f"Module: {name}") + + # lib paths + lib_paths = get_lib_paths(name, cpython_prefix) + for p in lib_paths: + exists = "+" if p.exists() else "-" + print(f" [{exists}] lib: {p}") + + # test paths + test_paths = get_test_paths(name, cpython_prefix) + for p in test_paths: + exists = "+" if p.exists() else "-" + print(f" [{exists}] test: {p}") + + # hard_deps (from DEPENDENCIES table) + dep_info = DEPENDENCIES.get(name, {}) + hard_deps = dep_info.get("hard_deps", []) + if hard_deps: + print(f" hard_deps: {hard_deps}") + + # soft_deps (auto-detected) + soft_deps = sorted(get_soft_deps(name, cpython_prefix)) + if soft_deps: + print(f" soft_deps: {soft_deps}") + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "name", + help="Module name (e.g., dis, dataclasses, datetime)", + ) + parser.add_argument( + "--cpython", + default="cpython", + help="CPython directory prefix (default: cpython)", + ) + + args = parser.parse_args(argv) + + try: + show_deps(args.name, args.cpython) + return 0 + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/update_lib/tests/test_deps.py b/scripts/update_lib/tests/test_deps.py index 1ff45b703c1..0e807f9cc5e 100644 --- a/scripts/update_lib/tests/test_deps.py +++ b/scripts/update_lib/tests/test_deps.py @@ -7,8 +7,10 @@ from 
update_lib.deps import ( get_data_paths, get_lib_paths, + get_soft_deps, get_test_dependencies, get_test_paths, + parse_lib_imports, parse_test_imports, resolve_all_paths, ) @@ -231,5 +233,114 @@ def test_regrtest(self): ) +class TestParseLibImports(unittest.TestCase): + """Tests for parse_lib_imports function.""" + + def test_import_statement(self): + """Test parsing 'import foo'.""" + code = """ +import os +import sys +import collections.abc +""" + imports = parse_lib_imports(code) + self.assertEqual(imports, {"os", "sys", "collections"}) + + def test_from_import(self): + """Test parsing 'from foo import bar'.""" + code = """ +from os import path +from collections.abc import Mapping +from typing import Optional +""" + imports = parse_lib_imports(code) + self.assertEqual(imports, {"os", "collections", "typing"}) + + def test_mixed_imports(self): + """Test mixed import styles.""" + code = """ +import sys +from os import path +from collections import defaultdict +import functools +""" + imports = parse_lib_imports(code) + self.assertEqual(imports, {"sys", "os", "collections", "functools"}) + + def test_syntax_error_returns_empty(self): + """Test that syntax errors return empty set.""" + code = "this is not valid python {" + imports = parse_lib_imports(code) + self.assertEqual(imports, set()) + + def test_relative_import_skipped(self): + """Test that relative imports (no module) are skipped.""" + code = """ +from . import foo +from .. 
import bar +""" + imports = parse_lib_imports(code) + self.assertEqual(imports, set()) + + +class TestGetSoftDeps(unittest.TestCase): + """Tests for get_soft_deps function.""" + + def test_with_temp_files(self): + """Test soft deps detection with temp files.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = pathlib.Path(tmpdir) + lib_dir = tmpdir / "Lib" + lib_dir.mkdir() + + # Create a module that imports another module + (lib_dir / "foo.py").write_text(""" +import bar +from baz import something +""") + # Create the imported modules + (lib_dir / "bar.py").write_text("# bar module") + (lib_dir / "baz.py").write_text("# baz module") + + soft_deps = get_soft_deps("foo", str(tmpdir)) + self.assertEqual(soft_deps, {"bar", "baz"}) + + def test_skips_self(self): + """Test that module doesn't include itself in soft_deps.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = pathlib.Path(tmpdir) + lib_dir = tmpdir / "Lib" + lib_dir.mkdir() + + # Create a module that imports itself (circular) + (lib_dir / "foo.py").write_text(""" +import foo +import bar +""") + (lib_dir / "bar.py").write_text("# bar module") + + soft_deps = get_soft_deps("foo", str(tmpdir)) + self.assertNotIn("foo", soft_deps) + self.assertIn("bar", soft_deps) + + def test_filters_nonexistent(self): + """Test that nonexistent modules are filtered out.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = pathlib.Path(tmpdir) + lib_dir = tmpdir / "Lib" + lib_dir.mkdir() + + # Create a module that imports nonexistent module + (lib_dir / "foo.py").write_text(""" +import bar +import nonexistent +""") + (lib_dir / "bar.py").write_text("# bar module") + # nonexistent.py is NOT created + + soft_deps = get_soft_deps("foo", str(tmpdir)) + self.assertEqual(soft_deps, {"bar"}) + + if __name__ == "__main__": unittest.main() From 3178783d2630dbb7f5ecd7c0a76ec0a349442ce9 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Tue, 20 Jan 2026 23:12:54 +0900 Subject: [PATCH 2/3] soft deps tree --- 
scripts/update_lib/auto_mark.py | 10 +- scripts/update_lib/copy_lib.py | 19 +- scripts/update_lib/deps.py | 199 ++++++++++++-------- scripts/update_lib/io_utils.py | 81 ++++++++ scripts/update_lib/path.py | 81 ++++++++ scripts/update_lib/quick.py | 77 ++++---- scripts/update_lib/show_deps.py | 213 ++++++++++++++++++++-- scripts/update_lib/tests/test_copy_lib.py | 13 ++ scripts/update_lib/tests/test_deps.py | 80 ++++++++ 9 files changed, 636 insertions(+), 137 deletions(-) create mode 100644 scripts/update_lib/io_utils.py diff --git a/scripts/update_lib/auto_mark.py b/scripts/update_lib/auto_mark.py index faf7abec664..bfc80f0d9fa 100644 --- a/scripts/update_lib/auto_mark.py +++ b/scripts/update_lib/auto_mark.py @@ -455,15 +455,13 @@ def extract_test_methods(contents: str) -> set[tuple[str, str]]: Returns: Set of (class_name, method_name) tuples """ - import ast + from update_lib.io_utils import safe_parse_ast + from update_lib.patch_spec import iter_tests - try: - tree = ast.parse(contents) - except SyntaxError: + tree = safe_parse_ast(contents) + if tree is None: return set() - from update_lib.patch_spec import iter_tests - return {(cls_node.name, fn_node.name) for cls_node, fn_node in iter_tests(tree)} diff --git a/scripts/update_lib/copy_lib.py b/scripts/update_lib/copy_lib.py index 098c8d61163..2788f2ccc83 100644 --- a/scripts/update_lib/copy_lib.py +++ b/scripts/update_lib/copy_lib.py @@ -64,17 +64,14 @@ def copy_lib( # Extract module name and cpython prefix from path path_str = str(src_path).replace("\\", "/") - if "/Lib/" in path_str: - cpython_prefix, after_lib = path_str.split("/Lib/", 1) - # Get module name (first component, without .py) - name = after_lib.split("/")[0] - if name.endswith(".py"): - name = name[:-3] - else: - # Fallback: just copy the single file - lib_path = parse_lib_path(src_path) - _copy_single(src_path, lib_path, verbose) - return + if "/Lib/" not in path_str: + raise ValueError(f"Path must contain '/Lib/' (got: {src_path})") + + 
cpython_prefix, after_lib = path_str.split("/Lib/", 1) + # Get module name (first component, without .py) + name = after_lib.split("/")[0] + if name.endswith(".py"): + name = name[:-3] # Get all paths to copy from DEPENDENCIES table all_src_paths = get_lib_paths(name, cpython_prefix) diff --git a/scripts/update_lib/deps.py b/scripts/update_lib/deps.py index dec64494efc..1e7435e9e0c 100644 --- a/scripts/update_lib/deps.py +++ b/scripts/update_lib/deps.py @@ -7,9 +7,11 @@ - Test dependencies (auto-detected from 'from test import ...') """ -import ast import pathlib +from update_lib.io_utils import read_python_files, safe_parse_ast, safe_read_text +from update_lib.path import construct_lib_path, resolve_module_path + # Manual dependency table for irregular cases # Format: "name" -> {"lib": [...], "test": [...], "data": [...], "hard_deps": [...]} # - lib: override default path (default: name.py or name/) @@ -153,28 +155,18 @@ def get_lib_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.Pa Returns: List of paths to copy """ - paths = [] dep_info = DEPENDENCIES.get(name, {}) # Get main lib path (override or default) if "lib" in dep_info: - paths = [pathlib.Path(f"{cpython_prefix}/Lib/{p}") for p in dep_info["lib"]] + paths = [construct_lib_path(cpython_prefix, p) for p in dep_info["lib"]] else: # Default: try file first, then directory - file_path = pathlib.Path(f"{cpython_prefix}/Lib/{name}.py") - if file_path.exists(): - paths = [file_path] - else: - dir_path = pathlib.Path(f"{cpython_prefix}/Lib/{name}") - if dir_path.exists(): - paths = [dir_path] - else: - paths = [file_path] # Default to file path + paths = [resolve_module_path(name, cpython_prefix, prefer="file")] # Add hard_deps - if "hard_deps" in dep_info: - for dep in dep_info["hard_deps"]: - paths.append(pathlib.Path(f"{cpython_prefix}/Lib/{dep}")) + for dep in dep_info.get("hard_deps", []): + paths.append(construct_lib_path(cpython_prefix, dep)) return paths @@ -191,18 +183,11 @@ def 
get_test_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.P """ if name in DEPENDENCIES and "test" in DEPENDENCIES[name]: return [ - pathlib.Path(f"{cpython_prefix}/Lib/{p}") - for p in DEPENDENCIES[name]["test"] + construct_lib_path(cpython_prefix, p) for p in DEPENDENCIES[name]["test"] ] # Default: try directory first, then file - dir_path = pathlib.Path(f"{cpython_prefix}/Lib/test/test_{name}") - if dir_path.exists(): - return [dir_path] - file_path = pathlib.Path(f"{cpython_prefix}/Lib/test/test_{name}.py") - if file_path.exists(): - return [file_path] - return [dir_path] # Default to directory path + return [resolve_module_path(f"test/test_{name}", cpython_prefix, prefer="dir")] def get_data_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.Path]: @@ -217,8 +202,7 @@ def get_data_paths(name: str, cpython_prefix: str = "cpython") -> list[pathlib.P """ if name in DEPENDENCIES and "data" in DEPENDENCIES[name]: return [ - pathlib.Path(f"{cpython_prefix}/Lib/{p}") - for p in DEPENDENCIES[name]["data"] + construct_lib_path(cpython_prefix, p) for p in DEPENDENCIES[name]["data"] ] return [] @@ -232,9 +216,10 @@ def parse_test_imports(content: str) -> set[str]: Returns: Set of module names imported from test package """ - try: - tree = ast.parse(content) - except SyntaxError: + import ast + + tree = safe_parse_ast(content) + if tree is None: return set() imports = set() @@ -267,9 +252,10 @@ def parse_lib_imports(content: str) -> set[str]: Returns: Set of imported module names (top-level only) """ - try: - tree = ast.parse(content) - except SyntaxError: + import ast + + tree = safe_parse_ast(content) + if tree is None: return set() imports = set() @@ -286,49 +272,125 @@ def parse_lib_imports(content: str) -> set[str]: return imports -def get_soft_deps(name: str, cpython_prefix: str = "cpython") -> set[str]: - """Get soft dependencies by parsing imports from library file. 
+def get_all_imports(name: str, cpython_prefix: str = "cpython") -> set[str]: + """Get all imports from a library file. Args: name: Module name cpython_prefix: CPython directory prefix Returns: - Set of imported stdlib module names + Set of all imported module names """ - lib_paths = get_lib_paths(name, cpython_prefix) - all_imports = set() - for lib_path in lib_paths: + for lib_path in get_lib_paths(name, cpython_prefix): if lib_path.exists(): - if lib_path.is_file(): - try: - content = lib_path.read_text(encoding="utf-8") - all_imports.update(parse_lib_imports(content)) - except (OSError, UnicodeDecodeError): - continue - else: - # Directory - parse all .py files - for py_file in lib_path.glob("**/*.py"): - try: - content = py_file.read_text(encoding="utf-8") - all_imports.update(parse_lib_imports(content)) - except (OSError, UnicodeDecodeError): - continue + for _, content in read_python_files(lib_path): + all_imports.update(parse_lib_imports(content)) + + # Remove self + all_imports.discard(name) + return all_imports + + +def get_soft_deps(name: str, cpython_prefix: str = "cpython") -> set[str]: + """Get soft dependencies by parsing imports from library file. + + Args: + name: Module name + cpython_prefix: CPython directory prefix + + Returns: + Set of imported stdlib module names (those that exist in cpython/Lib/) + """ + all_imports = get_all_imports(name, cpython_prefix) # Filter: only include modules that exist in cpython/Lib/ stdlib_deps = set() for imp in all_imports: - if imp == name: - continue # Skip self - file_path = pathlib.Path(f"{cpython_prefix}/Lib/{imp}.py") - dir_path = pathlib.Path(f"{cpython_prefix}/Lib/{imp}") - if file_path.exists() or dir_path.exists(): + module_path = resolve_module_path(imp, cpython_prefix) + if module_path.exists(): stdlib_deps.add(imp) return stdlib_deps +def get_rust_deps(name: str, cpython_prefix: str = "cpython") -> set[str]: + """Get Rust/C dependencies (imports that don't exist in cpython/Lib/). 
+ + Args: + name: Module name + cpython_prefix: CPython directory prefix + + Returns: + Set of imported module names that are built-in or C extensions + """ + all_imports = get_all_imports(name, cpython_prefix) + soft_deps = get_soft_deps(name, cpython_prefix) + return all_imports - soft_deps + + +def _dircmp_is_same(dcmp) -> bool: + """Recursively check if two directories are identical. + + Args: + dcmp: filecmp.dircmp object + + Returns: + True if directories are identical (including subdirectories) + """ + if dcmp.diff_files or dcmp.left_only or dcmp.right_only: + return False + + # Recursively check subdirectories + for subdir in dcmp.subdirs.values(): + if not _dircmp_is_same(subdir): + return False + + return True + + +def is_up_to_date( + name: str, cpython_prefix: str = "cpython", lib_prefix: str = "Lib" +) -> bool: + """Check if a module is up-to-date by comparing files. + + Args: + name: Module name + cpython_prefix: CPython directory prefix + lib_prefix: Local Lib directory prefix + + Returns: + True if all files match, False otherwise + """ + import filecmp + + lib_paths = get_lib_paths(name, cpython_prefix) + + for cpython_path in lib_paths: + if not cpython_path.exists(): + continue + + # Convert cpython path to local path + # cpython/Lib/foo.py -> Lib/foo.py + rel_path = cpython_path.relative_to(cpython_prefix) + local_path = pathlib.Path(lib_prefix) / rel_path.relative_to("Lib") + + if not local_path.exists(): + return False + + if cpython_path.is_file(): + if not filecmp.cmp(cpython_path, local_path, shallow=False): + return False + else: + # Directory comparison (recursive) + dcmp = filecmp.dircmp(cpython_path, local_path) + if not _dircmp_is_same(dcmp): + return False + + return True + + def get_test_dependencies( test_path: pathlib.Path, ) -> dict[str, list[pathlib.Path]]: @@ -345,20 +407,10 @@ def get_test_dependencies( if not test_path.exists(): return result - # Collect all test files - if test_path.is_file(): - files = [test_path] - else: - 
files = list(test_path.glob("**/*.py")) - # Parse all files for imports (auto-detect deps) all_imports = set() - for f in files: - try: - content = f.read_text(encoding="utf-8") - all_imports.update(parse_test_imports(content)) - except (OSError, UnicodeDecodeError): - continue + for _, content in read_python_files(test_path): + all_imports.update(parse_test_imports(content)) # Also add manual dependencies from TEST_DEPENDENCIES test_name = test_path.stem if test_path.is_file() else test_path.name @@ -430,8 +482,11 @@ def resolve_all_paths( # Auto-detect test dependencies for test_path in result["test"]: deps = get_test_dependencies(test_path) - for dep in deps: - if dep not in result["test_deps"]: - result["test_deps"].append(dep) + for dep_path in deps["hard_deps"]: + if dep_path not in result["test_deps"]: + result["test_deps"].append(dep_path) + for data_path in deps["data"]: + if data_path not in result["data"]: + result["data"].append(data_path) return result diff --git a/scripts/update_lib/io_utils.py b/scripts/update_lib/io_utils.py new file mode 100644 index 00000000000..8bf0083211f --- /dev/null +++ b/scripts/update_lib/io_utils.py @@ -0,0 +1,81 @@ +""" +I/O utilities for update_lib. + +This module provides functions for: +- Safe file reading with error handling +- Safe AST parsing with error handling +- Iterating over Python files +""" + +import ast +import pathlib +from collections.abc import Iterator + + +def safe_read_text(path: pathlib.Path) -> str | None: + """ + Read file content with UTF-8 encoding, returning None on error. + + Args: + path: Path to the file + + Returns: + File content as string, or None if reading fails + """ + try: + return path.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError): + return None + + +def safe_parse_ast(content: str) -> ast.Module | None: + """ + Parse Python content into AST, returning None on syntax error. 
+ + Args: + content: Python source code + + Returns: + AST module, or None if parsing fails + """ + try: + return ast.parse(content) + except SyntaxError: + return None + + +def iter_python_files(path: pathlib.Path) -> Iterator[pathlib.Path]: + """ + Yield Python files from a file or directory. + + If path is a file, yields just that file. + If path is a directory, yields all .py files recursively. + + Args: + path: Path to a file or directory + + Yields: + Paths to Python files + """ + if path.is_file(): + yield path + else: + yield from path.glob("**/*.py") + + +def read_python_files(path: pathlib.Path) -> Iterator[tuple[pathlib.Path, str]]: + """ + Read all Python files from a path, yielding (path, content) pairs. + + Skips files that cannot be read. + + Args: + path: Path to a file or directory + + Yields: + Tuples of (file_path, file_content) + """ + for py_file in iter_python_files(path): + content = safe_read_text(py_file) + if content is not None: + yield py_file, content diff --git a/scripts/update_lib/path.py b/scripts/update_lib/path.py index 3096ec2bebe..d2360e21cd6 100644 --- a/scripts/update_lib/path.py +++ b/scripts/update_lib/path.py @@ -104,3 +104,84 @@ def test_name_from_path(test_path: pathlib.Path) -> str: if test_path.parent.name.startswith("test_"): return f"{test_path.parent.name}.{test_path.stem}" return test_path.stem + + +# --- Utility functions for reducing duplication --- + + +def resolve_module_path( + name: str, prefix: str = "cpython", prefer: str = "file" +) -> pathlib.Path: + """ + Resolve module path, trying file or directory. 
+ + Args: + name: Module name (e.g., "dataclasses", "json") + prefix: CPython directory prefix + prefer: "file" to try .py first, "dir" to try directory first + + Returns: + Path to the module (file or directory) + + Examples: + resolve_module_path("dataclasses") -> cpython/Lib/dataclasses.py + resolve_module_path("json") -> cpython/Lib/json/ + """ + file_path = pathlib.Path(f"{prefix}/Lib/{name}.py") + dir_path = pathlib.Path(f"{prefix}/Lib/{name}") + + if prefer == "file": + if file_path.exists(): + return file_path + if dir_path.exists(): + return dir_path + return file_path # Default to file + else: + if dir_path.exists(): + return dir_path + if file_path.exists(): + return file_path + return dir_path # Default to dir + + +def construct_lib_path(prefix: str, *parts: str) -> pathlib.Path: + """ + Build a path under prefix/Lib/. + + Args: + prefix: Directory prefix (e.g., "cpython") + *parts: Path components after Lib/ + + Returns: + Combined path + + Examples: + construct_lib_path("cpython", "test", "test_foo.py") + -> cpython/Lib/test/test_foo.py + construct_lib_path("cpython", "dataclasses.py") + -> cpython/Lib/dataclasses.py + """ + return pathlib.Path(prefix) / "Lib" / pathlib.Path(*parts) + + +def get_module_name(path: pathlib.Path) -> str: + """ + Extract module name from path, handling __init__.py. 
+ + Args: + path: Path to a Python file or directory + + Returns: + Module name + + Examples: + get_module_name(Path("cpython/Lib/dataclasses.py")) -> "dataclasses" + get_module_name(Path("cpython/Lib/json/__init__.py")) -> "json" + get_module_name(Path("cpython/Lib/json/")) -> "json" + """ + if path.suffix == ".py": + name = path.stem + if name == "__init__": + return path.parent.name + return name + return path.name diff --git a/scripts/update_lib/quick.py b/scripts/update_lib/quick.py index e770e0dedca..5d21952f22f 100644 --- a/scripts/update_lib/quick.py +++ b/scripts/update_lib/quick.py @@ -31,12 +31,16 @@ sys.path.insert(0, str(pathlib.Path(__file__).parent.parent)) +from update_lib.io_utils import safe_read_text from update_lib.path import ( + construct_lib_path, + get_module_name, get_test_files, is_lib_path, is_test_path, lib_to_test_path, parse_lib_path, + resolve_module_path, ) @@ -56,11 +60,14 @@ def collect_original_methods( return None if lib_path.is_file(): - return extract_test_methods(lib_path.read_text()) + content = safe_read_text(lib_path) + return extract_test_methods(content) if content else set() else: result = {} for lib_file in get_test_files(lib_path): - result[lib_file.resolve()] = extract_test_methods(lib_file.read_text()) + content = safe_read_text(lib_file) + if content: + result[lib_file.resolve()] = extract_test_methods(content) return result @@ -128,8 +135,15 @@ def quick( if verbose: print(f"Copying data: {data_src.name}") if data_lib.exists(): - shutil.rmtree(data_lib) - shutil.copytree(data_src, data_lib) + if data_lib.is_dir(): + shutil.rmtree(data_lib) + else: + data_lib.unlink() + if data_src.is_dir(): + shutil.copytree(data_src, data_lib) + else: + data_lib.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(data_src, data_lib) # Step 2: Auto-mark if not no_auto_mark: @@ -272,26 +286,20 @@ def _expand_shortcut(path: pathlib.Path) -> pathlib.Path: if name in DEPENDENCIES and "lib" in DEPENDENCIES[name]: lib_paths = 
DEPENDENCIES[name]["lib"] if lib_paths: - override_path = pathlib.Path(f"cpython/Lib/{lib_paths[0]}") + override_path = construct_lib_path("cpython", lib_paths[0]) if override_path.exists(): return override_path # Test shortcut: test_foo -> cpython/Lib/test/test_foo if name.startswith("test_"): - dir_path = pathlib.Path(f"cpython/Lib/test/{name}") - if dir_path.exists(): - return dir_path - file_path = pathlib.Path(f"cpython/Lib/test/{name}.py") - if file_path.exists(): - return file_path + resolved = resolve_module_path(f"test/{name}", "cpython", prefer="dir") + if resolved.exists(): + return resolved # Library shortcut: foo -> cpython/Lib/foo - file_path = pathlib.Path(f"cpython/Lib/{name}.py") - if file_path.exists(): - return file_path - dir_path = pathlib.Path(f"cpython/Lib/{name}") - if dir_path.exists(): - return dir_path + resolved = resolve_module_path(name, "cpython", prefer="file") + if resolved.exists(): + return resolved # Return original (will likely fail later with a clear error) return path @@ -369,29 +377,30 @@ def main(argv: list[str] | None = None) -> int: # Convert to test path src_path = lib_to_test_path(original_src) if not src_path.exists(): - print(f"Test path does not exist: {src_path}") - return 1 + print(f"Warning: Test path does not exist: {src_path}") + # Skip test processing, but continue with commit + src_path = None - test_path = parse_lib_path(src_path) if not is_lib_path(src_path) else src_path + if src_path is not None: + test_path = ( + parse_lib_path(src_path) if not is_lib_path(src_path) else src_path + ) - # Process the test path - quick( - src_path, - no_migrate=not args.migrate, - no_auto_mark=not args.auto_mark, - mark_failure=args.mark_failure, - skip_build=not args.build, - ) + # Process the test path + quick( + src_path, + no_migrate=not args.migrate, + no_auto_mark=not args.auto_mark, + mark_failure=args.mark_failure, + skip_build=not args.build, + ) # Step 3: Git commit if args.commit: - # Extract module name from path 
- name = original_src.stem - if name == "__init__": - name = original_src.parent.name - cpython_dir = get_cpython_dir(original_src) - git_commit(name, lib_file_path, test_path, cpython_dir) + git_commit( + get_module_name(original_src), lib_file_path, test_path, cpython_dir + ) return 0 except ValueError as e: diff --git a/scripts/update_lib/show_deps.py b/scripts/update_lib/show_deps.py index c4b91ff9644..1dcd3404898 100644 --- a/scripts/update_lib/show_deps.py +++ b/scripts/update_lib/show_deps.py @@ -5,6 +5,8 @@ Usage: python scripts/update_lib deps dis python scripts/update_lib deps dataclasses + python scripts/update_lib deps dis --depth 2 + python scripts/update_lib deps all # Show all modules' dependencies """ import argparse @@ -14,39 +16,210 @@ sys.path.insert(0, str(pathlib.Path(__file__).parent.parent)) -def show_deps(name: str, cpython_prefix: str = "cpython") -> None: - """Show all dependency information for a module.""" +def get_all_modules(cpython_prefix: str = "cpython") -> list[str]: + """Get all top-level module names from cpython/Lib/. + + Returns: + Sorted list of module names (without .py extension) + """ + lib_dir = pathlib.Path(cpython_prefix) / "Lib" + if not lib_dir.exists(): + return [] + + modules = set() + for entry in lib_dir.iterdir(): + # Skip private/internal modules and special directories + if entry.name.startswith(("_", ".")): + continue + # Skip test directory + if entry.name == "test": + continue + + if entry.is_file() and entry.suffix == ".py": + modules.add(entry.stem) + elif entry.is_dir() and (entry / "__init__.py").exists(): + modules.add(entry.name) + + return sorted(modules) + + +def format_deps_tree( + cpython_prefix: str, + lib_prefix: str, + max_depth: int, + *, + name: str | None = None, + soft_deps: set[str] | None = None, + _depth: int = 0, + _visited: set[str] | None = None, + _indent: str = "", +) -> list[str]: + """Format soft dependencies as a tree with up-to-date status. 
+ + Args: + cpython_prefix: CPython directory prefix + lib_prefix: Local Lib directory prefix + max_depth: Maximum recursion depth + name: Module name (used to compute deps if soft_deps not provided) + soft_deps: Pre-computed soft dependencies (optional) + _depth: Current depth (internal) + _visited: Already visited modules (internal) + _indent: Current indentation (internal) + + Returns: + List of formatted lines + """ + from update_lib.deps import ( + get_rust_deps, + get_soft_deps, + is_up_to_date, + ) + + lines = [] + + if _visited is None: + _visited = set() + + # Compute deps from name if not provided + if soft_deps is None: + soft_deps = get_soft_deps(name, cpython_prefix) if name else set() + + soft_deps = sorted(soft_deps) + + if not soft_deps: + return lines + + # Separate up-to-date and outdated modules + up_to_date_deps = [] + outdated_deps = [] + dup_deps = [] + + for dep in soft_deps: + up_to_date = is_up_to_date(dep, cpython_prefix, lib_prefix) + if up_to_date: + # Up-to-date modules collected compactly, no dup tracking needed + up_to_date_deps.append(dep) + elif dep in _visited: + # Only track dup for outdated modules + dup_deps.append(dep) + else: + outdated_deps.append(dep) + + # Show outdated modules with expansion + for dep in outdated_deps: + dep_native = get_rust_deps(dep, cpython_prefix) + native_suffix = ( + f" (native: {', '.join(sorted(dep_native))})" if dep_native else "" + ) + lines.append(f"{_indent}- [ ] {dep}{native_suffix}") + _visited.add(dep) + + # Recurse if within depth limit + if _depth < max_depth - 1: + lines.extend( + format_deps_tree( + cpython_prefix, + lib_prefix, + max_depth, + name=dep, + _depth=_depth + 1, + _visited=_visited, + _indent=_indent + " ", + ) + ) + + # Show duplicates compactly (only for outdated) + if dup_deps: + lines.append(f"{_indent}- [ ] {', '.join(dup_deps)}") + + # Show up-to-date modules compactly on one line + if up_to_date_deps: + lines.append(f"{_indent}- [x] {', '.join(up_to_date_deps)}") + + 
return lines + + +def format_deps( + name: str, + cpython_prefix: str = "cpython", + lib_prefix: str = "Lib", + max_depth: int = 10, + _visited: set[str] | None = None, +) -> list[str]: + """Format all dependency information for a module. + + Args: + name: Module name + cpython_prefix: CPython directory prefix + lib_prefix: Local Lib directory prefix + max_depth: Maximum recursion depth + _visited: Shared visited set for deduplication across modules + + Returns: + List of formatted lines + """ from update_lib.deps import ( DEPENDENCIES, get_lib_paths, - get_soft_deps, get_test_paths, ) - print(f"Module: {name}") + if _visited is None: + _visited = set() + + lines = [] # lib paths lib_paths = get_lib_paths(name, cpython_prefix) for p in lib_paths: exists = "+" if p.exists() else "-" - print(f" [{exists}] lib: {p}") + lines.append(f"[{exists}] lib: {p}") # test paths test_paths = get_test_paths(name, cpython_prefix) for p in test_paths: exists = "+" if p.exists() else "-" - print(f" [{exists}] test: {p}") + lines.append(f"[{exists}] test: {p}") # hard_deps (from DEPENDENCIES table) dep_info = DEPENDENCIES.get(name, {}) hard_deps = dep_info.get("hard_deps", []) if hard_deps: - print(f" hard_deps: {hard_deps}") + lines.append(f"hard_deps: {hard_deps}") - # soft_deps (auto-detected) - soft_deps = sorted(get_soft_deps(name, cpython_prefix)) - if soft_deps: - print(f" soft_deps: {soft_deps}") + lines.append("soft_deps:") + lines.extend( + format_deps_tree( + cpython_prefix, lib_prefix, max_depth, soft_deps={name}, _visited=_visited + ) + ) + + return lines + + +def show_deps( + names: list[str], + cpython_prefix: str = "cpython", + lib_prefix: str = "Lib", + max_depth: int = 10, +) -> None: + """Show all dependency information for modules.""" + # Expand "all" to all module names + expanded_names = [] + for name in names: + if name == "all": + expanded_names.extend(get_all_modules(cpython_prefix)) + else: + expanded_names.append(name) + + # Shared visited set across all 
modules + visited: set[str] = set() + + for i, name in enumerate(expanded_names): + if i > 0: + print() # blank line between modules + for line in format_deps(name, cpython_prefix, lib_prefix, max_depth, visited): + print(line) def main(argv: list[str] | None = None) -> int: @@ -55,19 +228,31 @@ def main(argv: list[str] | None = None) -> int: formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( - "name", - help="Module name (e.g., dis, dataclasses, datetime)", + "names", + nargs="+", + help="Module names (e.g., dis, dataclasses) or 'all' for all modules", ) parser.add_argument( "--cpython", default="cpython", help="CPython directory prefix (default: cpython)", ) + parser.add_argument( + "--lib", + default="Lib", + help="Local Lib directory prefix (default: Lib)", + ) + parser.add_argument( + "--depth", + type=int, + default=10, + help="Maximum recursion depth for soft_deps tree (default: 10)", + ) args = parser.parse_args(argv) try: - show_deps(args.name, args.cpython) + show_deps(args.names, args.cpython, args.lib, args.depth) return 0 except Exception as e: print(f"Error: {e}", file=sys.stderr) diff --git a/scripts/update_lib/tests/test_copy_lib.py b/scripts/update_lib/tests/test_copy_lib.py index 0b3f60b77b8..81ca73b5310 100644 --- a/scripts/update_lib/tests/test_copy_lib.py +++ b/scripts/update_lib/tests/test_copy_lib.py @@ -58,5 +58,18 @@ def test_removes_existing_before_copy(self): self.assertEqual(dst.read_text(), "new content") +class TestCopyLib(unittest.TestCase): + """Tests for copy_lib function.""" + + def test_raises_on_path_without_lib(self): + """Test that copy_lib raises ValueError when path doesn't contain /Lib/.""" + from update_lib.copy_lib import copy_lib + + with self.assertRaises(ValueError) as ctx: + copy_lib(pathlib.Path("some/path/without/lib.py")) + + self.assertIn("/Lib/", str(ctx.exception)) + + if __name__ == "__main__": unittest.main() diff --git a/scripts/update_lib/tests/test_deps.py 
b/scripts/update_lib/tests/test_deps.py index 0e807f9cc5e..41a51990ad0 100644 --- a/scripts/update_lib/tests/test_deps.py +++ b/scripts/update_lib/tests/test_deps.py @@ -342,5 +342,85 @@ def test_filters_nonexistent(self): self.assertEqual(soft_deps, {"bar"}) +class TestDircmpIsSame(unittest.TestCase): + """Tests for _dircmp_is_same function.""" + + def test_identical_directories(self): + """Test that identical directories return True.""" + import filecmp + + from update_lib.deps import _dircmp_is_same + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = pathlib.Path(tmpdir) + dir1 = tmpdir / "dir1" + dir2 = tmpdir / "dir2" + dir1.mkdir() + dir2.mkdir() + + (dir1 / "file.py").write_text("content") + (dir2 / "file.py").write_text("content") + + dcmp = filecmp.dircmp(dir1, dir2) + self.assertTrue(_dircmp_is_same(dcmp)) + + def test_different_files(self): + """Test that directories with different files return False.""" + import filecmp + + from update_lib.deps import _dircmp_is_same + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = pathlib.Path(tmpdir) + dir1 = tmpdir / "dir1" + dir2 = tmpdir / "dir2" + dir1.mkdir() + dir2.mkdir() + + (dir1 / "file.py").write_text("content1") + (dir2 / "file.py").write_text("content2") + + dcmp = filecmp.dircmp(dir1, dir2) + self.assertFalse(_dircmp_is_same(dcmp)) + + def test_nested_identical(self): + """Test that nested identical directories return True.""" + import filecmp + + from update_lib.deps import _dircmp_is_same + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = pathlib.Path(tmpdir) + dir1 = tmpdir / "dir1" + dir2 = tmpdir / "dir2" + (dir1 / "sub").mkdir(parents=True) + (dir2 / "sub").mkdir(parents=True) + + (dir1 / "sub" / "file.py").write_text("content") + (dir2 / "sub" / "file.py").write_text("content") + + dcmp = filecmp.dircmp(dir1, dir2) + self.assertTrue(_dircmp_is_same(dcmp)) + + def test_nested_different(self): + """Test that nested directories with differences return False.""" + import 
filecmp + + from update_lib.deps import _dircmp_is_same + + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir = pathlib.Path(tmpdir) + dir1 = tmpdir / "dir1" + dir2 = tmpdir / "dir2" + (dir1 / "sub").mkdir(parents=True) + (dir2 / "sub").mkdir(parents=True) + + (dir1 / "sub" / "file.py").write_text("content1") + (dir2 / "sub" / "file.py").write_text("content2") + + dcmp = filecmp.dircmp(dir1, dir2) + self.assertFalse(_dircmp_is_same(dcmp)) + + if __name__ == "__main__": unittest.main() From af1325523ae10ff6ea27f260939421f231510944 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Wed, 21 Jan 2026 00:09:57 +0900 Subject: [PATCH 3/3] show deps CI --- .github/workflows/lib-deps-check.yaml | 117 ++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 .github/workflows/lib-deps-check.yaml diff --git a/.github/workflows/lib-deps-check.yaml b/.github/workflows/lib-deps-check.yaml new file mode 100644 index 00000000000..1903672045d --- /dev/null +++ b/.github/workflows/lib-deps-check.yaml @@ -0,0 +1,117 @@ +name: Lib Dependencies Check + +on: + pull_request_target: + types: [opened, synchronize, reopened] + paths: + - 'Lib/**' + +concurrency: + group: lib-deps-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + check_deps: + permissions: + pull-requests: write + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - name: Checkout base branch + uses: actions/checkout@v6.0.1 + with: + # Use base branch for scripts (security: don't run PR code with elevated permissions) + ref: ${{ github.event.pull_request.base.ref }} + fetch-depth: 0 + + - name: Fetch PR head + run: | + git fetch origin ${{ github.event.pull_request.head.sha }} + + - name: Checkout CPython + run: | + git clone --depth 1 --branch v3.14.2 https://github.com/python/cpython.git cpython + + - name: Get changed Lib files + id: changed-files + run: | + # Get the list of changed files under Lib/ + changed=$(git diff --name-only ${{ 
github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} -- 'Lib/*.py' 'Lib/**/*.py' | head -50)
+          echo "Changed files:"
+          echo "$changed"
+
+          # Extract unique module names (top-level only, skip test/)
+          modules=""
+          for file in $changed; do
+            # Skip test files
+            if [[ "$file" == Lib/test/* ]]; then
+              continue
+            fi
+            # Extract module name: Lib/foo.py -> foo, Lib/foo/__init__.py -> foo
+            module=$(echo "$file" | sed -E 's|^Lib/||; s|/__init__\.py$||; s|\.py$||; s|/.*||')
+            if [[ -n "$module" && ! " $modules " =~ " $module " ]]; then
+              modules="$modules $module"
+            fi
+          done
+
+          modules=$(echo "$modules" | xargs) # trim whitespace
+          echo "Detected modules: $modules"
+          echo "modules=$modules" >> $GITHUB_OUTPUT
+
+      - name: Setup Python
+        if: steps.changed-files.outputs.modules != ''
+        uses: actions/setup-python@v6.1.0
+        with:
+          python-version: "3.12"
+
+      - name: Run deps check
+        if: steps.changed-files.outputs.modules != ''
+        id: deps-check
+        run: |
+          # Run deps for all modules at once
+          python scripts/update_lib deps ${{ steps.changed-files.outputs.modules }} --depth 2 > /tmp/deps_output.txt 2>&1 || true
+
+          # Read output for GitHub Actions (multiline values require heredoc-style delimiters)
+          echo "deps_output<<EOF" >> $GITHUB_OUTPUT
+          cat /tmp/deps_output.txt >> $GITHUB_OUTPUT
+          echo "EOF" >> $GITHUB_OUTPUT
+
+          # Check if there's any meaningful output
+          if [ -s /tmp/deps_output.txt ]; then
+            echo "has_output=true" >> $GITHUB_OUTPUT
+          else
+            echo "has_output=false" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Post comment
+        if: steps.deps-check.outputs.has_output == 'true'
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          header: lib-deps-check
+          number: ${{ github.event.pull_request.number }}
+          message: |
+            ## 📦 Library Dependencies
+
+            The following Lib/ modules were modified. Here are their dependencies:
+
+            <details>
+            <summary>Click to expand dependency information</summary>
+
+            ```
+            ${{ steps.deps-check.outputs.deps_output }}
+            ```
+
+            </details>
+
+            **Legend:**
+            - `[+]` path exists, `[-]` path missing
+            - `[x]` up-to-date, `[ ]` outdated
+            - `native:` Rust/C extension modules
+
+      - name: Remove comment if no Lib changes
+        if: steps.changed-files.outputs.modules == ''
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          header: lib-deps-check
+          number: ${{ github.event.pull_request.number }}
+          delete: true