diff --git a/scripts/update_lib/deps.py b/scripts/update_lib/deps.py index e325264838e..878ad3a12a8 100644 --- a/scripts/update_lib/deps.py +++ b/scripts/update_lib/deps.py @@ -19,7 +19,7 @@ # === Cross-process cache using shelve === -def _get_cpython_version(cpython_prefix: str = "cpython") -> str: +def _get_cpython_version(cpython_prefix: str) -> str: """Get CPython version from git tag for cache namespace.""" try: result = subprocess.run( @@ -60,7 +60,7 @@ def clear_import_graph_caches() -> None: # regrtest is in Lib/test/libregrtest/, not Lib/libregrtest/ "regrtest": { "lib": ["test/libregrtest"], - "test": ["test/test_regrtest"], + "test": ["test_regrtest"], "data": ["test/regrtestdata"], }, # Rust-implemented modules (no lib file, only test) @@ -68,63 +68,89 @@ def clear_import_graph_caches() -> None: "lib": [], "hard_deps": ["_pylong.py"], "test": [ - "test/test_int.py", - "test/test_long.py", + "test_int.py", + "test_long.py", ], }, "exception": { "lib": [], "test": [ - "test/test_exceptions.py", - "test/test_baseexception.py", - "test/test_except_star.py", - "test/test_exception_group.py", - "test/test_exception_hierarchy.py", - "test/test_exception_variations.py", + "test_exceptions.py", + "test_baseexception.py", + "test_except_star.py", + "test_exception_group.py", + "test_exception_hierarchy.py", + "test_exception_variations.py", ], }, "dict": { "lib": [], "test": [ - "test/test_dict.py", - "test/test_dictcomps.py", - "test/test_dictviews.py", - "test/test_userdict.py", + "test_dict.py", + "test_dictcomps.py", + "test_dictviews.py", + "test_userdict.py", ], }, "list": { "lib": [], "test": [ - "test/test_list.py", - "test/test_listcomps.py", - "test/test_userlist.py", + "test_list.py", + "test_listcomps.py", + "test_userlist.py", ], }, + "__future__": { + "test": [ + "test___future__.py", + "test_future_stmt.py", + ], + }, + "site": { + "hard_deps": ["_sitebuiltins.py"], + }, + "opcode": { + "hard_deps": ["_opcode_metadata.py"], + "test": [ + "test_opcode.py", + "test__opcode.py", + "test_opcodes.py", + ], + }, + "pickle": { + "hard_deps": ["_compat_pickle.py"], + }, + "re": { + "hard_deps": ["sre_compile.py", "sre_constants.py", "sre_parse.py"], + }, + "weakref": { + "hard_deps": ["_weakrefset.py"], + }, "codecs": { "test": [ - "test/test_codecs.py", - "test/test_codeccallbacks.py", - "test/test_codecencodings_cn.py", - "test/test_codecencodings_hk.py", - "test/test_codecencodings_iso2022.py", - "test/test_codecencodings_jp.py", - "test/test_codecencodings_kr.py", - "test/test_codecencodings_tw.py", - "test/test_codecmaps_cn.py", - "test/test_codecmaps_hk.py", - "test/test_codecmaps_jp.py", - "test/test_codecmaps_kr.py", - "test/test_codecmaps_tw.py", - "test/test_charmapcodec.py", - "test/test_multibytecodec.py", + "test_codecs.py", + "test_codeccallbacks.py", + "test_codecencodings_cn.py", + "test_codecencodings_hk.py", + "test_codecencodings_iso2022.py", + "test_codecencodings_jp.py", + "test_codecencodings_kr.py", + "test_codecencodings_tw.py", + "test_codecmaps_cn.py", + "test_codecmaps_hk.py", + "test_codecmaps_jp.py", + "test_codecmaps_kr.py", + "test_codecmaps_tw.py", + "test_charmapcodec.py", + "test_multibytecodec.py", ], }, # Non-pattern hard_deps (can't be auto-detected) "ast": { "hard_deps": ["_ast_unparse.py"], "test": [ - "test/test_ast.py", - "test/test_unparse.py", + "test_ast.py", + "test_unparse.py", ], }, # Data directories @@ -139,210 +165,238 @@ def clear_import_graph_caches() -> None: "lib": ["test/support"], "data": ["test/wheeldata"], "test": [ - "test/test_support.py", - "test/test_script_helper.py", + "test_support.py", + "test_script_helper.py", ], }, # test_htmlparser tests html.parser "html": { - "test": ["test/test_html.py", "test/test_htmlparser.py"], + "hard_deps": ["_markupbase.py"], + "test": ["test_html.py", "test_htmlparser.py"], }, "xml": { "test": [ - "test/test_xml_etree.py", - "test/test_xml_etree_c.py", - "test/test_minidom.py", - "test/test_pulldom.py", - "test/test_pyexpat.py", - "test/test_sax.py", + "test_xml_etree.py", + "test_xml_etree_c.py", + "test_minidom.py", + "test_pulldom.py", + "test_pyexpat.py", + "test_sax.py", ], }, "multiprocessing": { "test": [ - "test/test_multiprocessing_fork", - "test/test_multiprocessing_forkserver", - "test/test_multiprocessing_spawn", + "test_multiprocessing_fork", + "test_multiprocessing_forkserver", + "test_multiprocessing_spawn", ], }, "urllib": { "test": [ - "test/test_urllib.py", - "test/test_urllib2.py", - "test/test_urllib2_localnet.py", - "test/test_urllib2net.py", - "test/test_urllibnet.py", - "test/test_urlparse.py", - "test/test_urllib_response.py", - "test/test_robotparser.py", + "test_urllib.py", + "test_urllib2.py", + "test_urllib2_localnet.py", + "test_urllib2net.py", + "test_urllibnet.py", + "test_urlparse.py", + "test_urllib_response.py", + "test_robotparser.py", ], }, "collections": { "test": [ - "test/test_collections.py", - "test/test_deque.py", - "test/test_defaultdict.py", - "test/test_ordered_dict.py", + "test_collections.py", + "test_deque.py", + "test_defaultdict.py", + "test_ordered_dict.py", ], }, "http": { "test": [ - "test/test_httplib.py", - "test/test_http_cookiejar.py", - "test/test_http_cookies.py", - "test/test_httpservers.py", + "test_httplib.py", + "test_http_cookiejar.py", + "test_http_cookies.py", + "test_httpservers.py", ], }, "unicode": { "lib": [], "test": [ - "test/test_unicode_file.py", - "test/test_unicode_file_functions.py", - "test/test_unicode_identifiers.py", - "test/test_unicodedata.py", + "test_unicodedata.py", + "test_unicode_file.py", + "test_unicode_file_functions.py", + "test_unicode_identifiers.py", + "test_ucn.py", ], }, "typing": { "test": [ - "test/test_typing.py", - "test/test_type_aliases.py", - "test/test_type_annotations.py", - "test/test_type_params.py", - "test/test_genericalias.py", + "test_typing.py", + "test_type_aliases.py", + "test_type_annotations.py", + "test_type_params.py", + "test_genericalias.py", ], }, "unpack": { "lib": [], "test": [ - "test/test_unpack.py", - "test/test_unpack_ex.py", + "test_unpack.py", + "test_unpack_ex.py", ], }, "zipimport": { "test": [ - "test/test_zipimport.py", - "test/test_zipimport_support.py", + "test_zipimport.py", + "test_zipimport_support.py", ], }, "time": { "lib": [], "test": [ - "test/test_time.py", - "test/test_strftime.py", + "test_time.py", + "test_strftime.py", ], }, "sys": { "lib": [], "test": [ - "test/test_sys.py", - "test/test_syslog.py", - "test/test_sys_setprofile.py", - "test/test_sys_settrace.py", + "test_sys.py", + "test_syslog.py", + "test_sys_setprofile.py", + "test_sys_settrace.py", ], }, "str": { "lib": [], "test": [ - "test/test_str.py", - "test/test_fstring.py", - "test/test_string_literals.py", + "test_str.py", + "test_fstring.py", + "test_string_literals.py", ], }, "thread": { "lib": [], "test": [ - "test/test_thread.py", - "test/test_thread_local_bytecode.py", - "test/test_threadsignals.py", + "test_thread.py", + "test_thread_local_bytecode.py", + "test_threadsignals.py", ], }, "threading": { + "hard_deps": ["_threading_local.py"], "test": [ - "test/test_threading.py", - "test/test_threadedtempfile.py", - "test/test_threading_local.py", + "test_threading.py", + "test_threadedtempfile.py", + "test_threading_local.py", ], }, "class": { "lib": [], "test": [ - "test/test_class.py", - "test/test_genericclass.py", - "test/test_subclassinit.py", + "test_class.py", + "test_genericclass.py", + "test_subclassinit.py", ], }, "generator": { "lib": [], "test": [ - "test/test_generators.py", - "test/test_genexps.py", - "test/test_generator_stop.py", - "test/test_yield_from.py", + "test_generators.py", + "test_genexps.py", + "test_generator_stop.py", + "test_yield_from.py", ], }, "descr": { "lib": [], "test": [ - "test/test_descr.py", - "test/test_descrtut.py", + "test_descr.py", + "test_descrtut.py", ], }, "contextlib": { "test": [ - "test/test_contextlib.py", - "test/test_contextlib_async.py", + "test_contextlib.py", + "test_contextlib_async.py", ], }, "io": { "test": [ - "test/test_io.py", - "test/test_bufio.py", - "test/test_fileio.py", - "test/test_memoryio.py", + "test_io.py", + "test_bufio.py", + "test_fileio.py", + "test_memoryio.py", ], }, "dbm": { "test": [ - "test/test_dbm.py", - "test/test_dbm_gnu.py", - "test/test_dbm_ndbm.py", + "test_dbm.py", + "test_dbm_gnu.py", + "test_dbm_ndbm.py", ], }, "datetime": { + "hard_deps": ["_strptime.py"], + "test": [ + "test_datetime.py", + "test_strptime.py", + ], + }, + "concurrent": { + "test": [ + "test_concurrent_futures", + ], + }, + "locale": { + "test": [ + "test_locale.py", + "test__locale.py", + ], + }, + "numbers": { "test": [ - "test/test_datetime.py", - "test/test_strptime.py", + "test_numbers.py", + "test_abstract_numbers.py", ], }, "file": { "lib": [], "test": [ - "test/test_file.py", - "test/test_largefile.py", + "test_file.py", + "test_largefile.py", ], }, "fcntl": { "lib": [], "test": [ - "test/test_fcntl.py", - "test/test_ioctl.py", + "test_fcntl.py", + "test_ioctl.py", + ], + }, + "select": { + "lib": [], + "test": [ + "test_select.py", + "test_poll.py", ], }, "xmlrpc": { "test": [ - "test/test_xmlrpc.py", - "test/test_docxmlrpc.py", + "test_xmlrpc.py", + "test_docxmlrpc.py", ], }, "ctypes": { "test": [ - "test/test_ctypes", - "test/test_stable_abi_ctypes.py", + "test_ctypes", + "test_stable_abi_ctypes.py", ], }, } -def resolve_hard_dep_parent(name: str, cpython_prefix: str = "cpython") -> str | None: +def resolve_hard_dep_parent(name: str, cpython_prefix: str) -> str | None: """Resolve a hard_dep name to its parent module. Only returns a parent if the file is actually tracked: @@ -391,6 +445,30 @@ def resolve_hard_dep_parent(name: str, cpython_prefix: str = "cpython") -> str | return None +def resolve_test_to_lib(test_name: str) -> str | None: + """Resolve a test name to its library group from DEPENDENCIES. + + Args: + test_name: Test name with or without test_ prefix (e.g., "test_urllib2" or "urllib2") + + Returns: + Library name if test belongs to a group, None otherwise + """ + # Normalize: add test_ prefix if not present + if not test_name.startswith("test_"): + test_name = f"test_{test_name}" + + for lib_name, dep_info in DEPENDENCIES.items(): + tests = dep_info.get("test", []) + for test_path in tests: + # test_path is like "test_urllib2.py" or "test_multiprocessing_fork" + path_stem = test_path[:-3] if test_path.endswith(".py") else test_path + if path_stem == test_name: + return lib_name + + return None + + # Test-specific dependencies (only when auto-detection isn't enough) # - hard_deps: files to migrate (tightly coupled, must be migrated together) # - data: directories to copy without migration @@ -476,9 +554,7 @@ def resolve_hard_dep_parent(name: str, cpython_prefix: str = "cpython") -> str | @functools.cache -def get_lib_paths( - name: str, cpython_prefix: str = "cpython" -) -> tuple[pathlib.Path, ...]: +def get_lib_paths(name: str, cpython_prefix: str) -> tuple[pathlib.Path, ...]: """Get all library paths for a module. Args: @@ -510,10 +586,35 @@ def get_lib_paths( return tuple(paths) +def get_all_hard_deps(name: str, cpython_prefix: str) -> list[str]: + """Get all hard_deps for a module (explicit + auto-detected). + + Args: + name: Module name (e.g., "decimal", "datetime") + cpython_prefix: CPython directory prefix + + Returns: + List of hard_dep names (without .py extension) + """ + dep_info = DEPENDENCIES.get(name, {}) + hard_deps = set() + + # Explicit hard_deps from DEPENDENCIES + for hd in dep_info.get("hard_deps", []): + # Remove .py extension if present + hard_deps.add(hd[:-3] if hd.endswith(".py") else hd) + + # Auto-detect _py{module}.py or _py_{module}.py patterns + for pattern in [f"_py{name}.py", f"_py_{name}.py"]: + auto_path = construct_lib_path(cpython_prefix, pattern) + if auto_path.exists(): + hard_deps.add(auto_path.stem) + + return sorted(hard_deps) + + @functools.cache -def get_test_paths( - name: str, cpython_prefix: str = "cpython" -) -> tuple[pathlib.Path, ...]: +def get_test_paths(name: str, cpython_prefix: str) -> tuple[pathlib.Path, ...]: """Get all test paths for a module. Args: @@ -525,7 +626,8 @@ def get_test_paths( """ if name in DEPENDENCIES and "test" in DEPENDENCIES[name]: return tuple( - construct_lib_path(cpython_prefix, p) for p in DEPENDENCIES[name]["test"] + construct_lib_path(cpython_prefix, f"test/{p}") + for p in DEPENDENCIES[name]["test"] ) # Default: try directory first, then file @@ -625,7 +727,7 @@ def parse_lib_imports(content: str) -> set[str]: @functools.cache -def get_all_imports(name: str, cpython_prefix: str = "cpython") -> frozenset[str]: +def get_all_imports(name: str, cpython_prefix: str) -> frozenset[str]: """Get all imports from a library file. Args: @@ -647,7 +749,7 @@ def get_all_imports(name: str, cpython_prefix: str = "cpython") -> frozenset[str @functools.cache -def get_soft_deps(name: str, cpython_prefix: str = "cpython") -> frozenset[str]: +def get_soft_deps(name: str, cpython_prefix: str) -> frozenset[str]: """Get soft dependencies by parsing imports from library file. Args: @@ -670,7 +772,7 @@ def get_soft_deps(name: str, cpython_prefix: str = "cpython") -> frozenset[str]: @functools.cache -def get_rust_deps(name: str, cpython_prefix: str = "cpython") -> frozenset[str]: +def get_rust_deps(name: str, cpython_prefix: str) -> frozenset[str]: """Get Rust/C dependencies (imports that don't exist in cpython/Lib/). Args: @@ -706,9 +808,7 @@ def _dircmp_is_same(dcmp) -> bool: @functools.cache -def is_up_to_date( - name: str, cpython_prefix: str = "cpython", lib_prefix: str = "Lib" -) -> bool: +def is_up_to_date(name: str, cpython_prefix: str, lib_prefix: str) -> bool: """Check if a module is up-to-date by comparing files. Args: @@ -889,7 +989,7 @@ def _build_test_import_graph( # Cross-process cache (only for standard Lib/test directory) use_file_cache = _is_standard_lib_path(cache_key) if use_file_cache: - version = _get_cpython_version() + version = _get_cpython_version("cpython") shelve_key = f"test_import_graph:{version}" try: with shelve.open(_get_cache_path()) as db: @@ -950,7 +1050,7 @@ def _build_test_import_graph( _lib_import_graph_cache: dict[str, dict[str, set[str]]] = {} -def _build_lib_import_graph(lib_prefix: str = "Lib") -> dict[str, set[str]]: +def _build_lib_import_graph(lib_prefix: str) -> dict[str, set[str]]: """Build import graph for Lib modules (full module paths like urllib.request). Uses cross-process shelve cache based on CPython version. @@ -968,7 +1068,7 @@ def _build_lib_import_graph(lib_prefix: str = "Lib") -> dict[str, set[str]]: # Cross-process cache (only for standard Lib directory) use_file_cache = _is_standard_lib_path(lib_prefix) if use_file_cache: - version = _get_cpython_version() + version = _get_cpython_version("cpython") shelve_key = f"lib_import_graph:{version}" try: with shelve.open(_get_cache_path()) as db: @@ -1106,7 +1206,7 @@ def _consolidate_submodules( def find_dependent_tests_tree( module_name: str, - lib_prefix: str = "Lib", + lib_prefix: str, max_depth: int = 1, _depth: int = 0, _visited_tests: set[str] | None = None, diff --git a/scripts/update_lib/migrate.py b/scripts/update_lib/migrate.py index d0f049ec117..22ec9517fcc 100644 --- a/scripts/update_lib/migrate.py +++ b/scripts/update_lib/migrate.py @@ -38,12 +38,12 @@ def patch_single_content( # Extract patches from existing file (if exists) if lib_path.exists(): - patches = extract_patches(lib_path.read_text()) + patches = extract_patches(lib_path.read_text(encoding="utf-8")) else: patches = {} # Apply patches to source content - src_content = src_path.read_text() + src_content = src_path.read_text(encoding="utf-8") return apply_patches(src_content, patches) @@ -70,10 +70,10 @@ def patch_file( else: if verbose: print(f"Copying: {src_path} -> {lib_path}") - content = src_path.read_text() + content = src_path.read_text(encoding="utf-8") lib_path.parent.mkdir(parents=True, exist_ok=True) - lib_path.write_text(content) + lib_path.write_text(content, encoding="utf-8") def patch_directory( @@ -105,10 +105,10 @@ def patch_directory( else: if verbose: print(f"Copying: {src_file} -> {lib_file}") - content = src_file.read_text() + content = src_file.read_text(encoding="utf-8") lib_file.parent.mkdir(parents=True, exist_ok=True) - lib_file.write_text(content) + lib_file.write_text(content, encoding="utf-8") def main(argv: list[str] | None = None) -> int: diff --git a/scripts/update_lib/quick.py b/scripts/update_lib/quick.py index 5d21952f22f..19c5714c8f5 100644 --- a/scripts/update_lib/quick.py +++ b/scripts/update_lib/quick.py @@ -31,6 +31,7 @@ sys.path.insert(0, str(pathlib.Path(__file__).parent.parent)) +from update_lib.deps import get_test_paths from update_lib.io_utils import safe_read_text from update_lib.path import ( construct_lib_path, @@ -206,7 +207,7 @@ def get_cpython_version(cpython_dir: pathlib.Path) -> str: def git_commit( name: str, lib_path: pathlib.Path | None, - test_path: pathlib.Path | None, + test_paths: list[pathlib.Path] | pathlib.Path | None, cpython_dir: pathlib.Path, verbose: bool = True, ) -> bool: @@ -215,7 +216,7 @@ def git_commit( Args: name: Module name (e.g., "dataclasses") lib_path: Path to library file/directory (or None) - test_path: Path to test file/directory (or None) + test_paths: Path(s) to test file/directory (or None) cpython_dir: Path to cpython directory verbose: Print progress messages @@ -224,12 +225,19 @@ def git_commit( """ import subprocess + # Normalize test_paths to list + if test_paths is None: + test_paths = [] + elif isinstance(test_paths, pathlib.Path): + test_paths = [test_paths] + # Stage changes paths_to_add = [] if lib_path and lib_path.exists(): paths_to_add.append(str(lib_path)) - if test_path and test_path.exists(): - paths_to_add.append(str(test_path)) + for test_path in test_paths: + if test_path and test_path.exists(): + paths_to_add.append(str(test_path)) if not paths_to_add: return False @@ -301,6 +309,12 @@ def _expand_shortcut(path: pathlib.Path) -> pathlib.Path: if resolved.exists(): return resolved + # Extension module shortcut: winreg -> cpython/Lib/test/test_winreg + # For C/Rust extension modules that have no Python source but have tests + resolved = resolve_module_path(f"test/test_{name}", "cpython", prefer="dir") + if resolved.exists(): + return resolved + # Return original (will likely fail later with a clear error) return path @@ -374,19 +388,42 @@ def main(argv: list[str] | None = None) -> int: copy_lib(src_path) - # Convert to test path - src_path = lib_to_test_path(original_src) - if not src_path.exists(): - print(f"Warning: Test path does not exist: {src_path}") - # Skip test processing, but continue with commit - src_path = None - - if src_path is not None: + # Get all test paths from DEPENDENCIES (or fall back to default) + module_name = get_module_name(original_src) + cpython_dir = get_cpython_dir(original_src) + test_src_paths = get_test_paths(module_name, str(cpython_dir)) + + # Fall back to default test path if DEPENDENCIES has no entry + if not test_src_paths: + default_test = lib_to_test_path(original_src) + if default_test.exists(): + test_src_paths = (default_test,) + + # Process all test paths + test_paths_for_commit = [] + for test_src in test_src_paths: + if not test_src.exists(): + print(f"Warning: Test path does not exist: {test_src}") + continue + + test_lib_path = parse_lib_path(test_src) + test_paths_for_commit.append(test_lib_path) + + quick( + test_src, + no_migrate=not args.migrate, + no_auto_mark=not args.auto_mark, + mark_failure=args.mark_failure, + skip_build=not args.build, + ) + + test_paths = test_paths_for_commit + else: + # It's a test path - process single test test_path = ( parse_lib_path(src_path) if not is_lib_path(src_path) else src_path ) - # Process the test path quick( src_path, no_migrate=not args.migrate, @@ -394,12 +431,13 @@ def main(argv: list[str] | None = None) -> int: mark_failure=args.mark_failure, skip_build=not args.build, ) + test_paths = [test_path] # Step 3: Git commit if args.commit: cpython_dir = get_cpython_dir(original_src) git_commit( - get_module_name(original_src), lib_file_path, test_path, cpython_dir + get_module_name(original_src), lib_file_path, test_paths, cpython_dir ) return 0 diff --git a/scripts/update_lib/show_deps.py b/scripts/update_lib/show_deps.py index a0b147f0103..fb9ea1089e9 100644 --- a/scripts/update_lib/show_deps.py +++ b/scripts/update_lib/show_deps.py @@ -16,7 +16,7 @@ sys.path.insert(0, str(pathlib.Path(__file__).parent.parent)) -def get_all_modules(cpython_prefix: str = "cpython") -> list[str]: +def get_all_modules(cpython_prefix: str) -> list[str]: """Get all top-level module names from cpython/Lib/. Includes private modules (_*) that are not hard_deps of other modules. @@ -46,12 +46,9 @@ def get_all_modules(cpython_prefix: str = "cpython") -> list[str]: else: continue - # Skip private modules that are hard_deps of other modules - # e.g., _pydatetime is a hard_dep of datetime, so skip it - if ( - name.startswith("_") - and resolve_hard_dep_parent(name, cpython_prefix) is not None - ): + # Skip modules that are hard_deps of other modules + # e.g., _pydatetime is a hard_dep of datetime, pydoc_data is a hard_dep of pydoc + if resolve_hard_dep_parent(name, cpython_prefix) is not None: continue modules.add(name) @@ -66,6 +63,7 @@ def format_deps_tree( *, name: str | None = None, soft_deps: set[str] | None = None, + hard_deps: set[str] | None = None, _depth: int = 0, _visited: set[str] | None = None, _indent: str = "", @@ -78,6 +76,7 @@ def format_deps_tree( max_depth: Maximum recursion depth name: Module name (used to compute deps if soft_deps not provided) soft_deps: Pre-computed soft dependencies (optional) + hard_deps: Hard dependencies to show under the module (root level only) _depth: Current depth (internal) _visited: Already visited modules (internal) _indent: Current indentation (internal) @@ -103,7 +102,7 @@ def format_deps_tree( soft_deps = sorted(soft_deps) - if not soft_deps: + if not soft_deps and not hard_deps: return lines # Separate up-to-date and outdated modules @@ -136,6 +135,14 @@ def format_deps_tree( lines.append(f"{_indent}- [ ] {dep}{native_suffix}") _visited.add(dep) + # Show hard_deps under this module (only at root level, i.e., when hard_deps is provided) + if hard_deps and dep in soft_deps: + for hd in sorted(hard_deps): + hd_up_to_date = is_up_to_date(hd, cpython_prefix, lib_prefix) + hd_marker = "[x]" if hd_up_to_date else "[ ]" + lines.append(f"{_indent} - {hd_marker} {hd}") + hard_deps = None # Only show once + # Recurse if within depth limit if _depth < max_depth - 1: lines.extend( @@ -163,8 +170,8 @@ def format_deps_tree( def format_deps( name: str, - cpython_prefix: str = "cpython", - lib_prefix: str = "Lib", + cpython_prefix: str, + lib_prefix: str, max_depth: int = 10, _visited: set[str] | None = None, ) -> list[str]: @@ -200,7 +207,7 @@ def format_deps( name = module_name # Resolve hard_dep to parent module (e.g., pydoc_data -> pydoc) - parent = resolve_hard_dep_parent(name) + parent = resolve_hard_dep_parent(name, cpython_prefix) if parent: lines.append(f"(redirecting {name} -> {parent})") name = parent @@ -222,16 +229,30 @@ def format_deps( lines.append(f"(module '{name}' not found)") return lines - # hard_deps (from DEPENDENCIES table) + # Collect all hard_deps (explicit from DEPENDENCIES + implicit from lib_paths) dep_info = DEPENDENCIES.get(name, {}) - hard_deps = dep_info.get("hard_deps", []) - if hard_deps: - lines.append(f"packages: {hard_deps}") + explicit_hard_deps = dep_info.get("hard_deps", []) + + # Get implicit hard_deps from lib_paths (e.g., _pydecimal.py for decimal) + all_hard_deps = set() + for hd in explicit_hard_deps: + # Remove .py extension if present + all_hard_deps.add(hd[:-3] if hd.endswith(".py") else hd) + + for p in existing_lib_paths: + dep_name = p.stem if p.is_file() else p.name + if dep_name != name: # Skip the main module itself + all_hard_deps.add(dep_name) lines.append("\ndependencies:") lines.extend( format_deps_tree( - cpython_prefix, lib_prefix, max_depth, soft_deps={name}, _visited=_visited + cpython_prefix, + lib_prefix, + max_depth, + soft_deps={name}, + _visited=_visited, + hard_deps=all_hard_deps, ) ) @@ -244,8 +265,8 @@ def format_deps( def _format_dependent_tests_tree( tree: dict, - cpython_prefix: str = "cpython", - lib_prefix: str = "Lib", + cpython_prefix: str, + lib_prefix: str, indent: str = "", ) -> list[str]: """Format dependent tests tree for display.""" @@ -314,14 +335,18 @@ def _resolve_module_name( get_lib_paths, get_test_paths, resolve_hard_dep_parent, + resolve_test_to_lib, ) - # Resolve test_ prefix + # Resolve test to library group (e.g., test_urllib2 -> urllib) if name.startswith("test_"): + lib_group = resolve_test_to_lib(name) + if lib_group: + return [lib_group] name = name[5:] # Resolve hard_dep to parent - parent = resolve_hard_dep_parent(name) + parent = resolve_hard_dep_parent(name, cpython_prefix) if parent: return [parent] @@ -350,8 +375,8 @@ def _resolve_module_name( def show_deps( names: list[str], - cpython_prefix: str = "cpython", - lib_prefix: str = "Lib", + cpython_prefix: str, + lib_prefix: str, max_depth: int = 10, ) -> None: """Show all dependency information for modules.""" diff --git a/scripts/update_lib/show_todo.py b/scripts/update_lib/show_todo.py index 19198e4a472..352454ee4e9 100644 --- a/scripts/update_lib/show_todo.py +++ b/scripts/update_lib/show_todo.py @@ -15,8 +15,8 @@ def compute_todo_list( - cpython_prefix: str = "cpython", - lib_prefix: str = "Lib", + cpython_prefix: str, + lib_prefix: str, include_done: bool = False, ) -> list[dict]: """Compute prioritized list of modules to update. @@ -32,7 +32,12 @@ def compute_todo_list( Returns: List of dicts with module info, sorted by priority """ - from update_lib.deps import get_rust_deps, get_soft_deps, is_up_to_date + from update_lib.deps import ( + get_all_hard_deps, + get_rust_deps, + get_soft_deps, + is_up_to_date, + ) from update_lib.show_deps import get_all_modules all_modules = get_all_modules(cpython_prefix) @@ -44,11 +49,18 @@ def compute_todo_list( native_deps = get_rust_deps(name, cpython_prefix) up_to_date = is_up_to_date(name, cpython_prefix, lib_prefix) + # Get hard_deps and check their status + hard_deps = get_all_hard_deps(name, cpython_prefix) + hard_deps_status = { + hd: is_up_to_date(hd, cpython_prefix, lib_prefix) for hd in hard_deps + } + module_data[name] = { "name": name, "soft_deps": soft_deps, "native_deps": native_deps, "up_to_date": up_to_date, + "hard_deps_status": hard_deps_status, } # Build reverse dependency map: who depends on this module @@ -61,8 +73,11 @@ def compute_todo_list( # Compute scores and filter result = [] for name, data in module_data.items(): - # Skip already up-to-date modules (unless --done) - if data["up_to_date"] and not include_done: + hard_deps_status = data["hard_deps_status"] + has_outdated_hard_deps = any(not ok for ok in hard_deps_status.values()) + + # Include if: not up-to-date, or has outdated hard_deps, or --done + if data["up_to_date"] and not has_outdated_hard_deps and not include_done: continue soft_deps = data["soft_deps"] @@ -90,6 +105,7 @@ def compute_todo_list( "native_deps": data["native_deps"], "soft_deps": soft_deps, "up_to_date": data["up_to_date"], + "hard_deps_status": hard_deps_status, } ) @@ -108,7 +124,7 @@ def compute_todo_list( return result -def get_all_tests(cpython_prefix: str = "cpython") -> list[str]: +def get_all_tests(cpython_prefix: str) -> list[str]: """Get all test module names from cpython/Lib/test/. Returns: @@ -136,8 +152,8 @@ def get_all_tests(cpython_prefix: str = "cpython") -> list[str]: def get_untracked_files( - cpython_prefix: str = "cpython", - lib_prefix: str = "Lib", + cpython_prefix: str, + lib_prefix: str, ) -> list[str]: """Get files that exist in cpython/Lib but not in our Lib. @@ -198,6 +214,52 @@ def get_untracked_files( return sorted(untracked) +def get_original_files( + cpython_prefix: str, + lib_prefix: str, +) -> list[str]: + """Get top-level files/modules that exist in our Lib but not in cpython/Lib. + + These are RustPython-original files that don't come from CPython. + Modules that exist in cpython are handled by the library todo (even if + they have additional local files), so they are excluded here. + Excludes test/ directory (handled separately). + + Returns: + Sorted list of top-level names (e.g., ["_dummy_thread.py"]) + """ + cpython_lib = pathlib.Path(cpython_prefix) / "Lib" + local_lib = pathlib.Path(lib_prefix) + + if not local_lib.exists(): + return [] + + original = [] + + # Only check top-level entries + for entry in local_lib.iterdir(): + name = entry.name + + # Skip hidden files and __pycache__ + if name.startswith(".") or name == "__pycache__": + continue + + # Skip test/ directory (handled separately) + if name == "test": + continue + + # Skip site-packages (not a module) + if name == "site-packages": + continue + + # Only include if it doesn't exist in cpython at all + cpython_entry = cpython_lib / name + if not cpython_entry.exists(): + original.append(name) + + return sorted(original) + + def _filter_rustpython_todo(content: str) -> str: """Remove lines containing 'TODO: RUSTPYTHON' from content.""" lines = content.splitlines(keepends=True) @@ -246,7 +308,7 @@ def _compare_dir_ignoring_todo( return True -def count_test_todos(test_name: str, lib_prefix: str = "Lib") -> int: +def count_test_todos(test_name: str, lib_prefix: str) -> int: """Count TODO: RUSTPYTHON lines in a test file/directory.""" local_dir = pathlib.Path(lib_prefix) / "test" / test_name local_file = pathlib.Path(lib_prefix) / "test" / f"{test_name}.py" @@ -276,9 +338,7 @@ def count_test_todos(test_name: str, lib_prefix: str = "Lib") -> int: return total -def is_test_tracked( - test_name: str, cpython_prefix: str = "cpython", lib_prefix: str = "Lib" -) -> bool: +def is_test_tracked(test_name: str, cpython_prefix: str, lib_prefix: str) -> bool: """Check if a test exists in our local Lib/test.""" cpython_dir = pathlib.Path(cpython_prefix) / "Lib" / "test" / test_name cpython_file = pathlib.Path(cpython_prefix) / "Lib" / "test" / f"{test_name}.py" @@ -294,9 +354,7 @@ def is_test_tracked( return local_path.exists() -def is_test_up_to_date( - test_name: str, cpython_prefix: str = "cpython", lib_prefix: str = "Lib" -) -> bool: +def is_test_up_to_date(test_name: str, cpython_prefix: str, lib_prefix: str) -> bool: """Check if a test is up-to-date by comparing files. Ignores lines containing 'TODO: RUSTPYTHON' in local files. @@ -324,7 +382,7 @@ def is_test_up_to_date( def _build_test_to_lib_map( - cpython_prefix: str = "cpython", + cpython_prefix: str, ) -> tuple[dict[str, str], dict[str, list[str]]]: """Build reverse mapping from test name to library name using DEPENDENCIES. @@ -344,7 +402,7 @@ def _build_test_to_lib_map( continue lib_test_order[lib_name] = [] for test_path in dep_info["test"]: - # test_path is like "test/test_htmlparser.py" or "test/test_multiprocessing_fork" + # test_path is like "test_htmlparser.py" or "test_multiprocessing_fork" path = pathlib.Path(test_path) if path.suffix == ".py": test_name = path.stem @@ -357,8 +415,8 @@ def _build_test_to_lib_map( def compute_test_todo_list( - cpython_prefix: str = "cpython", - lib_prefix: str = "Lib", + cpython_prefix: str, + lib_prefix: str, include_done: bool = False, lib_status: dict[str, bool] | None = None, ) -> list[dict]: @@ -515,12 +573,25 @@ def format_todo_list( rev_str = f"{rev_count} dependents" if rev_count else "" - parts = ["-", done_mark, f"[{score_str}]", name] + parts = ["-", done_mark, f"[{score_str}]", f"`{name}`"] if rev_str: parts.append(f"({rev_str})") lines.append(" ".join(parts)) + # Show hard_deps: + # - Normal mode: only show if lib is up-to-date but hard_deps are not + # - Verbose mode: always show all hard_deps with their status + hard_deps_status = item.get("hard_deps_status", {}) + if verbose and hard_deps_status: + for hd in sorted(hard_deps_status.keys()): + hd_mark = "[x]" if hard_deps_status[hd] else "[ ]" + lines.append(f" - {hd_mark} {hd} (hard_dep)") + elif item["up_to_date"]: + for hd, ok in sorted(hard_deps_status.items()): + if not ok: + lines.append(f" - [ ] {hd} (hard_dep)") + # Show corresponding tests if exist if test_by_lib and name in test_by_lib: for test_info in test_by_lib[name]: @@ -541,8 +612,8 @@ def format_todo_list( def format_all_todo( - cpython_prefix: str = "cpython", - lib_prefix: str = "Lib", + cpython_prefix: str, + lib_prefix: str, limit: int | None = None, include_done: bool = False, verbose: bool = False, @@ -620,12 +691,23 @@ def format_all_todo( if limit and len(untracked) > limit: lines.append(f" ... and {len(untracked) - limit} more") + # Format original files (in our Lib but not in cpython) + original = get_original_files(cpython_prefix, lib_prefix) + if original: + lines.append("") + lines.append("## Original Files") + display_original = original[:limit] if limit else original + for path in display_original: + lines.append(f"- {path}") + if limit and len(original) > limit: + lines.append(f" ... and {len(original) - limit} more") + return lines def show_todo( - cpython_prefix: str = "cpython", - lib_prefix: str = "Lib", + cpython_prefix: str, + lib_prefix: str, limit: int | None = None, include_done: bool = False, verbose: bool = False, diff --git a/scripts/update_lib/tests/test_deps.py b/scripts/update_lib/tests/test_deps.py index 4c206192963..d97af2867aa 100644 --- a/scripts/update_lib/tests/test_deps.py +++ b/scripts/update_lib/tests/test_deps.py @@ -70,13 +70,13 @@ def test_auto_detect_py_module(self): tmpdir = pathlib.Path(tmpdir) lib_dir = tmpdir / "Lib" lib_dir.mkdir() - (lib_dir / "datetime.py").write_text("# datetime") - (lib_dir / "_pydatetime.py").write_text("# _pydatetime") + (lib_dir / "mymodule.py").write_text("# mymodule") + (lib_dir / "_pymymodule.py").write_text("# _pymymodule") - paths = get_lib_paths("datetime", str(tmpdir)) + paths = get_lib_paths("mymodule", str(tmpdir)) self.assertEqual(len(paths), 2) - self.assertIn(tmpdir / "Lib" / "datetime.py", paths) - self.assertIn(tmpdir / "Lib" / "_pydatetime.py", paths) + self.assertIn(tmpdir / "Lib" / "mymodule.py", paths) + self.assertIn(tmpdir / "Lib" / "_pymymodule.py", paths) def test_default_file(self): """Test default to .py file."""