def resolve_hard_dep_parent(name: str) -> str | None:
    """Find the module that lists ``name`` among its hard_deps.

    Every entry of the module-level ``DEPENDENCIES`` table is scanned in
    iteration order; the first module whose ``hard_deps`` contains ``name``
    is reported. A trailing ``.py`` is ignored on both the queried name and
    the table entries, so ``"foo"`` and ``"foo.py"`` compare equal.

    E.g., 'pydoc_data' -> 'pydoc', '_pydatetime' -> 'datetime'

    Args:
        name: Module or file name (with or without .py extension)

    Returns:
        Name of the owning module, or None when no module lists ``name``
        as a hard_dep.
    """
    wanted = name.removesuffix(".py")

    for owner, info in DEPENDENCIES.items():
        # Entries without a "hard_deps" key simply contribute no candidates.
        for entry in info.get("hard_deps", []):
            if entry.removesuffix(".py") == wanted:
                return owner

    return None
@@ -285,6 +312,12 @@ def parse_test_imports(content: str) -> set[str]: if dep not in ("support", "__init__"): imports.add(dep) + # Match "import test.foo" -> depends on foo + for match in _IMPORT_TEST_DOT_RE.finditer(content): + dep = match.group(1) + if dep not in ("support", "__init__"): + imports.add(dep) + return imports diff --git a/scripts/update_lib/show_deps.py b/scripts/update_lib/show_deps.py index ae23ced3ead..50d58e4592d 100644 --- a/scripts/update_lib/show_deps.py +++ b/scripts/update_lib/show_deps.py @@ -163,6 +163,7 @@ def format_deps( find_dependent_tests_tree, get_lib_paths, get_test_paths, + resolve_hard_dep_parent, ) if _visited is None: @@ -170,17 +171,34 @@ def format_deps( lines = [] + # Resolve test_ prefix to module (e.g., test_pydoc -> pydoc) + if name.startswith("test_"): + module_name = name[5:] # strip "test_" + lines.append(f"(redirecting {name} -> {module_name})") + name = module_name + + # Resolve hard_dep to parent module (e.g., pydoc_data -> pydoc) + parent = resolve_hard_dep_parent(name) + if parent: + lines.append(f"(redirecting {name} -> {parent})") + name = parent + # lib paths (only show existing) lib_paths = get_lib_paths(name, cpython_prefix) - for p in lib_paths: - if p.exists(): - lines.append(f"[+] lib: {p}") + existing_lib_paths = [p for p in lib_paths if p.exists()] + for p in existing_lib_paths: + lines.append(f"[+] lib: {p}") # test paths (only show existing) test_paths = get_test_paths(name, cpython_prefix) - for p in test_paths: - if p.exists(): - lines.append(f"[+] test: {p}") + existing_test_paths = [p for p in test_paths if p.exists()] + for p in existing_test_paths: + lines.append(f"[+] test: {p}") + + # If no lib or test paths exist, module doesn't exist + if not existing_lib_paths and not existing_test_paths: + lines.append(f"(module '{name}' not found)") + return lines # hard_deps (from DEPENDENCIES table) dep_info = DEPENDENCIES.get(name, {}) @@ -188,7 +206,7 @@ def format_deps( if hard_deps: 
def _resolve_module_name(
    name: str,
    cpython_prefix: str,
    lib_prefix: str,
) -> list[str]:
    """Follow redirects to turn a user-supplied name into module names.

    Resolution steps, in order:

    1. A leading ``test_`` is stripped.
    2. If the name is a hard_dep of another module, that module is returned.
    3. If any lib or test path for the name exists, the name is returned.
    4. If ``<cpython_prefix>/Lib/test/<name>.py`` exists and
       ``<lib_prefix>/test`` exists, the ``test_*`` files that import the
       name are looked up in the test import graph and returned with their
       ``test_`` prefix removed (sorted, without duplicates).
    5. Otherwise the (possibly prefix-stripped) name is returned unchanged.

    Returns:
        A list of module names (usually 1, but test support files may
        expand to multiple).
    """
    import pathlib

    from update_lib.deps import (
        _build_test_import_graph,
        get_lib_paths,
        get_test_paths,
        resolve_hard_dep_parent,
    )

    # Step 1: test_foo -> foo
    if name.startswith("test_"):
        name = name[5:]

    # Step 2: hard_dep -> owning module
    owner = resolve_hard_dep_parent(name)
    if owner:
        return [owner]

    # Step 3: the name is itself a known module (lib paths are probed first).
    candidates = [
        *get_lib_paths(name, cpython_prefix),
        *get_test_paths(name, cpython_prefix),
    ]
    if any(path.exists() for path in candidates):
        return [name]

    # Step 4: test support files (e.g., string_tests -> bytes, str, userstring)
    support_file = pathlib.Path(cpython_prefix) / "Lib" / "test" / f"{name}.py"
    local_test_dir = pathlib.Path(lib_prefix) / "test"
    if support_file.exists() and local_test_dir.exists():
        graph, _ = _build_test_import_graph(local_test_dir)
        # Keep only test_* importers and map them to module names
        # (test_bytes -> bytes).
        importers = {
            key[5:]
            for key, imported in graph.items()
            if name in imported and key.startswith("test_")
        }
        if importers:
            return sorted(importers)

    # Step 5: nothing matched; hand the name back as-is.
    return [name]