diff --git a/.gitignore b/.gitignore index f36439a..637d7d2 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ dist/ codegraph.egg-info/ codegraph/__pycache__/ -*/__pycache__/ \ No newline at end of file +*/__pycache__/ +.vscode +.conda \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index e6bcd74..1819133 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,93 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.5.0] - 2026-01-28 + +### Added + +**Focus Mode** +- New "Focus Mode" checkbox in Highlight Settings panel +- When enabled, hides all background (non-highlighted) nodes and links +- Shows entire dependency chain regardless of display filters +- Improves visibility when analyzing specific code paths + +**Navigation History** +- Added back/forward navigation buttons with browser-style controls +- Keyboard shortcuts: Alt+Left (back), Alt+Right (forward) +- Tracks entity clicks for easy navigation through code exploration +- Button states update based on history availability + +**External Import Nodes** +- External imports (e.g., `os`, `sys`, `numpy`) now appear as nodes in the graph +- Distinguished with special styling and external type +- Helps visualize all project dependencies including standard library and third-party packages + +### Changed + +**Arrow Direction and Visualization** +- Reversed arrow directions to better represent code flow +- Module imports: arrows now point FROM importer TO imported module +- Entity dependencies: arrows point FROM entity TO its dependencies +- Improved arrow scaling: scales consistently with zoom level and icon size +- Unified arrow head sizes and stroke widths across all link types +- Dynamic arrow scaling in highlight mode with configurable maximum (18.75x scale factor) + +**Keyword Filtering 
Enhancements** +- Keyword search now filters at entity level in addition to module level +- Shows full dependency chains when entities match keywords +- Handles import aliases correctly (e.g., `import pandas as pd`) +- Improves search accuracy for finding specific functions/classes + +**UI Defaults and Behavior** +- Default font size increased to 256 for better readability +- Display panel now expanded by default +- Classes and Functions checkboxes unchecked by default (show only modules) +- Focus mode properly dims labels and icons of non-highlighted nodes +- Improved link opacity handling in highlight and focus modes + +### Fixed + +**Display Filter Issues** +- Fixed Classes/Functions nodes appearing in highlight mode when filters disabled +- Corrected entity visibility logic to respect display panel settings +- Entity links now properly hidden when corresponding filters are off + +**Import Handling** +- Fixed import statement extraction from AST +- Corrected handling of multi-line imports and comma-separated imports +- Fixed alias imports (e.g., `import x as y`) appearing in tooltips + +**Highlight Mode** +- Fixed entity-to-entity click highlighting for dependency relationships +- Corrected link dimming in focus mode +- Fixed label and icon dimming to match node highlight state + +**Arrow Scaling Consistency** +- Fixed inconsistent arrow sizes at different zoom levels +- Unified base stroke widths (2px for all link types) +- Arrows now scale proportionally with icons in highlight mode +- Applied proper clamping to prevent excessive arrow sizes + +## [1.4.0] - 2026-01-26 + +### Fixed + +**Duplicate Filename Support** +- Fixed critical issue where files with the same basename in different directories would collide +- Now uses full relative paths as unique identifiers instead of just filenames +- Module node IDs are now path-based (e.g., `src/utils`, `tests/utils`) without `.py` extension +- Entity node IDs use format `path/module:entity_name` (e.g., 
def read_file_content(path: Text) -> Text:
    """Return the decoded text of the file at ``path``.

    The file is decoded as UTF-8 first. The ``utf-8-sig`` codec is used instead
    of plain ``utf-8`` so that a leading byte-order mark, when present, is
    dropped rather than returned as a U+FEFF character at the start of the
    source. Files without a BOM decode exactly as they do with ``utf-8``.

    If the bytes are not valid UTF-8 the file is re-read as latin-1.

    :param path: path of the file to read
    :return: file content as text
    :raises OSError: if the file cannot be opened or read
    """
    try:
        with open(path, "r", encoding="utf-8-sig") as file_read:
            return file_read.read()
    except UnicodeDecodeError:
        # latin-1 maps every byte value to a character, so this second read
        # cannot fail on decoding (the text may be mis-decoded, but the
        # caller still gets content to work with).
        with open(path, "r", encoding="latin-1") as file_read:
            return file_read.read()
def _filter_entities_by_keyword(self, keyword: str):
    """Remove entities unrelated to ``keyword`` from ``self.modules_data``.

    For every module that imports something whose name contains the keyword
    (case-insensitive), only these parsed objects are kept:

    * every ``Import`` object,
    * entities whose source text mentions the matching module name or its
      alias (``import pkg as alias`` is tracked under both names),
    * entities whose source text contains ``name(`` or ``name.`` for an
      entity that is already kept (transitive callers).

    Modules without a matching import, and modules whose file cannot be
    read, are left untouched.

    :param keyword: keyword to match against imported module names
    :return: None; ``self.modules_data`` is updated in place
    """
    keyword_lower = keyword.lower()

    # Snapshot the items: values are reassigned while we walk the mapping.
    for module_path, parsed_objects in list(self.modules_data.items()):
        # Lower-cased names under which a keyword-matching import is
        # visible inside this module (module name and/or alias).
        import_aliases = set()
        for obj in parsed_objects:
            if not isinstance(obj, Import):
                continue
            for module_import in obj.modules:
                if " as " in module_import:
                    # rpartition never raises, even if " as " occurs more
                    # than once; the alias is whatever follows the last one.
                    original, _, alias = module_import.rpartition(" as ")
                    original = original.strip()
                    if keyword_lower in original.lower():
                        import_aliases.add(alias.strip().lower())
                        import_aliases.add(original.lower())
                else:
                    # Plain "import module" / dotted import: test the root.
                    base_module = module_import.split(".")[0]
                    if keyword_lower in base_module.lower():
                        import_aliases.add(base_module.lower())

        # No matching import: keep the module exactly as parsed.
        if not import_aliases:
            continue

        try:
            source_lines = read_file_content(module_path).split("\n")
        except (OSError, ValueError):
            # Unreadable file: best effort, leave the module unfiltered.
            continue

        # Lower-cased source of each entity that has usable line numbers.
        code_by_name = {}
        entities_to_keep = set()
        for obj in parsed_objects:
            if isinstance(obj, Import):
                continue
            name = getattr(obj, "name", None)
            lineno = getattr(obj, "lineno", None)
            endno = getattr(obj, "endno", None)
            if name is None or not lineno or not endno:
                continue
            code = "\n".join(source_lines[lineno - 1:endno]).lower()
            code_by_name[name] = code
            # Direct use: the entity's text mentions the module or alias.
            if any(alias in code for alias in import_aliases):
                entities_to_keep.add(name)

        # Transitive closure over "calls a kept entity". Every pass either
        # adds at least one entity or stops, so the loop is bounded by the
        # number of entities and needs no iteration cap.
        changed = True
        while changed:
            changed = False
            for name, code in code_by_name.items():
                if name in entities_to_keep:
                    continue
                # any() finishes before the set is modified below.
                calls_kept = any(
                    f"{kept.lower()}(" in code or f"{kept.lower()}." in code
                    for kept in entities_to_keep
                )
                if calls_kept:
                    entities_to_keep.add(name)
                    changed = True

        # Imports are always preserved; entities only when selected above.
        self.modules_data[module_path] = [
            obj
            for obj in parsed_objects
            if isinstance(obj, Import)
            or getattr(obj, "name", None) in entities_to_keep
        ]
def _filter_by_keyword(self, paths_list: list, keyword: str) -> list:
    """Select the files related to ``keyword``.

    A file matches when the keyword (case-insensitive) occurs in its
    basename, in any dot-separated part of one of its imports, or anywhere
    in its text. The result also contains every file that imports a
    matching file and every file a matching file imports, where "imports"
    means the other file's module name (basename without extension) is one
    of the dot-separated components of an import.

    Files that cannot be parsed can only match through their basename.

    :param paths_list: candidate file paths
    :param keyword: keyword to search for
    :return: selected paths, de-duplicated, in the order of ``paths_list``;
        an empty list when nothing matches
    """
    keyword_lower = keyword.lower()

    # Parse every file exactly once; both the keyword search and the
    # dependency expansion below work from these import lists.
    imports_by_path = {}
    unparsed = set()
    for path in dict.fromkeys(paths_list):
        try:
            parsed_objects = parse_code_file(path, self.base_paths)
        except Exception as exc:
            # Best effort: a file the parser rejects must not abort the scan.
            logger.debug("Failed to parse %s: %s", path, exc)
            imports_by_path[path] = []
            unparsed.add(path)
            continue
        imports_by_path[path] = [
            module.split(" as ")[0]  # drop any "as alias" suffix
            for obj in parsed_objects
            if isinstance(obj, Import)
            for module in obj.modules
        ]

    # First pass: files that mention the keyword themselves.
    matching_files = set()
    for path, imports in imports_by_path.items():
        if keyword_lower in os.path.basename(path).lower():
            matching_files.add(path)
            continue
        if path in unparsed:
            continue
        if any(
            keyword_lower in part.lower()
            for imp in imports
            for part in imp.split(".")
        ):
            matching_files.add(path)
            continue
        try:
            content = read_file_content(path)
        except (OSError, ValueError) as exc:
            logger.debug("Failed to parse %s: %s", path, exc)
            continue
        if keyword_lower in content.lower():
            matching_files.add(path)

    if not matching_files:
        return []

    # Module name of each file and the set of name components it imports.
    # splitext removes only the trailing extension, and component matching
    # avoids substring hits such as module "re" matching import "core".
    stems = {
        path: os.path.splitext(os.path.basename(path))[0]
        for path in imports_by_path
    }
    components = {
        path: {part for imp in imports for part in imp.split(".")}
        for path, imports in imports_by_path.items()
    }

    # Second pass: add direct dependents and direct dependencies.
    result_files = set(matching_files)
    for match_file in matching_files:
        match_stem = stems[match_file]
        match_components = components[match_file]
        for file_path in imports_by_path:
            imports_match = match_stem in components[file_path]
            imported_by_match = stems[file_path] in match_components
            if imports_match or imported_by_match:
                result_files.add(file_path)

    # Keep the caller's ordering so repeated runs give the same output.
    return [path for path in imports_by_path if path in result_files]
def get_module_name(code_path: Text, base_paths: Optional[List] = None) -> Text:
    """Return the module identifier for ``code_path``.

    When ``base_paths`` is given and non-empty, the identifier is whatever
    ``codegraph.utils.get_relative_path(code_path, base_paths)`` returns;
    otherwise it is the file's basename. In both cases a trailing ``.py``
    is removed.

    :param code_path: path to the module file
    :param base_paths: base paths used to compute the relative path; when
        empty or ``None`` only the basename is used
    :return: identifier without the ``.py`` extension (e.g. ``utils``)
    """
    if base_paths:
        # Imported here so the basename-only path does not pay for (or
        # depend on) the helper it never calls.
        from codegraph.utils import get_relative_path

        rel_path = get_relative_path(code_path, base_paths)
    else:
        rel_path = os.path.basename(code_path)

    # Strip only a trailing extension; ".py" elsewhere in the name stays.
    if rel_path.endswith(".py"):
        rel_path = rel_path[:-3]

    return rel_path
= entity.name return entities_lines, imports, names_map diff --git a/codegraph/main.py b/codegraph/main.py index 93f8a91..4523a6b 100644 --- a/codegraph/main.py +++ b/codegraph/main.py @@ -40,7 +40,18 @@ type=click.Path(), help="Export graph data to CSV file (specify output path)", ) -def cli(paths, object_only, file_path, distance, matplotlib, output, csv): +@click.option( + "--depth", + type=int, + required=True, + help="Maximum subfolder depth to scan (1=original/sub1, 2=original/sub1/sub2, etc.)", +) +@click.option( + "--keyword", + type=str, + help="Filter to only show modules related to this keyword (reduces graph size for large codebases)", +) +def cli(paths, object_only, file_path, distance, matplotlib, output, csv, depth, keyword): """ Tool that creates a graph of code to show dependencies between code entities (methods, classes, etc.). CodeGraph does not execute code, it is based only on lex and syntax parsing. @@ -62,6 +73,8 @@ def cli(paths, object_only, file_path, distance, matplotlib, output, csv): matplotlib=matplotlib, output=output, csv=csv, + depth=depth, + keyword=keyword, ) main(args) @@ -70,6 +83,8 @@ def main(args): code_graph = core.CodeGraph(args) usage_graph = code_graph.usage_graph() entity_metadata = code_graph.get_entity_metadata() + base_paths = code_graph.base_paths + raw_imports = code_graph.raw_imports # Raw imports before they get popped if args.file_path and args.distance: dependencies = code_graph.get_dependencies(args.file_path, args.distance) @@ -81,14 +96,16 @@ def main(args): elif args.csv: import codegraph.vizualyzer as vz - vz.export_to_csv(usage_graph, entity_metadata=entity_metadata, output_path=args.csv) + vz.export_to_csv(usage_graph, entity_metadata=entity_metadata, + output_path=args.csv, base_paths=base_paths, raw_imports=raw_imports) else: import codegraph.vizualyzer as vz if args.matplotlib: - vz.draw_graph_matplotlib(usage_graph) + vz.draw_graph_matplotlib(usage_graph, base_paths=base_paths) else: - 
vz.draw_graph(usage_graph, entity_metadata=entity_metadata, output_path=args.output) + vz.draw_graph(usage_graph, entity_metadata=entity_metadata, + output_path=args.output, base_paths=base_paths, raw_imports=raw_imports) if __name__ == "__main__": diff --git a/codegraph/parser.py b/codegraph/parser.py index d58850f..060a9ec 100644 --- a/codegraph/parser.py +++ b/codegraph/parser.py @@ -88,9 +88,15 @@ def _nest_class(ob, class_name, lineno, super=None): return newclass -def create_objects_array(fname, source): # noqa: C901 +def create_objects_array(fname, source, base_paths=None): # noqa: C901 # todo: need to do optimization - """Return an object list for a particular module.""" + """Return an object list for a particular module. + + :param fname: Full path to the file being parsed + :param source: Source code content + :param base_paths: List of base paths for calculating relative paths (optional) + :return: List of parsed objects + """ tree = [] f = io.StringIO(source) diff --git a/codegraph/templates/index.html b/codegraph/templates/index.html index 16783d7..7a7cfc2 100644 --- a/codegraph/templates/index.html +++ b/codegraph/templates/index.html @@ -16,6 +16,8 @@
-