""" Module for extracting documentation from Python objects using pydoc and inspect. """ import pydoc import inspect from typing import Optional, Dict, Any class DocExtractor: """ Extracts documentation from Python objects. Supports: - Modules - Classes - Functions - Methods - Builtins - Any object accessible through pydoc """ @staticmethod def extract_doc(object_name: str) -> Dict[str, Any]: """ Extract documentation for a Python object. Args: object_name: Dot-separated path to the object (e.g., 'dict.update', 'os.path', 'builtins.BaseException') Returns: Dictionary containing: - 'original': Original English documentation - 'object_name': Name of the object - 'object_type': Type of object (module, class, function, etc.) - 'signature': Function/method signature if applicable - 'error': Error message if extraction failed """ try: obj = None resolved_name = object_name # For builtins, resolve directly first (pydoc.resolve can be unreliable) if object_name.startswith('builtins.'): try: import builtins name = object_name.replace('builtins.', '', 1) if hasattr(builtins, name): obj = getattr(builtins, name) # Verify we got the right object obj_name = getattr(obj, '__name__', None) if obj_name == name: resolved_name = object_name else: obj = None # Wrong object, try other methods except Exception: pass # If not a builtin or builtin resolution failed, try direct import first # This is more reliable than pydoc.resolve for standard library modules if obj is None: try: parts = object_name.split('.') if len(parts) == 1: # Simple module name (e.g., 'asyncio') obj = __import__(object_name) # Verify it's actually a module if not inspect.ismodule(obj): obj = None elif len(parts) > 1: # Dotted name (e.g., 'os.path', 'collections.abc') module_name = '.'.join(parts[:-1]) attr_name = parts[-1] module = __import__(module_name, fromlist=[attr_name]) obj = getattr(module, attr_name) resolved_name = object_name except Exception: pass # If direct import failed, try pydoc.resolve as fallback if obj is None: try: resolved_obj = pydoc.resolve(object_name) # Verify the resolved object is correct obj = resolved_obj except (ImportError, AttributeError, ValueError) as e: pass if obj is None: raise ValueError(f"Could not resolve object: {object_name}") # Verify we got the right object by checking its name and type # This helps catch cases where pydoc.resolve returns wrong object try: parts = object_name.split('.') expected_name = parts[-1] actual_name = getattr(obj, '__name__', None) or getattr(obj, '__qualname__', None) # For modules, check module name if inspect.ismodule(obj): module_name = getattr(obj, '__name__', '') if module_name != object_name and not module_name.endswith('.' + object_name): # Wrong module - try direct import try: correct_obj = __import__(object_name) if inspect.ismodule(correct_obj) and getattr(correct_obj, '__name__', '') == object_name: obj = correct_obj except Exception: pass # For non-modules, verify the name matches elif actual_name and actual_name != expected_name: # Object name doesn't match - try to get it more directly if len(parts) == 2 and parts[0] == 'builtins': import builtins if hasattr(builtins, parts[1]): new_obj = getattr(builtins, parts[1]) new_name = getattr(new_obj, '__name__', None) if new_name == expected_name: obj = new_obj elif len(parts) > 1: # Try direct import for standard library try: module_name = '.'.join(parts[:-1]) attr_name = parts[-1] module = __import__(module_name, fromlist=[attr_name]) new_obj = getattr(module, attr_name) new_name = getattr(new_obj, '__name__', None) or getattr(new_obj, '__qualname__', None) if new_name == expected_name or new_name == attr_name: obj = new_obj except Exception: pass except Exception: pass # Continue even if verification fails # Get the docstring docstring = inspect.getdoc(obj) or pydoc.getdoc(obj) or "" # Additional verification: check if docstring matches tuple (common wrong result) # This catches cases where pydoc.resolve returns tuple instead of the requested object if docstring and "Built-in immutable sequence" in docstring and "tuple" in docstring.lower(): # This looks like tuple documentation - verify we didn't request tuple if object_name.lower() != 'tuple' and not object_name.lower().endswith('.tuple'): # We got tuple docs but didn't ask for tuple - this is wrong! # Try to get the correct object try: parts = object_name.split('.') if len(parts) == 1: # Simple module - try direct import correct_obj = __import__(object_name) if inspect.ismodule(correct_obj): correct_doc = inspect.getdoc(correct_obj) or pydoc.getdoc(correct_obj) or "" # If the correct doc doesn't mention tuple, use it if "tuple" not in correct_doc.lower() or "Built-in immutable sequence" not in correct_doc: obj = correct_obj docstring = correct_doc elif len(parts) > 1: # Dotted name - try direct import module_name = '.'.join(parts[:-1]) attr_name = parts[-1] module = __import__(module_name, fromlist=[attr_name]) correct_obj = getattr(module, attr_name) correct_doc = inspect.getdoc(correct_obj) or pydoc.getdoc(correct_obj) or "" # If the correct doc doesn't mention tuple, use it if "tuple" not in correct_doc.lower() or "Built-in immutable sequence" not in correct_doc: obj = correct_obj docstring = correct_doc except Exception: pass # If correction fails, continue with what we have # Determine object type if inspect.ismodule(obj): obj_type = "module" elif inspect.isclass(obj): obj_type = "class" elif inspect.isfunction(obj) or inspect.ismethod(obj): obj_type = "function" else: obj_type = "object" # Get signature if it's a callable signature = None if inspect.isclass(obj) or inspect.isfunction(obj) or inspect.ismethod(obj): try: sig = inspect.signature(obj) signature = str(sig) except (ValueError, TypeError): pass # If docstring is empty, try to get help text if not docstring: try: help_text = pydoc.render_doc(obj, renderer=pydoc.plaintext) # Extract just the docstring part (first paragraph after object name) lines = help_text.split('\n') # Skip empty lines and find the actual docstring start_idx = 0 for i, line in enumerate(lines): if line.strip() and not line.strip().startswith(object_name.split('.')[-1]): start_idx = i break docstring = '\n'.join(lines[start_idx:]).strip() except Exception: pass # Final fallback: use help() output if not docstring: try: import io import sys help_output = io.StringIO() sys.stdout = help_output help(obj) sys.stdout = sys.__stdout__ help_text = help_output.getvalue() # Extract meaningful parts lines = help_text.split('\n') docstring = '\n'.join([l for l in lines if l.strip() and not l.strip().startswith('Help on')])[:500] except Exception: pass return { 'original': docstring, 'object_name': resolved_name, # Use resolved name, not original 'object_type': obj_type, 'signature': signature, 'error': None } except Exception as e: import traceback error_msg = str(e) # Don't expose full traceback to user, but log it print(f"Error extracting doc for {object_name}: {error_msg}") print(traceback.format_exc()) return { 'original': None, 'object_name': object_name, 'object_type': None, 'signature': None, 'error': f"Could not extract documentation: {error_msg}" }