29.4 代码理解模块

18 分钟阅读

29.4.1 代码理解概述

代码理解模块是编程 Agent 的另一个核心能力,它能够分析、解释和理解现有代码的功能、结构和设计。代码理解涉及代码解析、语义分析、依赖分析等多个环节。

代码理解流程

输入代码 ↓ 代码解析 ↓ 结构分析 ↓ 语义分析 ↓ 依赖分析 ↓ 功能推断 ↓ 生成解释

29.4.2 代码解析#

代码解析器

python
class CodeParser:
    """Route source code to the parser registered for its language.

    ``JavaScriptParser``, ``JavaParser`` and ``CppParser`` are referenced here
    but are not defined in this snippet; they are assumed to expose the same
    ``parse(code)`` method as ``PythonParser`` -- TODO confirm.
    """

    def __init__(self):
        # Keys are lower-case language names; parse() lower-cases its
        # ``language`` argument before the lookup.
        self.parsers = {
            'python': PythonParser(),
            'javascript': JavaScriptParser(),
            'java': JavaParser(),
            'cpp': CppParser()
        }

    def parse(self, code: str, language: str) -> ParsedCode:
        """Parse ``code`` with the parser registered for ``language``.

        Args:
            code: Source text to parse.
            language: Language name, matched case-insensitively.

        Returns:
            Whatever the selected parser's ``parse`` returns.

        Raises:
            ValueError: If no parser is registered for ``language``.
        """
        parser = self.parsers.get(language.lower())
        if parser is None:
            raise ValueError(f"Unsupported language: {language}")
        return parser.parse(code)


class PythonParser:
    """Extract classes, functions, imports and globals from Python source."""

    # ``async def`` is a distinct AST node type; treat both kinds alike so
    # coroutine functions and methods are not silently dropped.
    _FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)

    def parse(self, code: str) -> ParsedCode:
        """Parse Python source into a ``ParsedCode`` record.

        Args:
            code: Python source text.

        Returns:
            A ``ParsedCode`` carrying the AST plus the extracted classes,
            functions, imports and module-level variables.

        Raises:
            ValueError: If ``code`` is not syntactically valid Python; the
                original ``SyntaxError`` is chained as the cause.
        """
        try:
            tree = ast.parse(code)
        except SyntaxError as e:
            raise ValueError(f"Invalid Python code: {e}") from e

        parsed_code = ParsedCode(
            language='python',
            original_code=code,
            ast=tree
        )
        parsed_code.classes = self._extract_classes(tree)
        parsed_code.functions = self._extract_functions(tree)
        parsed_code.imports = self._extract_imports(tree)
        parsed_code.global_variables = self._extract_global_variables(tree)
        return parsed_code

    def _extract_classes(self, tree: ast.AST) -> List[ClassInfo]:
        """Collect a ``ClassInfo`` for every class definition in the tree.

        Nested classes are included because the whole tree is walked.
        """
        classes = []
        for node in ast.walk(tree):
            if not isinstance(node, ast.ClassDef):
                continue
            # _extract_attribute yields None for targets that are not a
            # plain name (e.g. tuple unpacking); drop those entries.
            candidates = (
                self._extract_attribute(stmt)
                for stmt in node.body
                if isinstance(stmt, ast.Assign)
            )
            classes.append(ClassInfo(
                name=node.name,
                bases=[self._get_name(base) for base in node.bases],
                methods=[
                    self._extract_method(stmt)
                    for stmt in node.body
                    if isinstance(stmt, self._FUNCTION_NODES)
                ],
                attributes=[attr for attr in candidates if attr is not None],
                docstring=ast.get_docstring(node)
            ))
        return classes

    def _extract_functions(self, tree: ast.AST) -> List[FunctionInfo]:
        """Collect a ``FunctionInfo`` for every function that is not a method.

        A "method" is a function defined directly in a class body. Functions
        nested inside other functions are still reported.
        """
        # Record the methods once up front instead of rescanning the whole
        # tree for every function encountered.
        method_ids = set()
        for node in ast.walk(tree):
            if isinstance(node, ast.ClassDef):
                method_ids.update(
                    id(stmt) for stmt in node.body
                    if isinstance(stmt, self._FUNCTION_NODES)
                )

        functions = []
        for node in ast.walk(tree):
            if not isinstance(node, self._FUNCTION_NODES):
                continue
            if id(node) in method_ids:
                continue
            functions.append(FunctionInfo(
                name=node.name,
                arguments=[arg.arg for arg in node.args.args],
                return_type=self._get_return_type(node),
                docstring=ast.get_docstring(node),
                decorators=[self._get_name(d) for d in node.decorator_list]
            ))
        return functions

    def _extract_imports(self, tree: ast.AST) -> List[ImportInfo]:
        """Collect one ``ImportInfo`` per imported name, at any nesting depth."""
        imports = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    imports.append(ImportInfo(
                        module=alias.name,
                        alias=alias.asname,
                        type='import'
                    ))
            elif isinstance(node, ast.ImportFrom):
                for alias in node.names:
                    imports.append(ImportInfo(
                        module=node.module,
                        name=alias.name,
                        alias=alias.asname,
                        type='from'
                    ))
        return imports

    def _extract_global_variables(self, tree: ast.AST) -> List[VariableInfo]:
        """Collect module-level ``name = value`` assignments.

        Only statements directly in the module body are considered, so
        assignments inside functions, classes or control flow are skipped.
        """
        variables = []
        # Look at the module's own statements; an Assign node can never
        # itself be a Module, so the scope test has to happen here.
        for stmt in getattr(tree, 'body', []):
            if not isinstance(stmt, ast.Assign):
                continue
            for target in stmt.targets:
                if isinstance(target, ast.Name):
                    variables.append(VariableInfo(
                        name=target.id,
                        type=self._infer_type(stmt.value),
                        value=self._get_value(stmt.value)
                    ))
        return variables

    def _extract_method(self, node: ast.FunctionDef) -> MethodInfo:
        """Build a ``MethodInfo`` from a function defined in a class body.

        Static/class methods are detected only through the bare decorator
        names ``staticmethod`` and ``classmethod``.
        """
        bare_decorators = {
            d.id for d in node.decorator_list if isinstance(d, ast.Name)
        }
        return MethodInfo(
            name=node.name,
            arguments=[arg.arg for arg in node.args.args],
            return_type=self._get_return_type(node),
            docstring=ast.get_docstring(node),
            is_static='staticmethod' in bare_decorators,
            is_classmethod='classmethod' in bare_decorators
        )

    def _extract_attribute(self, node: ast.Assign) -> AttributeInfo:
        """Build an ``AttributeInfo`` from a class-level assignment.

        Only the first target is inspected. Returns ``None`` when that
        target is not a plain name.
        """
        target = node.targets[0]
        if isinstance(target, ast.Name):
            return AttributeInfo(
                name=target.id,
                type=self._infer_type(node.value),
                value=self._get_value(node.value)
            )
        return None

    def _get_name(self, node: ast.AST) -> str:
        """Render a name-like expression node as source text.

        Plain and dotted names are rebuilt directly. Anything else
        (subscripts such as ``List[int]``, calls, constants) is rendered
        with ``ast.unparse`` where available, so callers get readable text
        instead of an object repr.
        """
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            return f"{self._get_name(node.value)}.{node.attr}"
        unparse = getattr(ast, 'unparse', None)  # Python 3.9+
        if unparse is not None:
            return unparse(node)
        return str(node)

    def _get_return_type(self, node: ast.FunctionDef) -> str:
        """Return the return annotation as text, or ``"None"`` if absent."""
        if node.returns:
            return self._get_name(node.returns)
        return "None"

    def _infer_type(self, node: ast.AST) -> str:
        """Guess a type name for a value expression.

        Literals and simple displays map to their builtin type name, a call
        maps to the name of the callee, and anything else is ``"Any"``.
        """
        if isinstance(node, ast.Constant):
            return type(node.value).__name__
        if isinstance(node, ast.List):
            return "list"
        if isinstance(node, ast.Dict):
            return "dict"
        if isinstance(node, ast.Tuple):
            return "tuple"
        if isinstance(node, ast.Set):
            return "set"
        if isinstance(node, ast.Call):
            return self._get_name(node.func)
        return "Any"

    def _get_value(self, node: ast.AST) -> Any:
        """Return the literal value of a constant node, else ``None``."""
        if isinstance(node, ast.Constant):
            return node.value
        return None


# ---------------------------------------------------------------------------
# 29.4.3 Structure analysis -- the structure analyzer
# ---------------------------------------------------------------------------


class StructureAnalyzer:
    """Derive hierarchy, call graph, dependencies and complexity metrics."""

    def analyze(self, parsed_code: ParsedCode) -> StructureAnalysis:
        """Run every structural analysis and bundle the results."""
        analysis = StructureAnalysis()
        analysis.class_hierarchy = self._analyze_class_hierarchy(
            parsed_code.classes
        )
        analysis.call_graph = self._analyze_call_graph(parsed_code)
        analysis.dependencies = self._analyze_dependencies(parsed_code)
        analysis.complexity = self._analyze_complexity(parsed_code)
        return analysis

    def _analyze_class_hierarchy(self, classes: List[ClassInfo]) -> Dict[str, List[str]]:
        """Map each class name to the list of its base-class names."""
        return {cls.name: cls.bases for cls in classes}

    def _analyze_call_graph(self, parsed_code: ParsedCode) -> Dict[str, List[str]]:
        """Map each function and ``Class.method`` to the calls it makes."""
        call_graph = {}

        for func in parsed_code.functions:
            call_graph[func.name] = self._extract_function_calls(
                func, parsed_code
            )

        for cls in parsed_code.classes:
            for method in cls.methods:
                call_graph[f"{cls.name}.{method.name}"] = (
                    self._extract_method_calls(method, cls, parsed_code)
                )

        return call_graph

    def _extract_function_calls(self, func: FunctionInfo,
                                parsed_code: ParsedCode) -> List[str]:
        """Return the calls made by ``func``.

        Placeholder: a real implementation needs deeper AST analysis of the
        function body. It currently always returns an empty list.
        """
        calls = []
        return calls

    def _extract_method_calls(self, method: MethodInfo, cls: ClassInfo,
                              parsed_code: ParsedCode) -> List[str]:
        """Return the calls made by ``method``.

        Placeholder: a real implementation needs deeper AST analysis of the
        method body. It currently always returns an empty list.
        """
        calls = []
        return calls

    def _analyze_dependencies(self, parsed_code: ParsedCode) -> List[Dependency]:
        """List import dependencies followed by inheritance dependencies."""
        dependencies = []

        # Import dependencies: the source side is recorded as the language
        # name, since ParsedCode carries no module name here.
        for imp in parsed_code.imports:
            dependencies.append(Dependency(
                type='import',
                source=parsed_code.language,
                target=imp.module,
                strength='external'
            ))

        # Inheritance dependencies: one edge per (class, base) pair.
        for cls in parsed_code.classes:
            for base in cls.bases:
                dependencies.append(Dependency(
                    type='inheritance',
                    source=cls.name,
                    target=base,
                    strength='strong'
                ))

        return dependencies

    def _analyze_complexity(self, parsed_code: ParsedCode) -> ComplexityMetrics:
        """Compute cyclomatic, cognitive and maintainability metrics."""
        metrics = ComplexityMetrics()

        # Cyclomatic complexity
        metrics.cyclomatic_complexity = self._calculate_cyclomatic_complexity(
            parsed_code
        )

        # Cognitive complexity
        metrics.cognitive_complexity = self._calculate_cognitive_complexity(
            parsed_code
        )

        # Maintainability index
        metrics.maintainability_index = self._calculate_maintainability_index(
            parsed_code
        )

        return metrics

    def _calculate_cyclomatic_complexity(self, parsed_code: ParsedCode) -> float:
        """Count decision points over the whole AST, starting from 1.

        Each ``if``/``while``/``for``/``except`` adds one, and each boolean
        operation adds one per extra operand.
        """
        complexity = 1  # base complexity

        # Walk the AST and count decision points.
        for node in ast.walk(parsed_code.ast):
            if isinstance(node, (ast.If, ast.While, ast.For, ast.ExceptHandler)):
                complexity += 1
            elif isinstance(node, ast.BoolOp):
                complexity += len(node.values) - 1

        return complexity

    def _calculate_cognitive_complexity(self, parsed_code: ParsedCode) -> float:
        """Approximate cognitive complexity as 1.5 x cyclomatic complexity."""
        # Simplified implementation.
        return self._calculate_cyclomatic_complexity(parsed_code) * 1.5

    def _calculate_maintainability_index(self, parsed_code: ParsedCode) -> float:
        """Return a simplified maintainability index clamped to [0, 100].

        Formula shape: MI = 171 - 5.2 * ln(V) - 0.23 * G - 16.2 * ln(L).
        In this simplified implementation V is the cyclomatic complexity,
        and both G and L are the line count.

        NOTE(review): the commonly cited formula uses Halstead volume for V
        and cyclomatic complexity for G. With G taken as the line count, the
        ``0.23 * loc`` term drives the result to 0 for files of a few
        hundred lines -- confirm whether that substitution is intended.
        """
        # Simplified implementation.
        loc = len(parsed_code.original_code.split('\n'))
        complexity = self._calculate_cyclomatic_complexity(parsed_code)

        mi = 171 - 5.2 * math.log(complexity) - 0.23 * loc - 16.2 * math.log(loc)
        return max(0, min(100, mi))

bash
## 29.4.4 语义分析

### 语义分析器

```python

class SemanticAnalyzer:
    """Infer purpose, algorithms, design patterns and data flow of code.

    Purpose, algorithm and design-pattern analysis are delegated to an LLM
    through ``llm_client.complete``. Data-flow analysis is computed locally
    from the parsed signatures.

    NOTE(review): ``_parse_algorithms`` and ``_parse_design_patterns`` are
    called below but are not defined in the visible code; they must be
    provided elsewhere (e.g. by a subclass) before ``analyze`` can run.
    """

    def __init__(self, llm_client: LLMClient):
        self.llm_client = llm_client

    async def analyze(self, parsed_code: ParsedCode,
                     structure: StructureAnalysis) -> SemanticAnalysis:
        """Run all semantic analyses in sequence and bundle the results.

        Args:
            parsed_code: Output of the parsing stage.
            structure: Output of the structure-analysis stage; only its
                ``class_hierarchy`` is read here.

        Returns:
            A populated ``SemanticAnalysis``.
        """
        analysis = SemanticAnalysis()

        # Intent of the code
        analysis.purpose = await self._analyze_purpose(parsed_code)

        # Algorithms in use
        analysis.algorithms = await self._analyze_algorithms(parsed_code)

        # Design patterns
        analysis.design_patterns = await self._analyze_design_patterns(
            parsed_code,
            structure
        )

        # Data flow
        analysis.data_flow = await self._analyze_data_flow(parsed_code)

        return analysis

    async def _analyze_purpose(self,
                              parsed_code: ParsedCode) -> str:
        """Ask the LLM to describe what the code is for; return its reply."""
        prompt = f"""
        分析以下代码的主要目的和功能:

        {parsed_code.original_code}

        请用简洁的语言描述这段代码的主要功能。
        """

        return await self.llm_client.complete(prompt)

    async def _analyze_algorithms(self,
                                 parsed_code: ParsedCode) -> List[AlgorithmInfo]:
        """Ask the LLM to identify algorithms and their complexities.

        The prompt requests a JSON reply, which is handed to
        ``_parse_algorithms`` for decoding.
        """
        prompt = f"""
        识别以下代码中使用的算法:

        {parsed_code.original_code}

        请识别:
        1. 使用的主要算法(排序、搜索、图算法等)
        2. 算法的时间复杂度
        3. 算法的空间复杂度

        以 JSON 格式返回结果。
        """

        response = await self.llm_client.complete(prompt)
        return self._parse_algorithms(response)

    async def _analyze_design_patterns(self,
                                      parsed_code: ParsedCode,
                                      structure: StructureAnalysis) -> List[str]:
        """Ask the LLM which design patterns the classes/functions exhibit.

        The reply is handed to ``_parse_design_patterns`` for decoding.
        """
        prompt = f"""
        识别以下代码中使用的设计模式:

        类:{parsed_code.classes}
        函数:{parsed_code.functions}
        类层次结构:{structure.class_hierarchy}

        请识别使用的设计模式。
        """

        response = await self.llm_client.complete(prompt)
        return self._parse_design_patterns(response)

    async def _analyze_data_flow(self,
                                parsed_code: ParsedCode) -> DataFlowAnalysis:
        """Summarise inputs, outputs and transformations (no LLM call)."""
        analysis = DataFlowAnalysis()

        analysis.inputs = self._identify_inputs(parsed_code)
        analysis.outputs = self._identify_outputs(parsed_code)
        analysis.transformations = self._identify_transformations(parsed_code)

        return analysis

    def _identify_inputs(self, parsed_code: ParsedCode) -> List[str]:
        """Return the distinct parameter names of all functions and methods.

        Names are de-duplicated in first-seen order (functions first, then
        methods) so the result is stable between runs. The conventional
        receiver names ``self`` and ``cls`` are skipped for methods because
        they are not data inputs.
        """
        receiver_names = ('self', 'cls')
        # dict preserves insertion order, giving an ordered de-duplication.
        seen = {}

        # Function parameters
        for func in parsed_code.functions:
            for name in func.arguments:
                seen.setdefault(name, None)

        # Method parameters
        for cls in parsed_code.classes:
            for method in cls.methods:
                for name in method.arguments:
                    if name not in receiver_names:
                        seen.setdefault(name, None)

        return list(seen)

    def _identify_outputs(self, parsed_code: ParsedCode) -> List[str]:
        """Describe every function/method whose return type is not "None".

        Entries look like ``name() -> type`` for functions and
        ``Class.name() -> type`` for methods.
        """
        outputs = []

        # Function return values
        for func in parsed_code.functions:
            if func.return_type != "None":
                outputs.append(f"{func.name}() -> {func.return_type}")

        # Method return values
        for cls in parsed_code.classes:
            for method in cls.methods:
                if method.return_type != "None":
                    outputs.append(f"{cls.name}.{method.name}() -> {method.return_type}")

        return outputs

    def _identify_transformations(self,
                                 parsed_code: ParsedCode) -> List[str]:
        """Return names of functions that look like data transformations.

        Simplified heuristic: a function qualifies when its lower-cased name
        contains one of a few keywords. Methods are not inspected.
        """
        keywords = ['transform', 'convert', 'process', 'compute']
        return [
            func.name
            for func in parsed_code.functions
            if any(keyword in func.name.lower() for keyword in keywords)
        ]

```

## 29.4.5 代码解释生成

### 解释生成器

class ExplanationGenerator:
    """Turn parse/structure/semantic results into LLM-written explanations.

    Every ``_generate_*`` helper builds a prompt and returns whatever
    ``llm_client.complete`` replies; calls are awaited one after another.
    """

    def __init__(self, llm_client: LLMClient):
        self.llm_client = llm_client

    async def generate_explanation(self,
                                   parsed_code: ParsedCode,
                                   structure: StructureAnalysis,
                                   semantic: SemanticAnalysis) -> CodeExplanation:
        """Build the full explanation for a piece of code.

        Args:
            parsed_code: Output of the parsing stage.
            structure: Output of the structure-analysis stage.
            semantic: Output of the semantic-analysis stage.

        Returns:
            A ``CodeExplanation`` with overview, per-class, per-function and
            per-algorithm explanations plus usage examples.
        """
        explanation = CodeExplanation()

        # Overall overview
        explanation.overview = await self._generate_overview(
            parsed_code,
            semantic
        )

        # Per-class explanations
        explanation.class_explanations = await self._generate_class_explanations(
            parsed_code.classes,
            structure
        )

        # Per-function explanations
        explanation.function_explanations = await self._generate_function_explanations(
            parsed_code.functions,
            structure
        )

        # Per-algorithm explanations
        explanation.algorithm_explanations = await self._generate_algorithm_explanations(
            semantic.algorithms
        )

        # Usage examples
        explanation.usage_examples = await self._generate_usage_examples(
            parsed_code
        )

        return explanation

    async def _generate_overview(self,
                                 parsed_code: ParsedCode,
                                 semantic: SemanticAnalysis) -> str:
        """Ask the LLM for a high-level overview of the code."""
        class_names = [c.name for c in parsed_code.classes]
        function_names = [f.name for f in parsed_code.functions]
        prompt = (
            "\n"
            "为以下代码生成总体概述:\n"
            f"代码目的:{semantic.purpose}\n"
            f"类:{class_names}\n"
            f"函数:{function_names}\n"
            "请生成一个清晰的总体概述,包括:\n"
            "1. 代码的主要功能\n"
            "2. 主要组件\n"
            "3. 整体架构\n"
        )
        return await self.llm_client.complete(prompt)

    async def _generate_class_explanations(self,
                                           classes: List[ClassInfo],
                                           structure: StructureAnalysis) -> Dict[str, str]:
        """Return ``{class name: explanation}``, one LLM call per class.

        ``structure`` is accepted for interface symmetry and is not read.
        """
        explanations = {}
        for cls in classes:
            method_names = [m.name for m in cls.methods]
            # Falsy entries (e.g. None placeholders) are skipped.
            attribute_names = [a.name for a in cls.attributes if a]
            prompt = (
                "\n"
                "为以下类生成详细解释:\n"
                f"类名:{cls.name}\n"
                f"父类:{cls.bases}\n"
                f"方法:{method_names}\n"
                f"属性:{attribute_names}\n"
                f"文档字符串:{cls.docstring}\n"
                "请生成详细的类解释,包括:\n"
                "1. 类的职责\n"
                "2. 主要方法的功能\n"
                "3. 使用场景\n"
            )
            explanations[cls.name] = await self.llm_client.complete(prompt)
        return explanations

    async def _generate_function_explanations(self,
                                              functions: List[FunctionInfo],
                                              structure: StructureAnalysis) -> Dict[str, str]:
        """Return ``{function name: explanation}``, one LLM call per function.

        ``structure`` is accepted for interface symmetry and is not read.
        """
        explanations = {}
        for func in functions:
            prompt = (
                "\n"
                "为以下函数生成详细解释:\n"
                f"函数名:{func.name}\n"
                f"参数:{func.arguments}\n"
                f"返回类型:{func.return_type}\n"
                f"文档字符串:{func.docstring}\n"
                "请生成详细的函数解释,包括:\n"
                "1. 函数的功能\n"
                "2. 参数说明\n"
                "3. 返回值说明\n"
                "4. 使用示例\n"
            )
            explanations[func.name] = await self.llm_client.complete(prompt)
        return explanations

    async def _generate_algorithm_explanations(self,
                                               algorithms: List[AlgorithmInfo]) -> Dict[str, str]:
        """Return ``{algorithm name: explanation}``, one LLM call each."""
        explanations = {}
        for algo in algorithms:
            prompt = (
                "\n"
                "为以下算法生成详细解释:\n"
                f"算法名称:{algo.name}\n"
                f"时间复杂度:{algo.time_complexity}\n"
                f"空间复杂度:{algo.space_complexity}\n"
                "请生成详细的算法解释,包括:\n"
                "1. 算法原理\n"
                "2. 实现细节\n"
                "3. 优缺点分析\n"
                "4. 适用场景\n"
            )
            explanations[algo.name] = await self.llm_client.complete(prompt)
        return explanations

    async def _generate_usage_examples(self,
                                       parsed_code: ParsedCode) -> List[str]:
        """Return LLM-written usage examples.

        One entry is produced per class, followed by one entry for each of
        the first three functions.
        """
        examples = []

        # One example set per class
        for cls in parsed_code.classes:
            method_names = [m.name for m in cls.methods]
            prompt = (
                "\n"
                "为以下类生成使用示例:\n"
                f"类名:{cls.name}\n"
                f"方法:{method_names}\n"
                "请生成 2-3 个实用的使用示例。\n"
            )
            examples.append(await self.llm_client.complete(prompt))

        # Only the first three functions get examples
        for func in parsed_code.functions[:3]:
            prompt = (
                "\n"
                "为以下函数生成使用示例:\n"
                f"函数名:{func.name}\n"
                f"参数:{func.arguments}\n"
                "请生成 1-2 个实用的使用示例。\n"
            )
            examples.append(await self.llm_client.complete(prompt))

        return examples

标记本节教程为已读

记录您的学习进度,方便后续查看。