c1r3x committed · Commit aa300a4 · verified · 1 Parent(s): 4b6e45c

Review Agent: Uploaded remaining files

src/__init__.py ADDED
@@ -0,0 +1 @@
+ # Code Review Agent Package
src/core/__init__.py ADDED
@@ -0,0 +1 @@
+ # Core Package for Code Review Agent
src/core/agent_manager.py ADDED
@@ -0,0 +1,308 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Agent Manager
+
+ This module provides the main orchestrator for the Code Review Agent.
+ It coordinates the review process and manages the state of the application.
+ """
+
+ import os
+ import time
+ import logging
+ import tempfile
+
+ import gradio as gr
+
+ from src.core.language_detector import LanguageDetector
+ from src.services.code_analyzer import CodeAnalyzer
+ from src.services.report_generator import ReportGenerator
+ from src.services.repository_service import RepositoryService
+ from src.services.security_scanner import SecurityScanner
+ from src.services.performance_analyzer import PerformanceAnalyzer
+
+ logger = logging.getLogger(__name__)
+
+
+ class AgentManager:
+     """
+     Main orchestrator for the Code Review Agent.
+
+     This class coordinates the review process, manages the application state,
+     and provides the interface between the UI and the business logic.
+     """
+
+     def __init__(self):
+         """Initialize the AgentManager."""
+         # Initialize state management
+         self.state = {
+             'repo_url': None,
+             'progress': {},
+             'results': {},
+             'current_step': None
+         }
+
+         # Initialize services
+         self.language_detector = LanguageDetector()
+         self.code_analyzer = CodeAnalyzer()
+         self.report_generator = ReportGenerator()
+         self.repository_service = RepositoryService()
+         self.security_scanner = SecurityScanner()
+         self.performance_analyzer = PerformanceAnalyzer()
+         self.temp_dir = tempfile.mkdtemp(prefix="code_review_agent_")
+
+         logger.info(f"Initialized AgentManager with temp directory: {self.temp_dir}")
+
+     def start_review(self, repo_url, github_token=None, selected_languages=None):
+         """
+         Start the code review process for a GitHub repository.
+
+         Args:
+             repo_url (str): The URL of the GitHub repository to review.
+             github_token (str, optional): GitHub authentication token for private repositories.
+             selected_languages (list, optional): Languages to analyze. If None,
+                 languages are auto-detected.
+
+         Returns:
+             tuple: (progress_group, overall_progress, status_message, results_dashboard)
+                 with the updated UI components.
+         """
+         # Initialize progress components outside the try block so they can be
+         # returned even when an error occurs.
+         progress_group = gr.Group(visible=True)
+         overall_progress = gr.Slider(value=0)
+         status_message = gr.Markdown("*Starting review...*")
+
+         try:
+             # Reset state for the new review
+             self.state = {
+                 'repo_url': repo_url,
+                 'progress': {},
+                 'results': {},
+                 'current_step': None
+             }
+
+             # Clone repository
+             self._update_progress("Repository Cloning", 0, overall_progress, status_message)
+             repo_path = self._clone_repository(repo_url, github_token)
+             self._update_progress("Repository Cloning", 100, overall_progress, status_message)
+
+             # Detect languages
+             self._update_progress("Language Detection", 0, overall_progress, status_message)
+             if selected_languages:
+                 languages = selected_languages
+                 logger.info(f"Using selected languages: {languages}")
+             else:
+                 languages = self.language_detector.detect_languages(repo_path)
+                 logger.info(f"Auto-detected languages: {languages}")
+
+             self.state['languages'] = languages
+             self._update_progress("Language Detection", 100, overall_progress, status_message)
+
+             # Perform code analysis
+             self._update_progress("Code Analysis", 0, overall_progress, status_message)
+             code_analysis_results = self.code_analyzer.analyze_repository(repo_path, languages)
+             self.state['results']['code_analysis'] = code_analysis_results
+             self._update_progress("Code Analysis", 100, overall_progress, status_message)
+
+             # Perform security scanning. Stored under 'security_scan' so downstream
+             # consumers (e.g. AIReviewService._extract_context_for_file) can find it.
+             self._update_progress("Security Scanning", 0, overall_progress, status_message)
+             security_results = self.security_scanner.scan_repository(repo_path, languages)
+             self.state['results']['security_scan'] = security_results
+             self._update_progress("Security Scanning", 100, overall_progress, status_message)
+
+             # Perform performance analysis
+             self._update_progress("Performance Analysis", 0, overall_progress, status_message)
+             performance_results = self.performance_analyzer.analyze_repository(repo_path, languages)
+             self.state['results']['performance_analysis'] = performance_results
+             self._update_progress("Performance Analysis", 100, overall_progress, status_message)
+
+             # Perform AI review
+             self._update_progress("AI Review", 0, overall_progress, status_message)
+             ai_review_results = self._perform_ai_review(repo_path, languages)
+             self.state['results']['ai_review'] = ai_review_results
+             self._update_progress("AI Review", 100, overall_progress, status_message)
+
+             # Generate report
+             self._update_progress("Report Generation", 0, overall_progress, status_message)
+             repo_name = repo_url.split('/')[-1].replace('.git', '')
+             report_paths = self.report_generator.generate_report(
+                 repo_name, self.state['results']
+             )
+             self.state['report_paths'] = report_paths
+             self._update_progress("Report Generation", 100, overall_progress, status_message)
+
+             # Update results dashboard
+             results_dashboard = self._create_results_dashboard(self.state['results'])
+             results_dashboard.update(visible=True)
+
+             return progress_group, overall_progress, status_message, results_dashboard
+
+         except Exception as e:
+             logger.exception(f"Error during code review: {e}")
+             # Surface the error in the progress components
+             status_message.update(value=f"*Error: {str(e)}*")
+             return progress_group, overall_progress, status_message, None
+
+     def export_report(self, results_dashboard, export_format):
+         """
+         Export the code review report in the specified format.
+
+         Args:
+             results_dashboard: The results dashboard component.
+             export_format (str): The format to export the report in ('pdf', 'json', 'html', 'csv').
+
+         Returns:
+             str: The path to the exported file, or None on failure.
+         """
+         try:
+             if not self.state.get('results'):
+                 logger.warning("No results available to export")
+                 return None
+
+             # Get the actual format value if a component was passed in
+             format_value = export_format.value if hasattr(export_format, 'value') else export_format
+
+             # Export the report in the specified format via the report generator,
+             # which owns the output paths.
+             repo_name = self.state['repo_url'].split('/')[-1].replace('.git', '')
+             report_paths = self.report_generator.generate_report(
+                 repo_name, self.state['results'], format_value
+             )
+
+             if format_value in report_paths:
+                 logger.info(f"Exported report to {report_paths[format_value]}")
+                 return report_paths[format_value]
+
+             logger.warning(f"Unsupported export format: {format_value}")
+             return None
+
+         except Exception as e:
+             logger.exception(f"Error exporting report: {e}")
+             return None
+
+     def _clone_repository(self, repo_url, github_token=None):
+         """
+         Clone the GitHub repository to a temporary directory.
+
+         Args:
+             repo_url (str): The URL of the GitHub repository to clone.
+             github_token (str, optional): GitHub authentication token for private repositories.
+
+         Returns:
+             str: The path to the cloned repository.
+         """
+         # Create a repository service rooted in this manager's temp directory
+         repo_service = RepositoryService(base_temp_dir=self.temp_dir)
+
+         try:
+             if github_token and github_token.strip():
+                 # Embed the token in the URL for authenticated cloning
+                 auth_url = repo_url.replace('https://', f'https://{github_token}@')
+                 repo_path = repo_service.clone_repository(auth_url)
+                 logger.info("Cloned repository using GitHub token authentication")
+             else:
+                 # Clone without authentication (public repositories)
+                 repo_path = repo_service.clone_repository(repo_url)
+                 logger.info("Cloned repository without authentication")
+
+             return repo_path
+         except Exception as e:
+             logger.error(f"Error cloning repository: {e}")
+             raise
+
+     def _perform_ai_review(self, repo_path, languages):
+         """
+         Perform AI-powered code review.
+
+         Args:
+             repo_path (str): The path to the repository.
+             languages (list): List of programming languages to analyze.
+
+         Returns:
+             dict: AI review results.
+         """
+         try:
+             # Imported lazily so a missing API key does not break startup
+             from src.mcp.ai_review import AIReviewService
+
+             # Collect candidate files; the service itself caps how many it reviews
+             files = []
+             for root, _, filenames in os.walk(repo_path):
+                 for filename in filenames:
+                     files.append(os.path.join(root, filename))
+
+             ai_reviewer = AIReviewService()
+             results = ai_reviewer.review_repository(
+                 repo_path, files, languages, self.state.get('results')
+             )
+
+             logger.info(f"AI review completed for {len(languages)} languages")
+             return results
+         except Exception as e:
+             logger.error(f"Error during AI review: {e}")
+             return {
+                 'error': str(e),
+                 'suggestions': [],
+                 'issues': []
+             }
+
+     def _update_progress(self, step, value, overall_progress, status_message):
+         """
+         Update the progress components for a specific step.
+
+         Args:
+             step (str): The step to update.
+             value (int): The progress value (0-100).
+             overall_progress: The overall progress slider component.
+             status_message: The status message markdown component.
+         """
+         # Update state
+         self.state['progress'][step] = value
+         self.state['current_step'] = step
+
+         # Calculate overall progress. The current step is excluded from the
+         # completed count so it is not counted twice when it reaches 100%.
+         total_steps = 7  # Total number of steps in the review process
+         completed_steps = sum(
+             1 for s, v in self.state['progress'].items() if v == 100 and s != step
+         )
+         overall_value = (completed_steps * 100 + value) / total_steps
+
+         # Update UI components
+         overall_progress.update(value=overall_value)
+         status_message.update(value=f"*{step}: {value}%*")
+
+         logger.info(f"Progress update: {step} - {value}% (Overall: {overall_value:.1f}%)")
+         time.sleep(0.5)  # Simulate progress update time
+
+     def _create_results_dashboard(self, report):
+         """
+         Create a results dashboard component for the UI.
+
+         Args:
+             report (dict): The code review report.
+
+         Returns:
+             object: A results dashboard component.
+         """
+         # This is a placeholder. In a real implementation, this would create a
+         # results dashboard component for the UI.
+         class ResultsDashboard:
+             def __init__(self):
+                 self.visible = False
+
+             def update(self, visible=None):
+                 if visible is not None:
+                     self.visible = visible
+                 return self
+
+         return ResultsDashboard()
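
To make the orchestration above concrete, here is a minimal smoke-test sketch (not part of the commit): it assumes `src/` is on `PYTHONPATH`, that the sibling service modules are importable, and that the target repository is public. The returned Gradio components are placeholders here, so only the recorded state is inspected.

```python
# Hypothetical smoke test for AgentManager (assumed import layout).
from src.core.agent_manager import AgentManager

manager = AgentManager()

# Public repository, no token, auto-detected languages.
progress_group, overall, status, dashboard = manager.start_review(
    "https://github.com/octocat/Hello-World.git"
)

print(manager.state["languages"])          # e.g. ['Python']
print(manager.state["progress"])           # per-step percentages; all 100 on success
print(manager.state.get("report_paths"))   # paths returned by ReportGenerator
```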
src/core/language_detector.py ADDED
@@ -0,0 +1,182 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Language Detector
+
+ This module provides functionality for detecting programming languages in a repository.
+ """
+
+ import os
+ import logging
+ from collections import Counter
+
+ logger = logging.getLogger(__name__)
+
+ # File extension to language mapping
+ EXTENSION_TO_LANGUAGE = {
+     '.py': 'Python',
+     '.js': 'JavaScript',
+     '.jsx': 'JavaScript',
+     '.ts': 'TypeScript',
+     '.tsx': 'TypeScript',
+     '.java': 'Java',
+     '.go': 'Go',
+     '.rs': 'Rust',
+     '.cpp': 'C++',
+     '.cc': 'C++',
+     '.cxx': 'C++',
+     '.c': 'C',
+     '.h': 'C',
+     '.hpp': 'C++',
+     '.cs': 'C#',
+     '.php': 'PHP',
+     '.rb': 'Ruby',
+     '.swift': 'Swift',
+     '.kt': 'Kotlin',
+     '.scala': 'Scala',
+     '.r': 'R',
+     '.sh': 'Shell',
+     '.bash': 'Shell',
+     '.zsh': 'Shell',
+     '.html': 'HTML',
+     '.htm': 'HTML',
+     '.css': 'CSS',
+     '.scss': 'SCSS',
+     '.sass': 'SCSS',
+     '.less': 'Less',
+     '.md': 'Markdown',
+     '.json': 'JSON',
+     '.xml': 'XML',
+     '.yaml': 'YAML',
+     '.yml': 'YAML',
+     '.sql': 'SQL',
+     '.graphql': 'GraphQL',
+     '.gql': 'GraphQL',
+ }
+
+ # Special files to language mapping
+ SPECIAL_FILES_TO_LANGUAGE = {
+     'Dockerfile': 'Docker',
+     'docker-compose.yml': 'Docker',
+     'docker-compose.yaml': 'Docker',
+     'Makefile': 'Make',
+     'CMakeLists.txt': 'CMake',
+     'package.json': 'JavaScript',
+     'tsconfig.json': 'TypeScript',
+     'requirements.txt': 'Python',
+     'setup.py': 'Python',
+     'pom.xml': 'Java',
+     'build.gradle': 'Java',
+     'Cargo.toml': 'Rust',
+     'go.mod': 'Go',
+ }
+
+
+ class LanguageDetector:
+     """
+     Detects programming languages in a repository.
+     """
+
+     def __init__(self):
+         """Initialize the LanguageDetector."""
+         logger.info("Initialized LanguageDetector")
+
+     def detect_languages(self, repo_path):
+         """
+         Detect programming languages in a repository.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             list: A list of detected programming languages, sorted by prevalence.
+         """
+         logger.info(f"Detecting languages in repository: {repo_path}")
+
+         language_counter = Counter()
+
+         for root, dirs, files in os.walk(repo_path):
+             # Skip hidden directories and common non-code directories
+             dirs[:] = [d for d in dirs if not d.startswith('.') and
+                        d not in ['node_modules', 'venv', '.git', '__pycache__', 'dist', 'build']]
+
+             for file in files:
+                 # Check if it's a special file
+                 if file in SPECIAL_FILES_TO_LANGUAGE:
+                     language_counter[SPECIAL_FILES_TO_LANGUAGE[file]] += 1
+                     continue
+
+                 # Check the file extension
+                 _, ext = os.path.splitext(file)
+                 if ext in EXTENSION_TO_LANGUAGE:
+                     language_counter[EXTENSION_TO_LANGUAGE[ext]] += 1
+
+         # Keep only the languages the analyzers support, ordered by file count
+         supported_languages = [
+             "Python", "JavaScript", "TypeScript", "Java",
+             "Go", "Rust", "C++", "C#", "PHP", "Ruby",
+             "Swift", "Kotlin", "Scala", "R", "Shell"
+         ]
+
+         detected_languages = [lang for lang, _ in language_counter.most_common()
+                               if lang in supported_languages]
+
+         logger.info(f"Detected languages: {detected_languages}")
+         return detected_languages
+
+     def get_language_breakdown(self, repo_path):
+         """
+         Get a breakdown of programming languages in a repository by lines of code.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: A dictionary mapping languages to lines of code.
+         """
+         logger.info(f"Getting language breakdown for repository: {repo_path}")
+
+         language_loc = {}
+
+         for root, dirs, files in os.walk(repo_path):
+             # Skip hidden directories and common non-code directories
+             dirs[:] = [d for d in dirs if not d.startswith('.') and
+                        d not in ['node_modules', 'venv', '.git', '__pycache__', 'dist', 'build']]
+
+             for file in files:
+                 file_path = os.path.join(root, file)
+
+                 # Determine the language
+                 language = None
+                 if file in SPECIAL_FILES_TO_LANGUAGE:
+                     language = SPECIAL_FILES_TO_LANGUAGE[file]
+                 else:
+                     _, ext = os.path.splitext(file)
+                     if ext in EXTENSION_TO_LANGUAGE:
+                         language = EXTENSION_TO_LANGUAGE[ext]
+
+                 if language:
+                     # Count lines of code
+                     try:
+                         with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                             line_count = sum(1 for _ in f)
+
+                         language_loc[language] = language_loc.get(language, 0) + line_count
+                     except Exception as e:
+                         logger.warning(f"Error counting lines in {file_path}: {e}")
+
+         logger.info(f"Language breakdown: {language_loc}")
+         return language_loc
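
A quick, self-contained sketch of the detector (the file names below are invented for illustration). Note that `detect_languages` filters to the supported list, so Docker is dropped there but still shows up in the line-count breakdown:

```python
# Self-contained check of LanguageDetector against a throwaway repo layout.
import tempfile
from pathlib import Path

from src.core.language_detector import LanguageDetector

with tempfile.TemporaryDirectory() as repo:
    Path(repo, "app.py").write_text("print('hello')\n")
    Path(repo, "index.ts").write_text("export {};\n")
    Path(repo, "Dockerfile").write_text("FROM python:3.11\n")

    detector = LanguageDetector()
    print(detector.detect_languages(repo))        # e.g. ['Python', 'TypeScript']
    print(detector.get_language_breakdown(repo))  # e.g. {'Python': 1, 'TypeScript': 1, 'Docker': 1}
```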
src/main.py ADDED
@@ -0,0 +1,65 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Code Review Agent - Main Entry Point
+
+ This module serves as the entry point for the Code Review Agent application.
+ It initializes the Gradio interface and starts the web server.
+ """
+
+ import os
+ import sys
+ import logging
+
+ from dotenv import load_dotenv
+
+ # Add the project root to the Python path
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+ # Import application modules
+ from src.ui.gradio_app import create_gradio_app
+ from src.core.agent_manager import AgentManager
+
+ # Configure logging; create the logs directory if it doesn't exist
+ logs_dir = os.path.join(os.path.dirname(__file__), '..', 'logs')
+ os.makedirs(logs_dir, exist_ok=True)
+
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+     handlers=[
+         logging.StreamHandler(),
+         logging.FileHandler(os.path.join(logs_dir, 'app.log'), mode='a')
+     ]
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ def main():
+     """Main function to start the Code Review Agent application."""
+     # Load environment variables (the logs directory is already created above)
+     load_dotenv()
+
+     # Initialize the agent manager
+     agent_manager = AgentManager()
+
+     # Create and launch the Gradio app
+     app = create_gradio_app(agent_manager)
+
+     # Start the Gradio server
+     app.launch(server_name="0.0.0.0", server_port=7860)
+
+
+ if __name__ == "__main__":
+     try:
+         logger.info("Starting Code Review Agent application")
+         main()
+     except Exception as e:
+         logger.exception(f"Error starting application: {e}")
+         sys.exit(1)
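
For a local run, the only external requirement is the Nebius key consumed by `src/mcp/ai_review.py`. A hedged sketch of a programmatic launch (the key value is a placeholder, and the project root is assumed to be the working directory):

```python
# Equivalent programmatic launch; normally the key lives in a .env file
# that load_dotenv() picks up.
import os

os.environ.setdefault("NEBIUS_API_KEY", "<your-nebius-key>")  # placeholder

from src.main import main

main()  # serves the Gradio app on http://0.0.0.0:7860
```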
src/mcp/__init__.py ADDED
@@ -0,0 +1 @@
+ # MCP Integration Package for Code Review Agent
src/mcp/ai_review.py ADDED
@@ -0,0 +1,451 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ AI Review Service
+
+ This module provides functionality for AI-powered code review using the
+ Nebius-hosted Qwen2.5-72B-Instruct model.
+ """
+
+ import os
+ import logging
+ import re
+
+ from openai import OpenAI
+ from dotenv import load_dotenv
+
+ logger = logging.getLogger(__name__)
+
+ # Load environment variables
+ load_dotenv()
+
+
+ class AIReviewService:
+     """
+     Service for AI-powered code review using the Nebius Qwen2.5-72B-Instruct model.
+     """
+
+     def __init__(self):
+         """Initialize the AIReviewService."""
+         self.client = None
+         self.api_key = os.getenv('NEBIUS_API_KEY')
+         if not self.api_key:
+             logger.warning("NEBIUS_API_KEY not found in environment variables. AI review will not be available.")
+         else:
+             self.client = OpenAI(
+                 base_url="https://api.studio.nebius.com/v1/",
+                 api_key=self.api_key
+             )
+
+         logger.info("Initialized AIReviewService")
+
+     def is_available(self):
+         """
+         Check if the AI review service is available.
+
+         Returns:
+             bool: True if the service is available, False otherwise.
+         """
+         return self.api_key is not None
+
+     def review_code(self, file_path, file_content, language, context=None):
+         """
+         Review code using Qwen.
+
+         Args:
+             file_path (str): The path to the file being reviewed.
+             file_content (str): The content of the file being reviewed.
+             language (str): The programming language of the file.
+             context (dict, optional): Additional context for the review.
+
+         Returns:
+             dict: The review results.
+         """
+         if not self.is_available():
+             return {
+                 'status': 'error',
+                 'error': 'AI review service is not available. Please set NEBIUS_API_KEY in environment variables.',
+                 'suggestions': [],
+             }
+
+         logger.info(f"Reviewing {language} code in {file_path}")
+
+         # Prepare the prompt for Qwen
+         prompt = self._prepare_prompt(file_path, file_content, language, context)
+
+         try:
+             # Call the Nebius API with the Qwen2.5-72B-Instruct model
+             response = self.client.chat.completions.create(
+                 model="Qwen/Qwen2.5-72B-Instruct",
+                 max_tokens=4000,
+                 temperature=0,
+                 messages=[
+                     {"role": "system", "content": self._get_system_prompt(language)},
+                     {"role": "user", "content": prompt}
+                 ]
+             )
+
+             # Parse the response
+             review_text = response.choices[0].message.content
+             suggestions = self._parse_review(review_text)
+
+             return {
+                 'status': 'success',
+                 'review_text': review_text,
+                 'suggestions': suggestions,
+             }
+
+         except Exception as e:
+             logger.error(f"Error calling Qwen API: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'suggestions': [],
+             }
+
+     def review_repository(self, repo_path, files, languages, analysis_results=None):
+         """
+         Review a repository using Qwen.
+
+         Args:
+             repo_path (str): The path to the repository.
+             files (list): A list of files to review.
+             languages (list): A list of programming languages in the repository.
+             analysis_results (dict, optional): Results from other analysis tools.
+
+         Returns:
+             dict: The review results.
+         """
+         if not self.is_available():
+             return {
+                 'status': 'error',
+                 'error': 'AI review service is not available. Please set NEBIUS_API_KEY in environment variables.',
+                 'reviews': {},
+                 'summary': '',
+             }
+
+         logger.info(f"Reviewing repository at {repo_path}")
+
+         # Limit the number of files to review to avoid excessive API usage
+         max_files = 10
+         if len(files) > max_files:
+             logger.warning(f"Too many files to review ({len(files)}). Limiting to {max_files} files.")
+             files = files[:max_files]
+
+         # Review each file
+         reviews = {}
+         for file_path in files:
+             try:
+                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                     file_content = f.read()
+
+                 # Determine the language based on the file extension
+                 _, ext = os.path.splitext(file_path)
+                 language = self._get_language_from_extension(ext)
+
+                 if language:
+                     # Provide context from analysis results if available
+                     context = None
+                     if analysis_results:
+                         context = self._extract_context_for_file(file_path, analysis_results)
+
+                     # Review the file
+                     reviews[file_path] = self.review_code(file_path, file_content, language, context)
+
+             except Exception as e:
+                 logger.error(f"Error reviewing file {file_path}: {e}")
+                 reviews[file_path] = {
+                     'status': 'error',
+                     'error': str(e),
+                     'suggestions': [],
+                 }
+
+         # Generate a summary of the review
+         summary = self._generate_repository_summary(repo_path, reviews, languages, analysis_results)
+
+         return {
+             'status': 'success',
+             'reviews': reviews,
+             'summary': summary,
+         }
+
+     def _prepare_prompt(self, file_path, file_content, language, context=None):
+         """
+         Prepare a prompt for Qwen.
+
+         Args:
+             file_path (str): The path to the file being reviewed.
+             file_content (str): The content of the file being reviewed.
+             language (str): The programming language of the file.
+             context (dict, optional): Additional context for the review.
+
+         Returns:
+             str: The prompt for Qwen.
+         """
+         prompt = f"""Please review the following {language} code and provide constructive feedback:
+
+ File: {file_path}
+
+ ```{language}
+ {file_content}
+ ```
+
+ """
+
+         if context:
+             prompt += "Additional context:\n"
+             if 'issues' in context:
+                 prompt += "\nIssues detected by other tools:\n"
+                 for issue in context['issues']:
+                     prompt += f"- {issue.get('issue', 'Unknown issue')} at line {issue.get('line', 'unknown')}: {issue.get('description', '')}\n"
+
+             if 'vulnerabilities' in context:
+                 prompt += "\nSecurity vulnerabilities detected:\n"
+                 for vuln in context['vulnerabilities']:
+                     prompt += f"- {vuln.get('issue', 'Unknown vulnerability')} at line {vuln.get('line', 'unknown')}: {vuln.get('description', '')}\n"
+
+         prompt += "\nPlease provide your review with the following sections:\n"
+         prompt += "1. Code Quality: Assess the overall quality, readability, and maintainability.\n"
+         prompt += "2. Potential Issues: Identify any bugs, edge cases, or potential problems.\n"
+         prompt += "3. Security Concerns: Highlight any security vulnerabilities or risks.\n"
+         prompt += "4. Performance Considerations: Note any performance bottlenecks or inefficiencies.\n"
+         prompt += "5. Specific Suggestions: Provide concrete, actionable suggestions for improvement.\n"
+
+         return prompt
+
+     def _get_system_prompt(self, language):
+         """
+         Get the system prompt for Qwen based on the programming language.
+
+         Args:
+             language (str): The programming language.
+
+         Returns:
+             str: The system prompt for Qwen.
+         """
+         base_prompt = """You are an expert code reviewer with deep knowledge of software development best practices, design patterns, and security.
+ Your task is to review code and provide constructive, actionable feedback.
+ Be thorough but prioritize the most important issues.
+ Format your response in markdown with clear sections.
+ For each suggestion, include the line number, the issue, and a recommended solution.
+ Focus on:
+ - Code quality and readability
+ - Potential bugs and edge cases
+ - Security vulnerabilities
+ - Performance optimizations
+ - Adherence to best practices
+
+ Your feedback should be specific, actionable, and educational. Explain why each suggestion matters.
+ Do not hallucinate vulnerabilities. Base claims on code patterns.
+ """
+
+         # Add language-specific guidance
+         if language == 'Python':
+             base_prompt += "\nFor Python code, pay special attention to PEP 8 compliance, proper exception handling, and Pythonic idioms."
+         elif language in ['JavaScript', 'TypeScript']:
+             base_prompt += "\nFor JavaScript/TypeScript code, focus on modern ES6+ practices, proper async handling, and potential type issues."
+         elif language == 'Java':
+             base_prompt += "\nFor Java code, examine object-oriented design, proper exception handling, and resource management."
+         elif language == 'Go':
+             base_prompt += "\nFor Go code, check for idiomatic Go patterns, proper error handling, and concurrency issues."
+         elif language == 'Rust':
+             base_prompt += "\nFor Rust code, verify memory safety, proper use of ownership/borrowing, and idiomatic Rust patterns."
+
+         return base_prompt
+
+     def _parse_review(self, review_text):
+         """
+         Parse the review text from Qwen to extract structured suggestions.
+
+         Args:
+             review_text (str): The review text from Qwen.
+
+         Returns:
+             list: A list of structured suggestions.
+         """
+         suggestions = []
+
+         # Split the review into markdown sections
+         sections = review_text.split('##')
+
+         for section in sections:
+             if not section.strip():
+                 continue
+
+             # Extract suggestions from the section
+             lines = section.strip().split('\n')
+             section_title = lines[0].strip()
+
+             current_suggestion = None
+             for line in lines[1:]:
+                 line = line.strip()
+                 if not line:
+                     continue
+
+                 # A new suggestion usually starts with a bullet or mentions a line number
+                 line_number_match = re.search(r'line\s+(\d+)', line, re.IGNORECASE)
+                 if line_number_match or line.startswith('- ') or line.startswith('* '):
+                     # Save the previous suggestion if it exists
+                     if current_suggestion:
+                         suggestions.append(current_suggestion)
+
+                     # Start a new suggestion
+                     line_number = int(line_number_match.group(1)) if line_number_match else None
+                     current_suggestion = {
+                         'section': section_title,
+                         'line': line_number,
+                         'description': line,
+                         'details': '',
+                     }
+                 elif current_suggestion:
+                     # Accumulate detail lines for the current suggestion
+                     current_suggestion['details'] += line + '\n'
+
+             # Add the last suggestion in the section
+             if current_suggestion:
+                 suggestions.append(current_suggestion)
+
+         return suggestions
+
+     def _get_language_from_extension(self, extension):
+         """
+         Get the programming language from a file extension.
+
+         Args:
+             extension (str): The file extension.
+
+         Returns:
+             str: The programming language, or None if unknown.
+         """
+         extension_to_language = {
+             '.py': 'Python',
+             '.js': 'JavaScript',
+             '.jsx': 'JavaScript',
+             '.ts': 'TypeScript',
+             '.tsx': 'TypeScript',
+             '.java': 'Java',
+             '.go': 'Go',
+             '.rs': 'Rust',
+             '.cpp': 'C++',
+             '.cc': 'C++',
+             '.c': 'C',
+             '.h': 'C',
+             '.hpp': 'C++',
+             '.cs': 'C#',
+             '.php': 'PHP',
+             '.rb': 'Ruby',
+         }
+
+         return extension_to_language.get(extension.lower())
+
+     def _extract_context_for_file(self, file_path, analysis_results):
+         """
+         Extract relevant context for a file from analysis results.
+
+         Args:
+             file_path (str): The path to the file.
+             analysis_results (dict): Results from other analysis tools.
+
+         Returns:
+             dict: Context for the file.
+         """
+         context = {
+             'issues': [],
+             'vulnerabilities': [],
+         }
+
+         # Extract code quality issues
+         if 'code_analysis' in analysis_results:
+             for language_results in analysis_results['code_analysis'].values():
+                 for issue in language_results.get('issues', []):
+                     if issue.get('file', '') == file_path:
+                         context['issues'].append(issue)
+
+         # Extract security vulnerabilities
+         if 'security_scan' in analysis_results:
+             for language_results in analysis_results['security_scan'].values():
+                 for vuln in language_results.get('vulnerabilities', []):
+                     if vuln.get('file', '') == file_path:
+                         context['vulnerabilities'].append(vuln)
+
+         # Extract performance issues
+         if 'performance_analysis' in analysis_results:
+             for language_results in analysis_results['performance_analysis'].get('language_results', {}).values():
+                 for issue in language_results.get('issues', []):
+                     if issue.get('file', '') == file_path:
+                         context['issues'].append(issue)
+
+         return context
+
+     def _generate_repository_summary(self, repo_path, reviews, languages, analysis_results=None):
+         """
+         Generate a summary of the repository review.
+
+         Args:
+             repo_path (str): The path to the repository.
+             reviews (dict): The review results for each file.
+             languages (list): A list of programming languages in the repository.
+             analysis_results (dict, optional): Results from other analysis tools.
+
+         Returns:
+             str: A summary of the repository review.
+         """
+         if not self.is_available():
+             return "AI review service is not available. Please set NEBIUS_API_KEY in environment variables."
+
+         # Prepare the prompt for Qwen
+         prompt = f"""Please provide a summary of the code review for the repository at {repo_path}.
+
+ Languages used in the repository: {', '.join(languages)}
+
+ """
+
+         # Add information about the files reviewed
+         prompt += "\nFiles reviewed:\n"
+         for file_path, review in reviews.items():
+             if review.get('status') == 'success':
+                 suggestion_count = len(review.get('suggestions', []))
+                 prompt += f"- {file_path}: {suggestion_count} suggestions\n"
+             else:
+                 prompt += f"- {file_path}: Error - {review.get('error', 'Unknown error')}\n"
+
+         # Add a summary of analysis results if available
+         if analysis_results:
+             prompt += "\nAnalysis results summary:\n"
+
+             if 'code_analysis' in analysis_results:
+                 total_issues = sum(result.get('issue_count', 0) for result in analysis_results['code_analysis'].values())
+                 prompt += f"- Code quality issues: {total_issues}\n"
+
+             if 'security_scan' in analysis_results:
+                 total_vulns = sum(result.get('vulnerability_count', 0) for result in analysis_results['security_scan'].values())
+                 prompt += f"- Security vulnerabilities: {total_vulns}\n"
+
+             if 'performance_analysis' in analysis_results:
+                 total_perf_issues = sum(result.get('issue_count', 0) for result in analysis_results['performance_analysis'].get('language_results', {}).values())
+                 prompt += f"- Performance issues: {total_perf_issues}\n"
+
+         prompt += "\nPlease provide a comprehensive summary of the code review, including:\n"
+         prompt += "1. Overall code quality assessment\n"
+         prompt += "2. Common patterns and issues found across the codebase\n"
+         prompt += "3. Strengths of the codebase\n"
+         prompt += "4. Areas for improvement\n"
+         prompt += "5. Prioritized recommendations\n"
+
+         try:
+             # Call the Nebius API with the Qwen2.5-72B-Instruct model
+             response = self.client.chat.completions.create(
+                 model="Qwen/Qwen2.5-72B-Instruct",
+                 max_tokens=4000,
+                 temperature=0,
+                 messages=[
+                     {"role": "system", "content": "You are an expert code reviewer providing a summary of a repository review. Be concise, insightful, and actionable in your feedback. Format your response in markdown with clear sections."},
+                     {"role": "user", "content": prompt}
+                 ]
+             )
+
+             return response.choices[0].message.content
+
+         except Exception as e:
+             logger.error(f"Error generating repository summary: {e}")
+             return f"Error generating repository summary: {e}"
src/services/__init__.py ADDED
@@ -0,0 +1 @@
+ # Services Package for Code Review Agent
src/services/code_analyzer.py ADDED
@@ -0,0 +1,749 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Code Analyzer Service
6
+
7
+ This module provides functionality for analyzing code quality across different languages.
8
+ """
9
+
10
+ import os
11
+ import subprocess
12
+ import logging
13
+ import json
14
+ import tempfile
15
+ from collections import defaultdict
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ class CodeAnalyzer:
20
+ """
21
+ Service for analyzing code quality across different languages.
22
+ """
23
+
24
+ def __init__(self):
25
+ """
26
+ Initialize the CodeAnalyzer.
27
+ """
28
+ logger.info("Initialized CodeAnalyzer")
29
+ self.analyzers = {
30
+ 'Python': self._analyze_python,
31
+ 'JavaScript': self._analyze_javascript,
32
+ 'TypeScript': self._analyze_typescript,
33
+ 'Java': self._analyze_java,
34
+ 'Go': self._analyze_go,
35
+ 'Rust': self._analyze_rust,
36
+ }
37
+
38
+ def analyze_repository(self, repo_path, languages):
39
+ """
40
+ Analyze code quality in a repository for the specified languages.
41
+
42
+ Args:
43
+ repo_path (str): The path to the repository.
44
+ languages (list): A list of programming languages to analyze.
45
+
46
+ Returns:
47
+ dict: A dictionary containing analysis results for each language.
48
+ """
49
+ logger.info(f"Analyzing repository at {repo_path} for languages: {languages}")
50
+
51
+ results = {}
52
+
53
+ for language in languages:
54
+ if language in self.analyzers:
55
+ try:
56
+ logger.info(f"Analyzing {language} code in {repo_path}")
57
+ results[language] = self.analyzers[language](repo_path)
58
+ except Exception as e:
59
+ logger.error(f"Error analyzing {language} code: {e}")
60
+ results[language] = {
61
+ 'status': 'error',
62
+ 'error': str(e),
63
+ 'issues': [],
64
+ }
65
+ else:
66
+ logger.warning(f"No analyzer available for {language}")
67
+ results[language] = {
68
+ 'status': 'not_supported',
69
+ 'message': f"Analysis for {language} is not supported yet.",
70
+ 'issues': [],
71
+ }
72
+
73
+ return results
74
+
75
+ def _analyze_python(self, repo_path):
76
+ """
77
+ Analyze Python code using pylint.
78
+
79
+ Args:
80
+ repo_path (str): The path to the repository.
81
+
82
+ Returns:
83
+ dict: Analysis results.
84
+ """
85
+ logger.info(f"Analyzing Python code in {repo_path}")
86
+
87
+ # Find Python files
88
+ python_files = []
89
+ for root, _, files in os.walk(repo_path):
90
+ for file in files:
91
+ if file.endswith('.py'):
92
+ python_files.append(os.path.join(root, file))
93
+
94
+ if not python_files:
95
+ return {
96
+ 'status': 'no_files',
97
+ 'message': 'No Python files found in the repository.',
98
+ 'issues': [],
99
+ }
100
+
101
+ # Create a temporary file to store pylint output
102
+ with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_file:
103
+ temp_path = temp_file.name
104
+
105
+ try:
106
+ # Run pylint with JSON reporter
107
+ cmd = [
108
+ 'pylint',
109
+ '--output-format=json',
110
+ '--reports=n',
111
+ ] + python_files
112
+
113
+ process = subprocess.run(
114
+ cmd,
115
+ stdout=subprocess.PIPE,
116
+ stderr=subprocess.PIPE,
117
+ text=True,
118
+ check=False,
119
+ )
120
+
121
+ # Parse pylint output
122
+ if process.stdout.strip():
123
+ try:
124
+ issues = json.loads(process.stdout)
125
+ except json.JSONDecodeError:
126
+ logger.error(f"Error parsing pylint output: {process.stdout}")
127
+ issues = []
128
+ else:
129
+ issues = []
130
+
131
+ # Group issues by type
132
+ issues_by_type = defaultdict(list)
133
+ for issue in issues:
134
+ issue_type = issue.get('type', 'unknown')
135
+ issues_by_type[issue_type].append(issue)
136
+
137
+ return {
138
+ 'status': 'success',
139
+ 'issues': issues,
140
+ 'issues_by_type': dict(issues_by_type),
141
+ 'issue_count': len(issues),
142
+ 'files_analyzed': len(python_files),
143
+ }
144
+
145
+ except Exception as e:
146
+ logger.error(f"Error running pylint: {e}")
147
+ return {
148
+ 'status': 'error',
149
+ 'error': str(e),
150
+ 'issues': [],
151
+ }
152
+
153
+ finally:
154
+ # Clean up the temporary file
155
+ if os.path.exists(temp_path):
156
+ os.unlink(temp_path)
157
+
158
+ def _analyze_javascript(self, repo_path):
159
+ """
160
+ Analyze JavaScript code using ESLint.
161
+
162
+ Args:
163
+ repo_path (str): The path to the repository.
164
+
165
+ Returns:
166
+ dict: Analysis results.
167
+ """
168
+ logger.info(f"Analyzing JavaScript code in {repo_path}")
169
+
170
+ # Find JavaScript files
171
+ js_files = []
172
+ for root, _, files in os.walk(repo_path):
173
+ for file in files:
174
+ if file.endswith(('.js', '.jsx')) and not 'node_modules' in root:
175
+ js_files.append(os.path.join(root, file))
176
+
177
+ if not js_files:
178
+ return {
179
+ 'status': 'no_files',
180
+ 'message': 'No JavaScript files found in the repository.',
181
+ 'issues': [],
182
+ }
183
+
184
+ # Create a temporary ESLint configuration file
185
+ eslint_config = {
186
+ "env": {
187
+ "browser": True,
188
+ "es2021": True,
189
+ "node": True
190
+ },
191
+ "extends": "eslint:recommended",
192
+ "parserOptions": {
193
+ "ecmaVersion": 12,
194
+ "sourceType": "module",
195
+ "ecmaFeatures": {
196
+ "jsx": True
197
+ }
198
+ },
199
+ "rules": {}
200
+ }
201
+
202
+ with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_config:
203
+ json.dump(eslint_config, temp_config)
204
+ temp_config_path = temp_config.name
205
+
206
+ try:
207
+ # Run ESLint with JSON formatter
208
+ cmd = [
209
+ 'npx',
210
+ 'eslint',
211
+ '--config', temp_config_path,
212
+ '--format', 'json',
213
+ ] + js_files
214
+
215
+ process = subprocess.run(
216
+ cmd,
217
+ stdout=subprocess.PIPE,
218
+ stderr=subprocess.PIPE,
219
+ text=True,
220
+ check=False,
221
+ )
222
+
223
+ # Parse ESLint output
224
+ if process.stdout.strip():
225
+ try:
226
+ eslint_results = json.loads(process.stdout)
227
+
228
+ # Extract issues from ESLint results
229
+ issues = []
230
+ for result in eslint_results:
231
+ file_path = result.get('filePath', '')
232
+ for message in result.get('messages', []):
233
+ issues.append({
234
+ 'path': file_path,
235
+ 'line': message.get('line', 0),
236
+ 'column': message.get('column', 0),
237
+ 'message': message.get('message', ''),
238
+ 'severity': message.get('severity', 0),
239
+ 'ruleId': message.get('ruleId', ''),
240
+ })
241
+ except json.JSONDecodeError:
242
+ logger.error(f"Error parsing ESLint output: {process.stdout}")
243
+ issues = []
244
+ else:
245
+ issues = []
246
+
247
+ # Group issues by severity
248
+ issues_by_severity = defaultdict(list)
249
+ for issue in issues:
250
+ severity = issue.get('severity', 0)
251
+ severity_name = {0: 'off', 1: 'warning', 2: 'error'}.get(severity, 'unknown')
252
+ issues_by_severity[severity_name].append(issue)
253
+
254
+ return {
255
+ 'status': 'success',
256
+ 'issues': issues,
257
+ 'issues_by_severity': dict(issues_by_severity),
258
+ 'issue_count': len(issues),
259
+ 'files_analyzed': len(js_files),
260
+ }
261
+
262
+ except Exception as e:
263
+ logger.error(f"Error running ESLint: {e}")
264
+ return {
265
+ 'status': 'error',
266
+ 'error': str(e),
267
+ 'issues': [],
268
+ }
269
+
270
+ finally:
271
+ # Clean up the temporary configuration file
272
+ if os.path.exists(temp_config_path):
273
+ os.unlink(temp_config_path)
274
+
275
+ def _analyze_typescript(self, repo_path):
276
+ """
277
+ Analyze TypeScript code using ESLint and TSC.
278
+
279
+ Args:
280
+ repo_path (str): The path to the repository.
281
+
282
+ Returns:
283
+ dict: Analysis results.
284
+ """
285
+ logger.info(f"Analyzing TypeScript code in {repo_path}")
286
+
287
+ # Find TypeScript files
288
+ ts_files = []
289
+ for root, _, files in os.walk(repo_path):
290
+ for file in files:
291
+ if file.endswith(('.ts', '.tsx')) and not 'node_modules' in root:
292
+ ts_files.append(os.path.join(root, file))
293
+
294
+ if not ts_files:
295
+ return {
296
+ 'status': 'no_files',
297
+ 'message': 'No TypeScript files found in the repository.',
298
+ 'issues': [],
299
+ }
300
+
301
+ # Create a temporary ESLint configuration file for TypeScript
302
+ eslint_config = {
303
+ "env": {
304
+ "browser": True,
305
+ "es2021": True,
306
+ "node": True
307
+ },
308
+ "extends": [
309
+ "eslint:recommended",
310
+ "plugin:@typescript-eslint/recommended"
311
+ ],
312
+ "parser": "@typescript-eslint/parser",
313
+ "parserOptions": {
314
+ "ecmaVersion": 12,
315
+ "sourceType": "module",
316
+ "ecmaFeatures": {
317
+ "jsx": True
318
+ }
319
+ },
320
+ "plugins": [
321
+ "@typescript-eslint"
322
+ ],
323
+ "rules": {}
324
+ }
325
+
326
+ with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_config:
327
+ json.dump(eslint_config, temp_config)
328
+ temp_config_path = temp_config.name
329
+
330
+ # Create a temporary tsconfig.json file
331
+ tsconfig = {
332
+ "compilerOptions": {
333
+ "target": "es2020",
334
+ "module": "commonjs",
335
+ "strict": True,
336
+ "esModuleInterop": True,
337
+ "skipLibCheck": True,
338
+ "forceConsistentCasingInFileNames": True,
339
+ "noEmit": True
340
+ },
341
+ "include": ts_files
342
+ }
343
+
344
+ with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_tsconfig:
345
+ json.dump(tsconfig, temp_tsconfig)
346
+ temp_tsconfig_path = temp_tsconfig.name
347
+
348
+ try:
349
+ # Run ESLint with TypeScript support
350
+ eslint_cmd = [
351
+ 'npx',
352
+ 'eslint',
353
+ '--config', temp_config_path,
354
+ '--format', 'json',
355
+ '--ext', '.ts,.tsx',
356
+ ] + ts_files
357
+
358
+ eslint_process = subprocess.run(
359
+ eslint_cmd,
360
+ stdout=subprocess.PIPE,
361
+ stderr=subprocess.PIPE,
362
+ text=True,
363
+ check=False,
364
+ )
365
+
366
+ # Parse ESLint output
367
+ eslint_issues = []
368
+ if eslint_process.stdout.strip():
369
+ try:
370
+ eslint_results = json.loads(eslint_process.stdout)
371
+
372
+ # Extract issues from ESLint results
373
+ for result in eslint_results:
374
+ file_path = result.get('filePath', '')
375
+ for message in result.get('messages', []):
376
+ eslint_issues.append({
377
+ 'path': file_path,
378
+ 'line': message.get('line', 0),
379
+ 'column': message.get('column', 0),
380
+ 'message': message.get('message', ''),
381
+ 'severity': message.get('severity', 0),
382
+ 'ruleId': message.get('ruleId', ''),
383
+ 'source': 'eslint',
384
+ })
385
+ except json.JSONDecodeError:
386
+ logger.error(f"Error parsing ESLint output: {eslint_process.stdout}")
387
+
388
+ # Run TypeScript compiler for type checking
389
+ tsc_cmd = [
390
+ 'npx',
391
+ 'tsc',
392
+ '--project', temp_tsconfig_path,
393
+ '--noEmit',
394
+ ]
395
+
396
+ tsc_process = subprocess.run(
397
+ tsc_cmd,
398
+ stdout=subprocess.PIPE,
399
+ stderr=subprocess.PIPE,
400
+ text=True,
401
+ check=False,
402
+ )
403
+
404
+ # Parse TSC output
405
+ tsc_issues = []
406
+ if tsc_process.stderr.strip():
407
+ # TSC error format: file.ts(line,col): error TS2551: message
408
+ for line in tsc_process.stderr.splitlines():
409
+ if ': error ' in line or ': warning ' in line:
410
+ try:
411
+ file_info, error_info = line.split(':', 1)
412
+ file_path, line_col = file_info.rsplit('(', 1)
413
+ line_num, col_num = line_col.rstrip(')').split(',')
414
+
415
+ error_type, error_message = error_info.split(':', 1)
416
+ error_type = error_type.strip()
417
+ error_message = error_message.strip()
418
+
419
+ tsc_issues.append({
420
+ 'path': file_path,
421
+ 'line': int(line_num),
422
+ 'column': int(col_num),
423
+ 'message': error_message,
424
+ 'severity': 2 if 'error' in error_type else 1,
425
+ 'ruleId': error_type,
426
+ 'source': 'tsc',
427
+ })
428
+ except Exception as e:
429
+ logger.warning(f"Error parsing TSC output line: {line}, error: {e}")
430
+
431
+ # Combine issues from both tools
432
+ all_issues = eslint_issues + tsc_issues
433
+
434
+ # Group issues by source and severity
435
+ issues_by_source = defaultdict(list)
436
+ issues_by_severity = defaultdict(list)
437
+
438
+ for issue in all_issues:
439
+ source = issue.get('source', 'unknown')
440
+ issues_by_source[source].append(issue)
441
+
442
+ severity = issue.get('severity', 0)
443
+ severity_name = {0: 'off', 1: 'warning', 2: 'error'}.get(severity, 'unknown')
444
+ issues_by_severity[severity_name].append(issue)
445
+
446
+ return {
447
+ 'status': 'success',
448
+ 'issues': all_issues,
449
+ 'issues_by_source': dict(issues_by_source),
450
+ 'issues_by_severity': dict(issues_by_severity),
451
+ 'issue_count': len(all_issues),
452
+ 'files_analyzed': len(ts_files),
453
+ }
454
+
455
+ except Exception as e:
456
+ logger.error(f"Error analyzing TypeScript code: {e}")
457
+ return {
458
+ 'status': 'error',
459
+ 'error': str(e),
460
+ 'issues': [],
461
+ }
462
+
463
+ finally:
464
+ # Clean up temporary files
465
+ for temp_file in [temp_config_path, temp_tsconfig_path]:
466
+ if os.path.exists(temp_file):
467
+ os.unlink(temp_file)
468
+
469
+ def _analyze_java(self, repo_path):
470
+ """
471
+ Analyze Java code using PMD.
472
+
473
+ Args:
474
+ repo_path (str): The path to the repository.
475
+
476
+ Returns:
477
+ dict: Analysis results.
478
+ """
479
+ logger.info(f"Analyzing Java code in {repo_path}")
480
+
481
+ # Find Java files
482
+ java_files = []
483
+ for root, _, files in os.walk(repo_path):
484
+ for file in files:
485
+ if file.endswith('.java'):
486
+ java_files.append(os.path.join(root, file))
487
+
488
+ if not java_files:
489
+ return {
490
+ 'status': 'no_files',
491
+ 'message': 'No Java files found in the repository.',
492
+ 'issues': [],
493
+ }
494
+
495
+ # Create a temporary file to store PMD output
496
+ with tempfile.NamedTemporaryFile(suffix='.json', delete=False) as temp_file:
497
+ temp_path = temp_file.name
498
+
499
+ try:
500
+ # Run PMD with JSON reporter
501
+ cmd = [
502
+ 'pmd',
503
+ 'check',
504
+ '--dir', repo_path,
505
+ '--format', 'json',
506
+ '--rulesets', 'category/java/bestpractices.xml,category/java/codestyle.xml,category/java/design.xml,category/java/errorprone.xml,category/java/multithreading.xml,category/java/performance.xml,category/java/security.xml',
507
+ ]
508
+
509
+ process = subprocess.run(
510
+ cmd,
511
+ stdout=subprocess.PIPE,
512
+ stderr=subprocess.PIPE,
513
+ text=True,
514
+ check=False,
515
+ )
516
+
517
+ # Parse PMD output
518
+ if process.stdout.strip():
519
+ try:
520
+                     pmd_results = json.loads(process.stdout)
+
+                     # Extract issues from PMD results
+                     issues = []
+                     for file_result in pmd_results.get('files', []):
+                         file_path = file_result.get('filename', '')
+                         for violation in file_result.get('violations', []):
+                             issues.append({
+                                 'path': file_path,
+                                 'line': violation.get('beginline', 0),
+                                 'endLine': violation.get('endline', 0),
+                                 'column': violation.get('begincolumn', 0),
+                                 'endColumn': violation.get('endcolumn', 0),
+                                 'message': violation.get('description', ''),
+                                 'rule': violation.get('rule', ''),
+                                 'ruleset': violation.get('ruleset', ''),
+                                 'priority': violation.get('priority', 0),
+                             })
+                 except json.JSONDecodeError:
+                     logger.error(f"Error parsing PMD output: {process.stdout}")
+                     issues = []
+             else:
+                 issues = []
+
+             # Group issues by ruleset
+             issues_by_ruleset = defaultdict(list)
+             for issue in issues:
+                 ruleset = issue.get('ruleset', 'unknown')
+                 issues_by_ruleset[ruleset].append(issue)
+
+             return {
+                 'status': 'success',
+                 'issues': issues,
+                 'issues_by_ruleset': dict(issues_by_ruleset),
+                 'issue_count': len(issues),
+                 'files_analyzed': len(java_files),
+             }
+
+         except Exception as e:
+             logger.error(f"Error running PMD: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'issues': [],
+             }
+
+         finally:
+             # Clean up the temporary file
+             if os.path.exists(temp_path):
+                 os.unlink(temp_path)
+
+     def _analyze_go(self, repo_path):
+         """
+         Analyze Go code using golangci-lint.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Analysis results.
+         """
+         logger.info(f"Analyzing Go code in {repo_path}")
+
+         # Find Go files
+         go_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.go'):
+                     go_files.append(os.path.join(root, file))
+
+         if not go_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Go files found in the repository.',
+                 'issues': [],
+             }
+
+         try:
+             # Run golangci-lint with JSON output
+             cmd = [
+                 'golangci-lint',
+                 'run',
+                 '--out-format=json',
+                 repo_path,
+             ]
+
+             process = subprocess.run(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+                 cwd=repo_path,  # Run in the repository directory
+             )
+
+             # Parse golangci-lint output
+             if process.stdout.strip():
+                 try:
+                     lint_results = json.loads(process.stdout)
+
+                     # Extract issues from golangci-lint results
+                     issues = []
+                     for issue in lint_results.get('Issues', []):
+                         issues.append({
+                             'path': issue.get('Pos', {}).get('Filename', ''),
+                             'line': issue.get('Pos', {}).get('Line', 0),
+                             'column': issue.get('Pos', {}).get('Column', 0),
+                             'message': issue.get('Text', ''),
+                             'linter': issue.get('FromLinter', ''),
+                             'severity': 'error' if issue.get('Severity', '') == 'error' else 'warning',
+                         })
+                 except json.JSONDecodeError:
+                     logger.error(f"Error parsing golangci-lint output: {process.stdout}")
+                     issues = []
+             else:
+                 issues = []
+
+             # Group issues by linter
+             issues_by_linter = defaultdict(list)
+             for issue in issues:
+                 linter = issue.get('linter', 'unknown')
+                 issues_by_linter[linter].append(issue)
+
+             return {
+                 'status': 'success',
+                 'issues': issues,
+                 'issues_by_linter': dict(issues_by_linter),
+                 'issue_count': len(issues),
+                 'files_analyzed': len(go_files),
+             }
+
+         except Exception as e:
+             logger.error(f"Error running golangci-lint: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'issues': [],
+             }
+
+     def _analyze_rust(self, repo_path):
+         """
+         Analyze Rust code using clippy.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Analysis results.
+         """
+         logger.info(f"Analyzing Rust code in {repo_path}")
+
+         # Find Rust files
+         rust_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.rs'):
+                     rust_files.append(os.path.join(root, file))
+
+         if not rust_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Rust files found in the repository.',
+                 'issues': [],
+             }
+
+         try:
+             # Run clippy with JSON output
+             cmd = [
+                 'cargo',
+                 'clippy',
+                 '--message-format=json',
+             ]
+
+             process = subprocess.run(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+                 cwd=repo_path,  # Run in the repository directory
+             )
+
+             # Parse clippy output (one JSON object per line)
+             issues = []
+             if process.stdout.strip():
+                 for line in process.stdout.splitlines():
+                     try:
+                         message = json.loads(line)
+                         if message.get('reason') == 'compiler-message':
+                             msg = message.get('message', {})
+                             spans = msg.get('spans', [])
+
+                             if spans:
+                                 primary_span = next((s for s in spans if s.get('is_primary')), spans[0])
+                                 file_path = primary_span.get('file_name', '')
+                                 line_num = primary_span.get('line_start', 0)
+                                 column = primary_span.get('column_start', 0)
+
+                                 issues.append({
+                                     'path': file_path,
+                                     'line': line_num,
+                                     'column': column,
+                                     'message': msg.get('message', ''),
+                                     'level': msg.get('level', ''),
+                                     # 'code' can be null in clippy JSON, so guard before .get()
+                                     'code': (msg.get('code') or {}).get('code', ''),
+                                 })
+                     except json.JSONDecodeError:
+                         continue
+
+             # Group issues by level
+             issues_by_level = defaultdict(list)
+             for issue in issues:
+                 level = issue.get('level', 'unknown')
+                 issues_by_level[level].append(issue)
+
+             return {
+                 'status': 'success',
+                 'issues': issues,
+                 'issues_by_level': dict(issues_by_level),
+                 'issue_count': len(issues),
+                 'files_analyzed': len(rust_files),
+             }
+
+         except Exception as e:
+             logger.error(f"Error running clippy: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'issues': [],
+             }
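For orientation, a minimal sketch of the per-line JSON shape that the clippy parser above expects (abridged; real `cargo clippy --message-format=json` output carries many more fields):

    import json

    sample = ('{"reason": "compiler-message", "message":'
              ' {"message": "unused variable: `x`", "level": "warning",'
              ' "code": {"code": "unused_variables"},'
              ' "spans": [{"file_name": "src/main.rs", "line_start": 3,'
              ' "column_start": 9, "is_primary": true}]}}')
    msg = json.loads(sample)['message']
    span = msg['spans'][0]
    # Mirrors the issue dict built in _analyze_rust above.
    issue = {
        'path': span['file_name'],    # "src/main.rs"
        'line': span['line_start'],   # 3
        'message': msg['message'],    # "unused variable: `x`"
        'level': msg['level'],        # "warning"
    }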
src/services/performance_analyzer.py ADDED
@@ -0,0 +1,750 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Performance Analyzer Service
+
+ This module provides functionality for analyzing code performance across different languages.
+ """
+
+ import os
+ import re
+ import logging
+ from collections import defaultdict
+
+ logger = logging.getLogger(__name__)
+
+ class PerformanceAnalyzer:
+     """
+     Service for analyzing code performance across different languages.
+     """
+
+     def __init__(self):
+         """
+         Initialize the PerformanceAnalyzer.
+         """
+         logger.info("Initialized PerformanceAnalyzer")
+         self.analyzers = {
+             'Python': self._analyze_python_performance,
+             'JavaScript': self._analyze_javascript_performance,
+             'TypeScript': self._analyze_typescript_performance,
+             'Java': self._analyze_java_performance,
+             'Go': self._analyze_go_performance,
+             'Rust': self._analyze_rust_performance,
+         }
+
+         # Initialize performance patterns for different languages
+         self._init_performance_patterns()
+
+     def _init_performance_patterns(self):
+         """
+         Initialize performance patterns for different languages.
+         """
+         # Python performance patterns
+         self.python_patterns = [
+             {
+                 'name': 'Inefficient list comprehension',
+                 'pattern': r'\[.*?for.*?in.*?for.*?in.*?\]',
+                 'severity': 'medium',
+                 'description': 'Nested list comprehensions can be inefficient for large datasets.',
+                 'suggestion': 'Consider using itertools or breaking into separate operations.',
+             },
+             {
+                 'name': 'String concatenation in loop',
+                 'pattern': r'for.*?\+\=\s*[\'\"](.*?)[\'\"]',
+                 'severity': 'medium',
+                 'description': 'String concatenation in loops is inefficient in Python.',
+                 'suggestion': 'Use string join() or a list of strings with join() at the end.',
+             },
+             {
+                 'name': 'Global variable in loop',
+                 'pattern': r'global\s+\w+.*?for\s+\w+\s+in',
+                 'severity': 'medium',
+                 'description': 'Modifying global variables in loops can be inefficient.',
+                 'suggestion': 'Use local variables and return values instead.',
+             },
+             {
+                 'name': 'Inefficient dict/list access in loop',
+                 'pattern': r'for.*?in.*?:\s*.*?\[.*?\]\s*=',
+                 'severity': 'medium',
+                 'description': 'Repeatedly accessing dictionary or list elements in a loop can be inefficient.',
+                 'suggestion': 'Consider using a local variable to store the accessed element.',
+             },
+             {
+                 'name': 'Using range(len())',
+                 'pattern': r'for\s+\w+\s+in\s+range\(len\(',
+                 'severity': 'low',
+                 'description': 'Using range(len()) is less readable than using enumerate().',
+                 'suggestion': 'Use enumerate() instead of range(len()).',
+             },
+             {
+                 'name': 'Inefficient regular expression',
+                 'pattern': r're\.compile\([\'\"].*?[\+\*].*?[\'\"]\)',
+                 'severity': 'medium',
+                 'description': 'Complex regular expressions can be inefficient.',
+                 'suggestion': 'Simplify the regular expression or use more specific patterns.',
+             },
+             {
+                 'name': 'Large memory allocation',
+                 'pattern': r'\[.*?for.*?in\s+range\(\d{7,}\)\]',
+                 'severity': 'high',
+                 'description': 'Creating large lists in memory can cause performance issues.',
+                 'suggestion': 'Use generators or iterators instead of creating large lists.',
+             },
+             {
+                 'name': 'Inefficient database query in loop',
+                 'pattern': r'for.*?in.*?:\s*.*?\.execute\(',
+                 'severity': 'high',
+                 'description': 'Executing database queries in a loop can be very inefficient.',
+                 'suggestion': 'Use batch operations or join queries instead of querying in a loop.',
+             },
+         ]
+
+         # JavaScript performance patterns
+         self.javascript_patterns = [
+             {
+                 'name': 'DOM manipulation in loop',
+                 'pattern': r'for\s*\(.*?\)\s*\{.*?document\..*?\}',
+                 'severity': 'high',
+                 'description': 'Manipulating the DOM inside loops can cause performance issues.',
+                 'suggestion': 'Batch DOM updates or use DocumentFragment.',
+             },
+             {
+                 'name': 'Inefficient array manipulation',
+                 'pattern': r'for\s*\(.*?\)\s*\{.*?splice\(.*?\}',
+                 'severity': 'medium',
+                 'description': 'Using splice() in loops can be inefficient for large arrays.',
+                 'suggestion': 'Consider using filter() or other array methods.',
+             },
+             {
+                 'name': 'Creating functions in loops',
+                 'pattern': r'for\s*\(.*?\)\s*\{.*?function\s*\(.*?\)\s*\{.*?\}.*?\}',
+                 'severity': 'medium',
+                 'description': 'Creating functions inside loops can lead to performance issues.',
+                 'suggestion': 'Define the function outside the loop and reference it.',
+             },
+             {
+                 'name': 'Inefficient string concatenation',
+                 'pattern': r'for\s*\(.*?\)\s*\{.*?\+\=\s*[\'\"](.*?)[\'\"].*?\}',
+                 'severity': 'medium',
+                 'description': 'String concatenation in loops can be inefficient.',
+                 'suggestion': 'Use array join() or template literals.',
+             },
+             {
+                 'name': 'Using eval()',
+                 'pattern': r'eval\(',
+                 'severity': 'high',
+                 'description': 'Using eval() is slow and can introduce security vulnerabilities.',
+                 'suggestion': 'Avoid using eval() and use safer alternatives.',
+             },
+             {
+                 'name': 'Inefficient event handling',
+                 'pattern': r'addEventListener\([\'\"].*?[\'\"],\s*function',
+                 'severity': 'medium',
+                 'description': 'Anonymous functions in event listeners can lead to memory leaks.',
+                 'suggestion': 'Use named functions for event handlers to allow proper cleanup.',
+             },
+         ]
+
+         # TypeScript performance patterns (extends JavaScript patterns)
+         self.typescript_patterns = self.javascript_patterns + [
+             {
+                 'name': 'Inefficient type assertion',
+                 'pattern': r'<.*?>\s*\(.*?\)',
+                 'severity': 'low',
+                 'description': 'Type assertions are erased at compile time; heavy use usually signals missing or incorrect typings.',
+                 'suggestion': 'Use proper typing and interfaces instead of frequent type assertions.',
+             },
+             {
+                 'name': 'Complex type definitions',
+                 'pattern': r'type\s+\w+\s*=\s*\{[^\}]{500,}\}',
+                 'severity': 'medium',
+                 'description': 'Overly complex type definitions can slow down the TypeScript compiler.',
+                 'suggestion': 'Break complex types into smaller, reusable interfaces.',
+             },
+         ]
+
+         # Java performance patterns
+         self.java_patterns = [
+             {
+                 'name': 'Inefficient string concatenation',
+                 'pattern': r'for\s*\(.*?\)\s*\{.*?\+\=\s*[\'\"](.*?)[\'\"].*?\}',
+                 'severity': 'medium',
+                 'description': 'String concatenation in loops is inefficient in Java.',
+                 'suggestion': 'Use StringBuilder or StringBuffer instead.',
+             },
+             {
+                 'name': 'Creating objects in loops',
+                 'pattern': r'for\s*\(.*?\)\s*\{.*?new\s+\w+\(.*?\).*?\}',
+                 'severity': 'medium',
+                 'description': 'Creating objects inside loops can lead to excessive garbage collection.',
+                 'suggestion': 'Create objects outside the loop or use object pooling.',
+             },
+             {
+                 'name': 'Inefficient collection iteration',
+                 'pattern': r'for\s*\(int\s+i\s*=\s*0.*?i\s*<\s*\w+\.size\(\).*?\)',
+                 'severity': 'low',
+                 'description': 'Calling size() in each iteration can be inefficient for some collections.',
+                 'suggestion': 'Store the size in a variable before the loop.',
+             },
+             {
+                 'name': 'Using boxed primitives in performance-critical code',
+                 'pattern': r'(Integer|Boolean|Double|Float|Long)\s+\w+\s*=',
+                 'severity': 'low',
+                 'description': 'Using boxed primitives can be less efficient than primitive types.',
+                 'suggestion': 'Use primitive types (int, boolean, etc.) in performance-critical code.',
+             },
+             {
+                 'name': 'Inefficient exception handling',
+                 'pattern': r'try\s*\{.*?\}\s*catch\s*\(Exception\s+\w+\)\s*\{',
+                 'severity': 'medium',
+                 'description': 'Catching generic exceptions can hide issues and impact performance.',
+                 'suggestion': 'Catch specific exceptions and handle them appropriately.',
+             },
+         ]
+
+         # Go performance patterns
+         self.go_patterns = [
+             {
+                 'name': 'Inefficient string concatenation',
+                 'pattern': r'for\s+.*?\{.*?\+\=\s*[\'\"](.*?)[\'\"].*?\}',
+                 'severity': 'medium',
+                 'description': 'String concatenation in loops can be inefficient.',
+                 'suggestion': 'Use strings.Builder for string concatenation in loops.',
+             },
+             {
+                 'name': 'Inefficient slice operations',
+                 'pattern': r'for\s+.*?\{.*?append\(.*?\}',
+                 'severity': 'medium',
+                 'description': 'Repeatedly appending to a slice can cause multiple allocations.',
+                 'suggestion': 'Pre-allocate slices with make() when the size is known.',
+             },
+             {
+                 'name': 'Mutex in hot path',
+                 'pattern': r'func\s+\(.*?\)\s+\w+\(.*?\)\s+\{.*?Lock\(\).*?Unlock\(\)',
+                 'severity': 'medium',
+                 'description': 'Using mutexes in frequently called functions can impact performance.',
+                 'suggestion': 'Consider using atomic operations or redesigning for less contention.',
+             },
+             {
+                 'name': 'Inefficient map iteration',
+                 'pattern': r'for\s+\w+,\s*_\s*:=\s*range',
+                 'severity': 'low',
+                 'description': 'Iterating over maps when only keys are needed can be inefficient.',
+                 'suggestion': 'Iterate with "for key := range m" and drop the unused value.',
+             },
+         ]
+
+         # Rust performance patterns
+         self.rust_patterns = [
+             {
+                 'name': 'Inefficient string operations',
+                 'pattern': r'for\s+.*?\{.*?\.push_str\(.*?\}',
+                 'severity': 'medium',
+                 'description': 'Repeatedly pushing to strings can be inefficient.',
+                 'suggestion': 'Pre-allocate with String::with_capacity() when the final size is known.',
+             },
+             {
+                 'name': 'Excessive cloning',
+                 'pattern': r'\.clone\(\)',
+                 'severity': 'medium',
+                 'description': 'Excessive cloning can impact performance.',
+                 'suggestion': 'Use references or ownership transfer where possible.',
+             },
+             {
+                 'name': 'Inefficient vector operations',
+                 'pattern': r'for\s+.*?\{.*?\.push\(.*?\}',
+                 'severity': 'medium',
+                 'description': 'Repeatedly pushing to vectors can cause multiple allocations.',
+                 'suggestion': 'Pre-allocate vectors with Vec::with_capacity() when the size is known.',
+             },
+             {
+                 'name': 'Box allocation in loops',
+                 'pattern': r'for\s+.*?\{.*?Box::new\(.*?\}',
+                 'severity': 'medium',
+                 'description': 'Allocating boxes in loops can be inefficient.',
+                 'suggestion': 'Allocate memory outside the loop when possible.',
+             },
+         ]
+
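Each pattern above is matched against whole-file contents with re.finditer in the _analyze_*_performance methods below, and the match offset is converted to a 1-based line number. A minimal sketch of that mechanic, using the range(len()) pattern; note that '.' does not cross line breaks here, so the brace-delimited loop patterns only match bodies written on one line unless re.DOTALL were added:

    import re

    pattern = r'for\s+\w+\s+in\s+range\(len\('   # the "Using range(len())" pattern above
    content = "items = [1, 2]\nfor i in range(len(items)):\n    print(items[i])\n"
    for match in re.finditer(pattern, content):
        line_number = content[:match.start()].count('\n') + 1  # offset -> line number
        print(line_number, match.group(0))  # -> 2 for i in range(len(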
+     def analyze_repository(self, repo_path, languages):
+         """
+         Analyze code performance in a repository for the specified languages.
+
+         Args:
+             repo_path (str): The path to the repository.
+             languages (list): A list of programming languages to analyze.
+
+         Returns:
+             dict: A dictionary containing performance analysis results for each language.
+         """
+         logger.info(f"Analyzing performance in repository at {repo_path} for languages: {languages}")
+
+         results = {}
+
+         for language in languages:
+             if language in self.analyzers:
+                 try:
+                     logger.info(f"Analyzing {language} code performance in {repo_path}")
+                     results[language] = self.analyzers[language](repo_path)
+                 except Exception as e:
+                     logger.error(f"Error analyzing {language} code performance: {e}")
+                     results[language] = {
+                         'status': 'error',
+                         'error': str(e),
+                         'issues': [],
+                     }
+             else:
+                 logger.warning(f"No performance analyzer available for {language}")
+                 results[language] = {
+                     'status': 'not_supported',
+                     'message': f"Performance analysis for {language} is not supported yet.",
+                     'issues': [],
+                 }
+
+         # Identify hotspots (files with multiple performance issues)
+         hotspots = self._identify_hotspots(results)
+
+         return {
+             'language_results': results,
+             'hotspots': hotspots,
+         }
+
+     def _identify_hotspots(self, results):
+         """
+         Identify performance hotspots across all languages.
+
+         Args:
+             results (dict): Performance analysis results for each language.
+
+         Returns:
+             list: A list of hotspot files with multiple performance issues.
+         """
+         # Count issues per file across all languages
+         file_issue_count = defaultdict(int)
+         file_issues = defaultdict(list)
+
+         for language, language_result in results.items():
+             for issue in language_result.get('issues', []):
+                 file_path = issue.get('file', '')
+                 if file_path:
+                     file_issue_count[file_path] += 1
+                     file_issues[file_path].append(issue)
+
+         # Identify hotspots (files with multiple issues)
+         hotspots = []
+         for file_path, count in sorted(file_issue_count.items(), key=lambda x: x[1], reverse=True):
+             if count >= 2:  # Files with at least 2 issues are considered hotspots
+                 hotspots.append({
+                     'file': file_path,
+                     'issue_count': count,
+                     'issues': file_issues[file_path],
+                     # Record the language of the first issue so reports can label the hotspot
+                     'language': file_issues[file_path][0].get('language', ''),
+                 })
+
+         return hotspots[:10]  # Return top 10 hotspots
+
+     def _analyze_python_performance(self, repo_path):
+         """
+         Analyze Python code for performance issues.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Performance analysis results for Python code.
+         """
+         logger.info(f"Analyzing Python code performance in {repo_path}")
+
+         # Find Python files
+         python_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.py'):
+                     python_files.append(os.path.join(root, file))
+
+         if not python_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Python files found in the repository.',
+                 'issues': [],
+             }
+
+         # Analyze each Python file
+         issues = []
+         for file_path in python_files:
+             try:
+                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                     content = f.read()
+
+                 # Check for performance patterns
+                 for pattern in self.python_patterns:
+                     matches = re.finditer(pattern['pattern'], content)
+                     for match in matches:
+                         line_number = content[:match.start()].count('\n') + 1
+                         code_snippet = match.group(0)
+
+                         issues.append({
+                             'file': file_path,
+                             'line': line_number,
+                             'code': code_snippet,
+                             'issue': pattern['name'],
+                             'description': pattern['description'],
+                             'suggestion': pattern['suggestion'],
+                             'severity': pattern['severity'],
+                             'language': 'Python',
+                         })
+             except Exception as e:
+                 logger.error(f"Error analyzing Python file {file_path}: {e}")
+
+         # Group issues by severity
+         issues_by_severity = defaultdict(list)
+         for issue in issues:
+             severity = issue.get('severity', 'unknown')
+             issues_by_severity[severity].append(issue)
+
+         return {
+             'status': 'success',
+             'issues': issues,
+             'issues_by_severity': dict(issues_by_severity),
+             'issue_count': len(issues),
+             'files_analyzed': len(python_files),
+         }
+
+     def _analyze_javascript_performance(self, repo_path):
+         """
+         Analyze JavaScript code for performance issues.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Performance analysis results for JavaScript code.
+         """
+         logger.info(f"Analyzing JavaScript code performance in {repo_path}")
+
+         # Find JavaScript files
+         js_files = []
+         for root, _, files in os.walk(repo_path):
+             if 'node_modules' in root:
+                 continue
+             for file in files:
+                 if file.endswith(('.js', '.jsx')):
+                     js_files.append(os.path.join(root, file))
+
+         if not js_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No JavaScript files found in the repository.',
+                 'issues': [],
+             }
+
+         # Analyze each JavaScript file
+         issues = []
+         for file_path in js_files:
+             try:
+                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                     content = f.read()
+
+                 # Check for performance patterns
+                 for pattern in self.javascript_patterns:
+                     matches = re.finditer(pattern['pattern'], content)
+                     for match in matches:
+                         line_number = content[:match.start()].count('\n') + 1
+                         code_snippet = match.group(0)
+
+                         issues.append({
+                             'file': file_path,
+                             'line': line_number,
+                             'code': code_snippet,
+                             'issue': pattern['name'],
+                             'description': pattern['description'],
+                             'suggestion': pattern['suggestion'],
+                             'severity': pattern['severity'],
+                             'language': 'JavaScript',
+                         })
+             except Exception as e:
+                 logger.error(f"Error analyzing JavaScript file {file_path}: {e}")
+
+         # Group issues by severity
+         issues_by_severity = defaultdict(list)
+         for issue in issues:
+             severity = issue.get('severity', 'unknown')
+             issues_by_severity[severity].append(issue)
+
+         return {
+             'status': 'success',
+             'issues': issues,
+             'issues_by_severity': dict(issues_by_severity),
+             'issue_count': len(issues),
+             'files_analyzed': len(js_files),
+         }
+
+     def _analyze_typescript_performance(self, repo_path):
+         """
+         Analyze TypeScript code for performance issues.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Performance analysis results for TypeScript code.
+         """
+         logger.info(f"Analyzing TypeScript code performance in {repo_path}")
+
+         # Find TypeScript files
+         ts_files = []
+         for root, _, files in os.walk(repo_path):
+             if 'node_modules' in root:
+                 continue
+             for file in files:
+                 if file.endswith(('.ts', '.tsx')):
+                     ts_files.append(os.path.join(root, file))
+
+         if not ts_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No TypeScript files found in the repository.',
+                 'issues': [],
+             }
+
+         # Analyze each TypeScript file
+         issues = []
+         for file_path in ts_files:
+             try:
+                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                     content = f.read()
+
+                 # Check for performance patterns
+                 for pattern in self.typescript_patterns:
+                     matches = re.finditer(pattern['pattern'], content)
+                     for match in matches:
+                         line_number = content[:match.start()].count('\n') + 1
+                         code_snippet = match.group(0)
+
+                         issues.append({
+                             'file': file_path,
+                             'line': line_number,
+                             'code': code_snippet,
+                             'issue': pattern['name'],
+                             'description': pattern['description'],
+                             'suggestion': pattern['suggestion'],
+                             'severity': pattern['severity'],
+                             'language': 'TypeScript',
+                         })
+             except Exception as e:
+                 logger.error(f"Error analyzing TypeScript file {file_path}: {e}")
+
+         # Group issues by severity
+         issues_by_severity = defaultdict(list)
+         for issue in issues:
+             severity = issue.get('severity', 'unknown')
+             issues_by_severity[severity].append(issue)
+
+         return {
+             'status': 'success',
+             'issues': issues,
+             'issues_by_severity': dict(issues_by_severity),
+             'issue_count': len(issues),
+             'files_analyzed': len(ts_files),
+         }
+
+     def _analyze_java_performance(self, repo_path):
+         """
+         Analyze Java code for performance issues.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Performance analysis results for Java code.
+         """
+         logger.info(f"Analyzing Java code performance in {repo_path}")
+
+         # Find Java files
+         java_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.java'):
+                     java_files.append(os.path.join(root, file))
+
+         if not java_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Java files found in the repository.',
+                 'issues': [],
+             }
+
+         # Analyze each Java file
+         issues = []
+         for file_path in java_files:
+             try:
+                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                     content = f.read()
+
+                 # Check for performance patterns
+                 for pattern in self.java_patterns:
+                     matches = re.finditer(pattern['pattern'], content)
+                     for match in matches:
+                         line_number = content[:match.start()].count('\n') + 1
+                         code_snippet = match.group(0)
+
+                         issues.append({
+                             'file': file_path,
+                             'line': line_number,
+                             'code': code_snippet,
+                             'issue': pattern['name'],
+                             'description': pattern['description'],
+                             'suggestion': pattern['suggestion'],
+                             'severity': pattern['severity'],
+                             'language': 'Java',
+                         })
+             except Exception as e:
+                 logger.error(f"Error analyzing Java file {file_path}: {e}")
+
+         # Group issues by severity
+         issues_by_severity = defaultdict(list)
+         for issue in issues:
+             severity = issue.get('severity', 'unknown')
+             issues_by_severity[severity].append(issue)
+
+         return {
+             'status': 'success',
+             'issues': issues,
+             'issues_by_severity': dict(issues_by_severity),
+             'issue_count': len(issues),
+             'files_analyzed': len(java_files),
+         }
+
+     def _analyze_go_performance(self, repo_path):
+         """
+         Analyze Go code for performance issues.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Performance analysis results for Go code.
+         """
+         logger.info(f"Analyzing Go code performance in {repo_path}")
+
+         # Find Go files
+         go_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.go'):
+                     go_files.append(os.path.join(root, file))
+
+         if not go_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Go files found in the repository.',
+                 'issues': [],
+             }
+
+         # Analyze each Go file
+         issues = []
+         for file_path in go_files:
+             try:
+                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                     content = f.read()
+
+                 # Check for performance patterns
+                 for pattern in self.go_patterns:
+                     matches = re.finditer(pattern['pattern'], content)
+                     for match in matches:
+                         line_number = content[:match.start()].count('\n') + 1
+                         code_snippet = match.group(0)
+
+                         issues.append({
+                             'file': file_path,
+                             'line': line_number,
+                             'code': code_snippet,
+                             'issue': pattern['name'],
+                             'description': pattern['description'],
+                             'suggestion': pattern['suggestion'],
+                             'severity': pattern['severity'],
+                             'language': 'Go',
+                         })
+             except Exception as e:
+                 logger.error(f"Error analyzing Go file {file_path}: {e}")
+
+         # Group issues by severity
+         issues_by_severity = defaultdict(list)
+         for issue in issues:
+             severity = issue.get('severity', 'unknown')
+             issues_by_severity[severity].append(issue)
+
+         return {
+             'status': 'success',
+             'issues': issues,
+             'issues_by_severity': dict(issues_by_severity),
+             'issue_count': len(issues),
+             'files_analyzed': len(go_files),
+         }
+
+     def _analyze_rust_performance(self, repo_path):
+         """
+         Analyze Rust code for performance issues.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Performance analysis results for Rust code.
+         """
+         logger.info(f"Analyzing Rust code performance in {repo_path}")
+
+         # Find Rust files
+         rust_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.rs'):
+                     rust_files.append(os.path.join(root, file))
+
+         if not rust_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Rust files found in the repository.',
+                 'issues': [],
+             }
+
+         # Analyze each Rust file
+         issues = []
+         for file_path in rust_files:
+             try:
+                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                     content = f.read()
+
+                 # Check for performance patterns
+                 for pattern in self.rust_patterns:
+                     matches = re.finditer(pattern['pattern'], content)
+                     for match in matches:
+                         line_number = content[:match.start()].count('\n') + 1
+                         code_snippet = match.group(0)
+
+                         issues.append({
+                             'file': file_path,
+                             'line': line_number,
+                             'code': code_snippet,
+                             'issue': pattern['name'],
+                             'description': pattern['description'],
+                             'suggestion': pattern['suggestion'],
+                             'severity': pattern['severity'],
+                             'language': 'Rust',
+                         })
+             except Exception as e:
+                 logger.error(f"Error analyzing Rust file {file_path}: {e}")
+
+         # Group issues by severity
+         issues_by_severity = defaultdict(list)
+         for issue in issues:
+             severity = issue.get('severity', 'unknown')
+             issues_by_severity[severity].append(issue)
+
+         return {
+             'status': 'success',
+             'issues': issues,
+             'issues_by_severity': dict(issues_by_severity),
+             'issue_count': len(issues),
+             'files_analyzed': len(rust_files),
+         }
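A minimal usage sketch of the analyzer on a local checkout (the repository path is hypothetical):

    from src.services.performance_analyzer import PerformanceAnalyzer

    analyzer = PerformanceAnalyzer()
    results = analyzer.analyze_repository('/tmp/repo_checkout', ['Python', 'Go'])  # hypothetical path

    for language, result in results['language_results'].items():
        print(language, result['status'], result.get('issue_count', 0))
    for hotspot in results['hotspots']:
        print(hotspot['file'], hotspot['issue_count'])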
src/services/report_generator.py ADDED
@@ -0,0 +1,741 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Report Generator Service
+
+ This module provides functionality for generating comprehensive code review reports
+ in various formats based on the analysis results.
+ """
+
+ import os
+ import json
+ import logging
+ import datetime
+ from pathlib import Path
+ import markdown
+ import pdfkit
+ import csv
+
+ logger = logging.getLogger(__name__)
+
+ class ReportGenerator:
+     """
+     Service for generating code review reports in various formats.
+     """
+
+     def __init__(self, output_dir="reports"):
+         """
+         Initialize the ReportGenerator.
+
+         Args:
+             output_dir (str): Directory to save generated reports.
+         """
+         self.output_dir = output_dir
+         os.makedirs(output_dir, exist_ok=True)
+         logger.info(f"Initialized ReportGenerator with output directory: {output_dir}")
+
+     def generate_report(self, repo_name, results, format_type="all"):
+         """
+         Generate a report based on the analysis results.
+
+         Args:
+             repo_name (str): Name of the repository.
+             results (dict): Analysis results.
+             format_type (str): Report format type (json, html, pdf, csv, or all).
+
+         Returns:
+             dict: Paths to the generated reports.
+         """
+         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+         report_name = f"{repo_name}_{timestamp}"
+         report_paths = {}
+
+         # Create report content
+         report_content = self._create_report_content(repo_name, results)
+
+         # Generate reports in requested formats
+         if format_type in ["json", "all"]:
+             json_path = self._generate_json_report(report_name, report_content)
+             report_paths["json"] = json_path
+
+         if format_type in ["html", "all"]:
+             html_path = self._generate_html_report(report_name, report_content)
+             report_paths["html"] = html_path
+
+         if format_type in ["pdf", "all"]:
+             pdf_path = self._generate_pdf_report(report_name, report_content)
+             report_paths["pdf"] = pdf_path
+
+         if format_type in ["csv", "all"]:
+             csv_path = self._generate_csv_report(report_name, report_content)
+             report_paths["csv"] = csv_path
+
+         logger.info(f"Generated {len(report_paths)} report(s) for {repo_name}")
+         return report_paths
+
+     def _create_report_content(self, repo_name, results):
+         """
+         Create the content for the report.
+
+         Args:
+             repo_name (str): Name of the repository.
+             results (dict): Analysis results.
+
+         Returns:
+             dict: Structured report content.
+         """
+         # Extract repository info
+         repo_info = results.get("repository_info", {})
+
+         # Extract language breakdown
+         language_breakdown = results.get("language_breakdown", {})
+
+         # Extract code analysis results
+         code_analysis = results.get("code_analysis", {})
+
+         # Extract security scan results
+         security_scan = results.get("security_scan", {})
+
+         # Extract performance analysis results
+         performance_analysis = results.get("performance_analysis", {})
+
+         # Extract AI review results
+         ai_review = results.get("ai_review", {})
+
+         # Calculate summary metrics
+         summary_metrics = self._calculate_summary_metrics(results)
+
+         # Create report structure
+         report = {
+             "metadata": {
+                 "repository_name": repo_name,
+                 "report_date": datetime.datetime.now().isoformat(),
+                 "repository_info": repo_info,
+             },
+             "summary": {
+                 "metrics": summary_metrics,
+                 "language_breakdown": language_breakdown,
+                 "executive_summary": ai_review.get("summary", "No AI review summary available."),
+             },
+             "code_quality": {
+                 "issues_by_language": code_analysis,
+                 "top_issues": self._extract_top_issues(code_analysis),
+             },
+             "security": {
+                 "vulnerabilities_by_language": security_scan,
+                 "critical_vulnerabilities": self._extract_critical_vulnerabilities(security_scan),
+             },
+             "performance": {
+                 "issues_by_language": performance_analysis.get("language_results", {}),
+                 "hotspots": performance_analysis.get("hotspots", []),
+             },
+             "ai_review": {
+                 "file_reviews": ai_review.get("reviews", {}),
+                 "summary": ai_review.get("summary", "No AI review summary available."),
+             },
+             "recommendations": self._generate_recommendations(results),
+         }
+
+         return report
+
+     def _calculate_summary_metrics(self, results):
+         """
+         Calculate summary metrics from the analysis results.
+
+         Args:
+             results (dict): Analysis results.
+
+         Returns:
+             dict: Summary metrics.
+         """
+         metrics = {
+             "total_files": results.get("repository_info", {}).get("file_count", 0),
+             "repository_size": results.get("repository_info", {}).get("size", 0),
+         }
+
+         # Count code quality issues
+         code_analysis = results.get("code_analysis", {})
+         total_issues = 0
+         critical_issues = 0
+         for language, language_results in code_analysis.items():
+             total_issues += language_results.get("issue_count", 0)
+             for issue in language_results.get("issues", []):
+                 if issue.get("severity", "").lower() in ["critical", "high"]:
+                     critical_issues += 1
+
+         metrics["total_code_issues"] = total_issues
+         metrics["critical_code_issues"] = critical_issues
+
+         # Count security vulnerabilities
+         security_scan = results.get("security_scan", {})
+         total_vulnerabilities = 0
+         critical_vulnerabilities = 0
+         for language, language_results in security_scan.items():
+             total_vulnerabilities += language_results.get("vulnerability_count", 0)
+             for vuln in language_results.get("vulnerabilities", []):
+                 if vuln.get("severity", "").lower() in ["critical", "high"]:
+                     critical_vulnerabilities += 1
+
+         metrics["total_vulnerabilities"] = total_vulnerabilities
+         metrics["critical_vulnerabilities"] = critical_vulnerabilities
+
+         # Count performance issues
+         performance_analysis = results.get("performance_analysis", {})
+         total_performance_issues = 0
+         for language, language_results in performance_analysis.get("language_results", {}).items():
+             total_performance_issues += language_results.get("issue_count", 0)
+
+         metrics["total_performance_issues"] = total_performance_issues
+         metrics["performance_hotspots"] = len(performance_analysis.get("hotspots", []))
+
+         # Calculate overall score (0-100)
+         # This is a simple scoring algorithm that can be refined
+         base_score = 100
+         deductions = 0
+
+         # Deduct for code issues (more weight for critical issues)
+         if metrics["total_files"] > 0:
+             code_issue_ratio = metrics["total_code_issues"] / metrics["total_files"]
+             deductions += min(30, code_issue_ratio * 100)
+             deductions += min(20, (metrics["critical_code_issues"] / metrics["total_files"]) * 200)
+
+         # Deduct for security vulnerabilities (heavy weight for critical vulnerabilities)
+         if metrics["total_files"] > 0:
+             deductions += min(30, (metrics["total_vulnerabilities"] / metrics["total_files"]) * 150)
+             deductions += min(40, (metrics["critical_vulnerabilities"] / metrics["total_files"]) * 300)
+
+         # Deduct for performance issues
+         if metrics["total_files"] > 0:
+             deductions += min(20, (metrics["total_performance_issues"] / metrics["total_files"]) * 80)
+             deductions += min(10, (metrics["performance_hotspots"] / metrics["total_files"]) * 100)
+
+         metrics["overall_score"] = max(0, min(100, base_score - deductions))
+
+         # Determine quality rating based on score
+         if metrics["overall_score"] >= 90:
+             metrics["quality_rating"] = "Excellent"
+         elif metrics["overall_score"] >= 80:
+             metrics["quality_rating"] = "Good"
+         elif metrics["overall_score"] >= 70:
+             metrics["quality_rating"] = "Satisfactory"
+         elif metrics["overall_score"] >= 50:
+             metrics["quality_rating"] = "Needs Improvement"
+         else:
+             metrics["quality_rating"] = "Poor"
+
+         return metrics
+
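To make the deduction arithmetic above concrete, a worked example with illustrative counts (100 files; 10 code issues of which 2 critical; 1 vulnerability, none critical; 5 performance issues; 1 hotspot):

    deductions = (
        min(30, (10 / 100) * 100)    # 10.0  code issue ratio
        + min(20, (2 / 100) * 200)   #  4.0  critical code issues
        + min(30, (1 / 100) * 150)   #  1.5  vulnerabilities
        + min(40, (0 / 100) * 300)   #  0.0  critical vulnerabilities
        + min(20, (5 / 100) * 80)    #  4.0  performance issues
        + min(10, (1 / 100) * 100)   #  1.0  hotspots
    )                                # 20.5 total
    overall_score = max(0, min(100, 100 - deductions))  # 79.5 -> "Satisfactory"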
229
+ def _extract_top_issues(self, code_analysis, limit=10):
230
+ """
231
+ Extract the top code quality issues from the analysis results.
232
+
233
+ Args:
234
+ code_analysis (dict): Code analysis results.
235
+ limit (int): Maximum number of issues to extract.
236
+
237
+ Returns:
238
+ list: Top code quality issues.
239
+ """
240
+ all_issues = []
241
+
242
+ for language, language_results in code_analysis.items():
243
+ for issue in language_results.get("issues", []):
244
+ # Add language to the issue
245
+ issue["language"] = language
246
+ all_issues.append(issue)
247
+
248
+ # Sort issues by severity and then by line count if available
249
+ severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
250
+
251
+ def issue_sort_key(issue):
252
+ severity = issue.get("severity", "").lower()
253
+ severity_value = severity_order.get(severity, 5)
254
+ return (severity_value, -issue.get("line_count", 0))
255
+
256
+ sorted_issues = sorted(all_issues, key=issue_sort_key)
257
+
258
+ return sorted_issues[:limit]
259
+
260
+ def _extract_critical_vulnerabilities(self, security_scan, limit=10):
261
+ """
262
+ Extract critical security vulnerabilities from the scan results.
263
+
264
+ Args:
265
+ security_scan (dict): Security scan results.
266
+ limit (int): Maximum number of vulnerabilities to extract.
267
+
268
+ Returns:
269
+ list: Critical security vulnerabilities.
270
+ """
271
+ all_vulnerabilities = []
272
+
273
+ for language, language_results in security_scan.items():
274
+ for vuln in language_results.get("vulnerabilities", []):
275
+ # Add language to the vulnerability
276
+ vuln["language"] = language
277
+ all_vulnerabilities.append(vuln)
278
+
279
+ # Sort vulnerabilities by severity
280
+ severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
281
+
282
+ def vuln_sort_key(vuln):
283
+ severity = vuln.get("severity", "").lower()
284
+ severity_value = severity_order.get(severity, 5)
285
+ return severity_value
286
+
287
+ sorted_vulnerabilities = sorted(all_vulnerabilities, key=vuln_sort_key)
288
+
289
+ return sorted_vulnerabilities[:limit]
290
+
291
+ def _generate_recommendations(self, results):
292
+ """
293
+ Generate recommendations based on the analysis results.
294
+
295
+ Args:
296
+ results (dict): Analysis results.
297
+
298
+ Returns:
299
+ dict: Recommendations categorized by priority.
300
+ """
301
+ recommendations = {
302
+ "high_priority": [],
303
+ "medium_priority": [],
304
+ "low_priority": [],
305
+ }
306
+
307
+ # Extract critical security vulnerabilities as high priority recommendations
308
+ security_scan = results.get("security_scan", {})
309
+ for language, language_results in security_scan.items():
310
+ for vuln in language_results.get("vulnerabilities", []):
311
+ if vuln.get("severity", "").lower() in ["critical", "high"]:
312
+ recommendations["high_priority"].append({
313
+ "type": "security",
314
+ "language": language,
315
+ "issue": vuln.get("issue", "Unknown vulnerability"),
316
+ "description": vuln.get("description", ""),
317
+ "file": vuln.get("file", ""),
318
+ "line": vuln.get("line", ""),
319
+ "recommendation": vuln.get("recommendation", "Fix this security vulnerability."),
320
+ })
321
+
322
+ # Extract critical code quality issues as medium priority recommendations
323
+ code_analysis = results.get("code_analysis", {})
324
+ for language, language_results in code_analysis.items():
325
+ for issue in language_results.get("issues", []):
326
+ if issue.get("severity", "").lower() in ["critical", "high"]:
327
+ recommendations["medium_priority"].append({
328
+ "type": "code_quality",
329
+ "language": language,
330
+ "issue": issue.get("issue", "Unknown issue"),
331
+ "description": issue.get("description", ""),
332
+ "file": issue.get("file", ""),
333
+ "line": issue.get("line", ""),
334
+ "recommendation": issue.get("recommendation", "Address this code quality issue."),
335
+ })
336
+
337
+ # Extract performance hotspots as medium priority recommendations
338
+ performance_analysis = results.get("performance_analysis", {})
339
+ for hotspot in performance_analysis.get("hotspots", []):
340
+ recommendations["medium_priority"].append({
341
+ "type": "performance",
342
+ "language": hotspot.get("language", ""),
343
+ "issue": "Performance Hotspot",
344
+ "description": f"File contains {hotspot.get('issue_count', 0)} performance issues",
345
+ "file": hotspot.get("file", ""),
346
+ "recommendation": "Optimize this file to improve performance.",
347
+ })
348
+
349
+ # Extract other performance issues as low priority recommendations
350
+ for language, language_results in performance_analysis.get("language_results", {}).items():
351
+ for issue in language_results.get("issues", []):
352
+ # Skip issues that are already part of hotspots
353
+ if any(hotspot.get("file", "") == issue.get("file", "") for hotspot in performance_analysis.get("hotspots", [])):
354
+ continue
355
+
356
+ recommendations["low_priority"].append({
357
+ "type": "performance",
358
+ "language": language,
359
+ "issue": issue.get("issue", "Unknown issue"),
360
+ "description": issue.get("description", ""),
361
+ "file": issue.get("file", ""),
362
+ "line": issue.get("line", ""),
363
+ "recommendation": issue.get("recommendation", "Consider optimizing this code."),
364
+ })
365
+
366
+ # Extract AI review suggestions as recommendations
367
+ ai_review = results.get("ai_review", {})
368
+ for file_path, review in ai_review.get("reviews", {}).items():
369
+ for suggestion in review.get("suggestions", []):
370
+ priority = "medium_priority"
371
+ if "security" in suggestion.get("section", "").lower():
372
+ priority = "high_priority"
373
+ elif "performance" in suggestion.get("section", "").lower():
374
+ priority = "low_priority"
375
+
376
+ recommendations[priority].append({
377
+ "type": "ai_review",
378
+ "language": "", # AI review doesn't specify language
379
+ "issue": suggestion.get("section", "AI Suggestion"),
380
+ "description": suggestion.get("description", ""),
381
+ "file": file_path,
382
+ "line": suggestion.get("line", ""),
383
+ "recommendation": suggestion.get("details", ""),
384
+ })
385
+
386
+ # Limit the number of recommendations in each category
387
+ limit = 15
388
+ recommendations["high_priority"] = recommendations["high_priority"][:limit]
389
+ recommendations["medium_priority"] = recommendations["medium_priority"][:limit]
390
+ recommendations["low_priority"] = recommendations["low_priority"][:limit]
391
+
392
+ return recommendations
393
+
394
+ def _generate_json_report(self, report_name, report_content):
395
+ """
396
+ Generate a JSON report.
397
+
398
+ Args:
399
+ report_name (str): Name of the report.
400
+ report_content (dict): Report content.
401
+
402
+ Returns:
403
+ str: Path to the generated report.
404
+ """
405
+ report_path = os.path.join(self.output_dir, f"{report_name}.json")
406
+
407
+ with open(report_path, "w", encoding="utf-8") as f:
408
+ json.dump(report_content, f, indent=2, ensure_ascii=False)
409
+
410
+ logger.info(f"Generated JSON report: {report_path}")
411
+ return report_path
412
+
413
+ def _generate_html_report(self, report_name, report_content):
414
+ """
415
+ Generate an HTML report.
416
+
417
+ Args:
418
+ report_name (str): Name of the report.
419
+ report_content (dict): Report content.
420
+
421
+ Returns:
422
+ str: Path to the generated report.
423
+ """
424
+ report_path = os.path.join(self.output_dir, f"{report_name}.html")
425
+
426
+ # Convert report content to markdown
427
+ md_content = self._convert_to_markdown(report_content)
428
+
429
+ # Convert markdown to HTML
430
+ html_content = markdown.markdown(md_content, extensions=["tables", "fenced_code"])
431
+
432
+ # Add CSS styling
433
+ html_content = f"""
434
+ <!DOCTYPE html>
435
+ <html>
436
+ <head>
437
+ <meta charset="utf-8">
438
+ <meta name="viewport" content="width=device-width, initial-scale=1">
439
+ <title>Code Review Report: {report_content['metadata']['repository_name']}</title>
440
+ <style>
441
+ body {{font-family: Arial, sans-serif; line-height: 1.6; max-width: 1200px; margin: 0 auto; padding: 20px;}}
442
+ h1, h2, h3, h4 {{color: #333; margin-top: 30px;}}
443
+ h1 {{border-bottom: 2px solid #333; padding-bottom: 10px;}}
444
+ h2 {{border-bottom: 1px solid #ccc; padding-bottom: 5px;}}
445
+ table {{border-collapse: collapse; width: 100%; margin: 20px 0;}}
446
+ th, td {{text-align: left; padding: 12px; border-bottom: 1px solid #ddd;}}
447
+ th {{background-color: #f2f2f2;}}
448
+ tr:hover {{background-color: #f5f5f5;}}
449
+ .metric-card {{background-color: #f9f9f9; border-radius: 5px; padding: 15px; margin: 10px 0; box-shadow: 0 2px 4px rgba(0,0,0,0.1);}}
450
+ .metric-value {{font-size: 24px; font-weight: bold; color: #333;}}
451
+ .metric-label {{font-size: 14px; color: #666;}}
452
+ .severity-critical {{color: #d9534f; font-weight: bold;}}
453
+ .severity-high {{color: #f0ad4e; font-weight: bold;}}
454
+ .severity-medium {{color: #5bc0de; font-weight: bold;}}
455
+ .severity-low {{color: #5cb85c; font-weight: bold;}}
456
+ .metrics-container {{display: flex; flex-wrap: wrap; gap: 20px; justify-content: space-between;}}
457
+ .metric-card {{flex: 1; min-width: 200px;}}
458
+ pre {{background-color: #f5f5f5; padding: 15px; border-radius: 5px; overflow-x: auto;}}
459
+ code {{font-family: Consolas, Monaco, 'Andale Mono', monospace; font-size: 14px;}}
460
+ .recommendation {{background-color: #f9f9f9; border-left: 4px solid #5bc0de; padding: 10px; margin: 10px 0;}}
461
+ .high-priority {{border-left-color: #d9534f;}}
462
+ .medium-priority {{border-left-color: #f0ad4e;}}
463
+ .low-priority {{border-left-color: #5cb85c;}}
464
+ </style>
465
+ </head>
466
+ <body>
467
+ {html_content}
468
+ </body>
469
+ </html>
470
+ """
471
+
472
+ with open(report_path, "w", encoding="utf-8") as f:
473
+ f.write(html_content)
474
+
475
+ logger.info(f"Generated HTML report: {report_path}")
476
+ return report_path
477
+
478
+ def _generate_pdf_report(self, report_name, report_content):
479
+ """
480
+ Generate a PDF report.
481
+
482
+ Args:
483
+ report_name (str): Name of the report.
484
+ report_content (dict): Report content.
485
+
486
+ Returns:
487
+ str: Path to the generated report.
488
+ """
489
+ report_path = os.path.join(self.output_dir, f"{report_name}.pdf")
490
+
491
+ # First generate HTML report
492
+ html_path = self._generate_html_report(f"{report_name}_temp", report_content)
493
+
494
+ try:
495
+ # Convert HTML to PDF using pdfkit
496
+ pdfkit.from_file(html_path, report_path)
497
+
498
+ # Remove temporary HTML file
499
+ os.remove(html_path)
500
+
501
+ logger.info(f"Generated PDF report: {report_path}")
502
+ return report_path
503
+
504
+ except Exception as e:
505
+ logger.error(f"Error generating PDF report: {e}")
506
+ return html_path
507
+
508
+ def _generate_csv_report(self, report_name, report_content):
509
+ """
510
+ Generate a CSV report with issues and recommendations.
511
+
512
+ Args:
513
+ report_name (str): Name of the report.
514
+ report_content (dict): Report content.
515
+
516
+ Returns:
517
+ str: Path to the generated report.
518
+ """
519
+ report_path = os.path.join(self.output_dir, f"{report_name}.csv")
520
+
521
+ # Collect all issues and recommendations
522
+ rows = []
523
+
524
+ # Add code quality issues
525
+ for language, language_results in report_content["code_quality"]["issues_by_language"].items():
526
+ for issue in language_results.get("issues", []):
527
+ rows.append({
528
+ "Type": "Code Quality",
529
+ "Language": language,
530
+ "Severity": issue.get("severity", ""),
531
+ "Issue": issue.get("issue", ""),
532
+ "Description": issue.get("description", ""),
533
+ "File": issue.get("file", ""),
534
+ "Line": issue.get("line", ""),
535
+ "Recommendation": issue.get("recommendation", ""),
536
+ })
537
+
538
+ # Add security vulnerabilities
539
+ for language, language_results in report_content["security"]["vulnerabilities_by_language"].items():
540
+ for vuln in language_results.get("vulnerabilities", []):
541
+ rows.append({
542
+ "Type": "Security",
543
+ "Language": language,
544
+ "Severity": vuln.get("severity", ""),
545
+ "Issue": vuln.get("issue", ""),
546
+ "Description": vuln.get("description", ""),
547
+ "File": vuln.get("file", ""),
548
+ "Line": vuln.get("line", ""),
549
+ "Recommendation": vuln.get("recommendation", ""),
550
+ })
551
+
552
+ # Add performance issues
553
+ for language, language_results in report_content["performance"]["issues_by_language"].items():
554
+ for issue in language_results.get("issues", []):
555
+ rows.append({
556
+ "Type": "Performance",
557
+ "Language": language,
558
+ "Severity": issue.get("severity", "Medium"),
559
+ "Issue": issue.get("issue", ""),
560
+ "Description": issue.get("description", ""),
561
+ "File": issue.get("file", ""),
562
+ "Line": issue.get("line", ""),
563
+ "Recommendation": issue.get("recommendation", ""),
564
+ })
565
+
566
+ # Add AI review suggestions
567
+ for file_path, review in report_content["ai_review"]["file_reviews"].items():
568
+ for suggestion in review.get("suggestions", []):
569
+ rows.append({
570
+ "Type": "AI Review",
571
+ "Language": "",
572
+ "Severity": "",
573
+ "Issue": suggestion.get("section", ""),
574
+ "Description": suggestion.get("description", ""),
575
+ "File": file_path,
576
+ "Line": suggestion.get("line", ""),
577
+ "Recommendation": suggestion.get("details", ""),
578
+ })
579
+
580
+ # Write to CSV
581
+ with open(report_path, "w", newline="", encoding="utf-8") as f:
582
+ fieldnames = ["Type", "Language", "Severity", "Issue", "Description", "File", "Line", "Recommendation"]
583
+ writer = csv.DictWriter(f, fieldnames=fieldnames)
584
+ writer.writeheader()
585
+ writer.writerows(rows)
586
+
587
+ logger.info(f"Generated CSV report: {report_path}")
588
+ return report_path
589
+
590
+     def _convert_to_markdown(self, report_content):
+         """
+         Convert report content to markdown format.
+
+         Args:
+             report_content (dict): Report content.
+
+         Returns:
+             str: Markdown formatted report.
+         """
+         md = []
+
+         # Title and metadata
+         md.append(f"# Code Review Report: {report_content['metadata']['repository_name']}")
+         md.append(f"**Report Date:** {report_content['metadata']['report_date']}")
+         md.append("")
+
+         # Repository info
+         repo_info = report_content['metadata']['repository_info']
+         md.append("## Repository Information")
+         md.append(f"**Branch:** {repo_info.get('branch', 'N/A')}")
+         md.append(f"**Commit:** {repo_info.get('commit', 'N/A')}")
+         md.append(f"**Remote URL:** {repo_info.get('remote_url', 'N/A')}")
+         md.append(f"**Size:** {repo_info.get('size', 0)} bytes")
+         md.append(f"**File Count:** {repo_info.get('file_count', 0)}")
+         md.append("")
+
+         # Summary metrics
+         md.append("## Executive Summary")
+         metrics = report_content['summary']['metrics']
+         md.append(f"**Overall Score:** {metrics.get('overall_score', 0)}/100")
+         md.append(f"**Quality Rating:** {metrics.get('quality_rating', 'N/A')}")
+         md.append("")
+         md.append("### Key Metrics")
+         md.append("| Metric | Value |")
+         md.append("| ------ | ----- |")
+         md.append(f"| Total Files | {metrics.get('total_files', 0)} |")
+         md.append(f"| Code Quality Issues | {metrics.get('total_code_issues', 0)} |")
+         md.append(f"| Critical Code Issues | {metrics.get('critical_code_issues', 0)} |")
+         md.append(f"| Security Vulnerabilities | {metrics.get('total_vulnerabilities', 0)} |")
+         md.append(f"| Critical Vulnerabilities | {metrics.get('critical_vulnerabilities', 0)} |")
+         md.append(f"| Performance Issues | {metrics.get('total_performance_issues', 0)} |")
+         md.append(f"| Performance Hotspots | {metrics.get('performance_hotspots', 0)} |")
+         md.append("")
+
+         # Language breakdown
+         md.append("### Language Breakdown")
+         language_breakdown = report_content['summary']['language_breakdown']
+         md.append("| Language | Files | Lines | Percentage |")
+         md.append("| -------- | ----- | ----- | ---------- |")
+         for language, stats in language_breakdown.items():
+             md.append(f"| {language} | {stats.get('files', 0)} | {stats.get('lines', 0)} | {stats.get('percentage', 0)}% |")
+         md.append("")
+
+         # Executive summary from AI review
+         md.append("### AI Review Summary")
+         md.append(report_content['summary']['executive_summary'])
+         md.append("")
+
+         # Code quality issues
+         md.append("## Code Quality Analysis")
+         md.append("### Top Issues")
+         top_issues = report_content['code_quality']['top_issues']
+         if top_issues:
+             md.append("| Severity | Language | Issue | File | Line |")
+             md.append("| -------- | -------- | ----- | ---- | ---- |")
+             for issue in top_issues:
+                 md.append(f"| {issue.get('severity', 'N/A')} | {issue.get('language', 'N/A')} | {issue.get('issue', 'N/A')} | {issue.get('file', 'N/A')} | {issue.get('line', 'N/A')} |")
+         else:
+             md.append("No code quality issues found.")
+         md.append("")
+
+         # Security vulnerabilities
+         md.append("## Security Analysis")
+         md.append("### Critical Vulnerabilities")
+         critical_vulnerabilities = report_content['security']['critical_vulnerabilities']
+         if critical_vulnerabilities:
+             md.append("| Severity | Language | Vulnerability | File | Line |")
+             md.append("| -------- | -------- | ------------- | ---- | ---- |")
+             for vuln in critical_vulnerabilities:
+                 md.append(f"| {vuln.get('severity', 'N/A')} | {vuln.get('language', 'N/A')} | {vuln.get('issue', 'N/A')} | {vuln.get('file', 'N/A')} | {vuln.get('line', 'N/A')} |")
+         else:
+             md.append("No critical security vulnerabilities found.")
+         md.append("")
+
+         # Performance analysis
+         md.append("## Performance Analysis")
+         md.append("### Performance Hotspots")
+         hotspots = report_content['performance']['hotspots']
+         if hotspots:
+             md.append("| Language | File | Issue Count |")
+             md.append("| -------- | ---- | ----------- |")
+             for hotspot in hotspots:
+                 md.append(f"| {hotspot.get('language', 'N/A')} | {hotspot.get('file', 'N/A')} | {hotspot.get('issue_count', 0)} |")
+         else:
+             md.append("No performance hotspots found.")
+         md.append("")
+
+         # Recommendations
+         md.append("## Recommendations")
+
+         # High priority recommendations
+         md.append("### High Priority")
+         high_priority = report_content['recommendations']['high_priority']
+         if high_priority:
+             for i, rec in enumerate(high_priority, 1):
+                 md.append(f"**{i}. {rec.get('issue', 'Recommendation')}**")
+                 md.append(f"- **Type:** {rec.get('type', 'N/A')}")
+                 md.append(f"- **File:** {rec.get('file', 'N/A')}")
+                 if rec.get('line'):
+                     md.append(f"- **Line:** {rec.get('line')}")
+                 md.append(f"- **Description:** {rec.get('description', 'N/A')}")
+                 md.append(f"- **Recommendation:** {rec.get('recommendation', 'N/A')}")
+                 md.append("")
+         else:
+             md.append("No high priority recommendations.")
+             md.append("")
+
+         # Medium priority recommendations
+         md.append("### Medium Priority")
+         medium_priority = report_content['recommendations']['medium_priority']
+         if medium_priority:
+             for i, rec in enumerate(medium_priority, 1):
+                 md.append(f"**{i}. {rec.get('issue', 'Recommendation')}**")
+                 md.append(f"- **Type:** {rec.get('type', 'N/A')}")
+                 md.append(f"- **File:** {rec.get('file', 'N/A')}")
+                 if rec.get('line'):
+                     md.append(f"- **Line:** {rec.get('line')}")
+                 md.append(f"- **Description:** {rec.get('description', 'N/A')}")
+                 md.append(f"- **Recommendation:** {rec.get('recommendation', 'N/A')}")
+                 md.append("")
+         else:
+             md.append("No medium priority recommendations.")
+             md.append("")
+
+         # Low priority recommendations
+         md.append("### Low Priority")
+         low_priority = report_content['recommendations']['low_priority']
+         if low_priority:
+             for i, rec in enumerate(low_priority, 1):
+                 md.append(f"**{i}. {rec.get('issue', 'Recommendation')}**")
+                 md.append(f"- **Type:** {rec.get('type', 'N/A')}")
+                 md.append(f"- **File:** {rec.get('file', 'N/A')}")
+                 if rec.get('line'):
+                     md.append(f"- **Line:** {rec.get('line')}")
+                 md.append(f"- **Description:** {rec.get('description', 'N/A')}")
+                 md.append(f"- **Recommendation:** {rec.get('recommendation', 'N/A')}")
+                 md.append("")
+         else:
+             md.append("No low priority recommendations.")
+
+         return "\n".join(md)
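For reference, a minimal sketch of the `report_content` shape this converter expects, inferred from the accessors above; every value here is a placeholder, and real reports are assembled by ReportGenerator:

sample_report = {
    'metadata': {
        'repository_name': 'demo-repo',  # placeholder values throughout
        'report_date': '2024-01-01',
        'repository_info': {'branch': 'main', 'commit': 'abc123',
                            'remote_url': 'https://github.com/user/demo-repo',
                            'size': 2048, 'file_count': 12},
    },
    'summary': {
        'metrics': {'overall_score': 85, 'quality_rating': 'Good', 'total_files': 12},
        'language_breakdown': {'Python': {'files': 12, 'lines': 900, 'percentage': 100}},
        'executive_summary': 'The codebase is in good shape overall.',
    },
    'code_quality': {'top_issues': []},
    'security': {'critical_vulnerabilities': []},
    'performance': {'hotspots': []},
    'recommendations': {'high_priority': [], 'medium_priority': [], 'low_priority': []},
}

markdown = report_generator._convert_to_markdown(sample_report)  # report_generator: a ReportGenerator instance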
src/services/repository_service.py ADDED
@@ -0,0 +1,244 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Repository Service
+
+ This module provides functionality for cloning and managing Git repositories.
+ """
+
+ import os
+ import shutil
+ import tempfile
+ import logging
+ import re
+ from git import Repo
+ from git.exc import GitCommandError
+
+ logger = logging.getLogger(__name__)
+
+ class RepositoryService:
+     """
+     Service for cloning and managing Git repositories.
+     """
+
+     def __init__(self, base_temp_dir=None):
+         """
+         Initialize the RepositoryService.
+
+         Args:
+             base_temp_dir (str, optional): Base directory for temporary repositories.
+                 If None, the system temp directory will be used.
+         """
+         self.base_temp_dir = base_temp_dir or tempfile.gettempdir()
+         self.repos = {}
+         logger.info(f"Initialized RepositoryService with base temp dir: {self.base_temp_dir}")
+
+     def validate_github_url(self, url):
+         """
+         Validate if the provided URL is a valid GitHub repository URL.
+
+         Args:
+             url (str): The GitHub repository URL to validate.
+
+         Returns:
+             bool: True if the URL is valid, False otherwise.
+         """
+         # GitHub URL patterns
+         patterns = [
+             r'^https?://github\.com/[\w.-]+/[\w.-]+(\.git)?$',  # https://github.com/user/repo[.git]
+             r'^git@github\.com:[\w.-]+/[\w.-]+(\.git)?$',  # git@github.com:user/repo[.git]
+         ]
+
+         for pattern in patterns:
+             if re.match(pattern, url):
+                 return True
+
+         return False
+
+     def normalize_github_url(self, url):
+         """
+         Normalize a GitHub URL to a consistent format.
+
+         Args:
+             url (str): The GitHub repository URL to normalize.
+
+         Returns:
+             str: The normalized URL.
+         """
+         # Convert SSH URL to HTTPS URL
+         if url.startswith('git@github.com:'):
+             user_repo = url[len('git@github.com:'):]
+             if user_repo.endswith('.git'):
+                 user_repo = user_repo[:-4]
+             return f"https://github.com/{user_repo}"
+
+         # Ensure HTTPS URL ends without .git
+         if url.startswith('http'):
+             if url.endswith('.git'):
+                 return url[:-4]
+
+         return url
+
+     def extract_repo_name(self, url):
+         """
+         Extract repository name from a GitHub URL.
+
+         Args:
+             url (str): The GitHub repository URL.
+
+         Returns:
+             str: The repository name.
+         """
+         normalized_url = self.normalize_github_url(url)
+         return normalized_url.split('/')[-1]
+
+     def clone_repository(self, url, branch=None):
+         """
+         Clone a Git repository from the provided URL.
+
+         Args:
+             url (str): The repository URL to clone.
+             branch (str, optional): The branch to checkout. If None, the default branch is used.
+
+         Returns:
+             str: The path to the cloned repository.
+
+         Raises:
+             ValueError: If the URL is not a valid GitHub repository URL.
+             GitCommandError: If there's an error during the Git operation.
+         """
+         if not self.validate_github_url(url):
+             raise ValueError(f"Invalid GitHub repository URL: {url}")
+
+         repo_name = self.extract_repo_name(url)
+         repo_dir = os.path.join(self.base_temp_dir, f"codereview_{repo_name}_{os.urandom(4).hex()}")
+
+         logger.info(f"Cloning repository {url} to {repo_dir}")
+
+         try:
+             # Clone the repository
+             if branch:
+                 repo = Repo.clone_from(url, repo_dir, branch=branch)
+                 logger.info(f"Cloned repository {url} (branch: {branch}) to {repo_dir}")
+             else:
+                 repo = Repo.clone_from(url, repo_dir)
+                 logger.info(f"Cloned repository {url} (default branch) to {repo_dir}")
+
+             # Store the repository instance
+             self.repos[repo_dir] = repo
+
+             return repo_dir
+
+         except GitCommandError as e:
+             logger.error(f"Error cloning repository {url}: {e}")
+             # Clean up the directory if it was created
+             if os.path.exists(repo_dir):
+                 shutil.rmtree(repo_dir, ignore_errors=True)
+             raise
+
+     def get_repository_info(self, repo_path):
+         """
+         Get information about a repository.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: A dictionary containing repository information.
+         """
+         if repo_path not in self.repos:
+             try:
+                 self.repos[repo_path] = Repo(repo_path)
+             except Exception as e:
+                 logger.error(f"Error opening repository at {repo_path}: {e}")
+                 return {}
+
+         repo = self.repos[repo_path]
+
+         try:
+             # Get the active branch
+             try:
+                 active_branch = repo.active_branch.name
+             except TypeError:
+                 # Detached HEAD state
+                 active_branch = 'HEAD detached'
+
+             # Get the latest commit
+             latest_commit = repo.head.commit
+
+             # Get remote URL
+             try:
+                 remote_url = repo.remotes.origin.url
+             except AttributeError:
+                 remote_url = 'No remote URL found'
+
+             # Get repository size (approximate)
+             repo_size = sum(os.path.getsize(os.path.join(dirpath, filename))
+                             for dirpath, _, filenames in os.walk(repo_path)
+                             for filename in filenames)
+
+             # Count files
+             file_count = sum(len(files) for _, _, files in os.walk(repo_path))
+
+             return {
+                 'path': repo_path,
+                 'active_branch': active_branch,
+                 'latest_commit': {
+                     'hash': latest_commit.hexsha,
+                     'author': f"{latest_commit.author.name} <{latest_commit.author.email}>",
+                     'date': latest_commit.committed_datetime.isoformat(),
+                     'message': latest_commit.message.strip(),
+                 },
+                 'remote_url': remote_url,
+                 'size_bytes': repo_size,
+                 'file_count': file_count,
+             }
+
+         except Exception as e:
+             logger.error(f"Error getting repository info for {repo_path}: {e}")
+             return {
+                 'path': repo_path,
+                 'error': str(e),
+             }
+
+     def cleanup_repository(self, repo_path):
+         """
+         Clean up a cloned repository.
+
+         Args:
+             repo_path (str): The path to the repository to clean up.
+
+         Returns:
+             bool: True if the cleanup was successful, False otherwise.
+         """
+         logger.info(f"Cleaning up repository at {repo_path}")
+
+         # Remove the repository from the tracked repos
+         if repo_path in self.repos:
+             del self.repos[repo_path]
+
+         # Remove the directory
+         try:
+             if os.path.exists(repo_path):
+                 shutil.rmtree(repo_path, ignore_errors=True)
+             return True
+         except Exception as e:
+             logger.error(f"Error cleaning up repository at {repo_path}: {e}")
+             return False
+
+     def cleanup_all_repositories(self):
+         """
+         Clean up all cloned repositories.
+
+         Returns:
+             bool: True if all cleanups were successful, False otherwise.
+         """
+         logger.info("Cleaning up all repositories")
+
+         success = True
+         for repo_path in list(self.repos.keys()):
+             if not self.cleanup_repository(repo_path):
+                 success = False
+
+         return success
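A short usage sketch for RepositoryService, assuming GitPython is installed and the URL (a placeholder here) points at a reachable public repository:

from src.services.repository_service import RepositoryService

service = RepositoryService()
url = "https://github.com/user/repo"  # placeholder URL
if service.validate_github_url(url):
    repo_path = service.clone_repository(url)       # clones into a unique temp directory
    info = service.get_repository_info(repo_path)   # branch, latest commit, size, file count
    print(info.get('active_branch'), info.get('file_count'))
    service.cleanup_repository(repo_path)           # remove the temp clone when done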
src/services/security_scanner.py ADDED
@@ -0,0 +1,810 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Security Scanner Service
+
+ This module provides functionality for scanning code for security vulnerabilities.
+ """
+
+ import os
+ import subprocess
+ import logging
+ import json
+ import tempfile
+ from collections import defaultdict
+
+ logger = logging.getLogger(__name__)
+
+ class SecurityScanner:
+     """
+     Service for scanning code for security vulnerabilities.
+     """
+
+     def __init__(self):
+         """
+         Initialize the SecurityScanner.
+         """
+         logger.info("Initialized SecurityScanner")
+         self.scanners = {
+             'Python': self._scan_python,
+             'JavaScript': self._scan_javascript,
+             'TypeScript': self._scan_javascript,  # TypeScript uses the same scanner as JavaScript
+             'Java': self._scan_java,
+             'Go': self._scan_go,
+             'Rust': self._scan_rust,
+         }
+
+     def scan_repository(self, repo_path, languages):
+         """
+         Scan a repository for security vulnerabilities in the specified languages.
+
+         Args:
+             repo_path (str): The path to the repository.
+             languages (list): A list of programming languages to scan.
+
+         Returns:
+             dict: A dictionary containing scan results for each language.
+         """
+         logger.info(f"Scanning repository at {repo_path} for security vulnerabilities in languages: {languages}")
+
+         results = {}
+
+         # Scan dependencies first (language-agnostic)
+         results['dependencies'] = self._scan_dependencies(repo_path)
+
+         # Scan each language
+         for language in languages:
+             if language in self.scanners:
+                 try:
+                     logger.info(f"Scanning {language} code in {repo_path} for security vulnerabilities")
+                     results[language] = self.scanners[language](repo_path)
+                 except Exception as e:
+                     logger.error(f"Error scanning {language} code for security vulnerabilities: {e}")
+                     results[language] = {
+                         'status': 'error',
+                         'error': str(e),
+                         'vulnerabilities': [],
+                     }
+             else:
+                 logger.warning(f"No security scanner available for {language}")
+                 results[language] = {
+                     'status': 'not_supported',
+                     'message': f"Security scanning for {language} is not supported yet.",
+                     'vulnerabilities': [],
+                 }
+
+         return results
+
+     def _scan_dependencies(self, repo_path):
+         """
+         Scan dependencies for known vulnerabilities.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Dependency scan results.
+         """
+         logger.info(f"Scanning dependencies in {repo_path}")
+
+         results = {
+             'python': self._scan_python_dependencies(repo_path),
+             'javascript': self._scan_javascript_dependencies(repo_path),
+             'java': self._scan_java_dependencies(repo_path),
+             'go': self._scan_go_dependencies(repo_path),
+             'rust': self._scan_rust_dependencies(repo_path),
+         }
+
+         # Aggregate vulnerabilities
+         all_vulnerabilities = []
+         for lang_result in results.values():
+             all_vulnerabilities.extend(lang_result.get('vulnerabilities', []))
+
+         return {
+             'status': 'success',
+             'vulnerabilities': all_vulnerabilities,
+             'vulnerability_count': len(all_vulnerabilities),
+             'language_results': results,
+         }
+
+     def _scan_python_dependencies(self, repo_path):
+         """
+         Scan Python dependencies for known vulnerabilities using safety.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Python dependencies.
+         """
+         logger.info(f"Scanning Python dependencies in {repo_path}")
+
+         # Find requirements files
+         requirements_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file in ('requirements.txt', 'Pipfile', 'Pipfile.lock', 'setup.py'):
+                     requirements_files.append(os.path.join(root, file))
+
+         if not requirements_files:
+             return {
+                 'status': 'no_dependencies',
+                 'message': 'No Python dependency files found.',
+                 'vulnerabilities': [],
+             }
+
+         vulnerabilities = []
+
+         for req_file in requirements_files:
+             try:
+                 # Run safety check
+                 cmd = [
+                     'safety',
+                     'check',
+                     '--file', req_file,
+                     '--json',
+                 ]
+
+                 process = subprocess.run(
+                     cmd,
+                     stdout=subprocess.PIPE,
+                     stderr=subprocess.PIPE,
+                     text=True,
+                     check=False,
+                 )
+
+                 # Parse safety output
+                 if process.stdout.strip():
+                     try:
+                         safety_results = json.loads(process.stdout)
+
+                         for vuln in safety_results.get('vulnerabilities', []):
+                             vulnerabilities.append({
+                                 'package': vuln.get('package_name', ''),
+                                 'installed_version': vuln.get('installed_version', ''),
+                                 'affected_versions': vuln.get('vulnerable_spec', ''),
+                                 'description': vuln.get('advisory', ''),
+                                 'severity': vuln.get('severity', ''),
+                                 'file': req_file,
+                                 'language': 'Python',
+                             })
+                     except json.JSONDecodeError:
+                         logger.error(f"Error parsing safety output: {process.stdout}")
+             except Exception as e:
+                 logger.error(f"Error running safety on {req_file}: {e}")
+
+         return {
+             'status': 'success',
+             'vulnerabilities': vulnerabilities,
+             'vulnerability_count': len(vulnerabilities),
+             'files_scanned': requirements_files,
+         }
+
+     def _scan_javascript_dependencies(self, repo_path):
+         """
+         Scan JavaScript/TypeScript dependencies for known vulnerabilities using npm audit.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for JavaScript dependencies.
+         """
+         logger.info(f"Scanning JavaScript dependencies in {repo_path}")
+
+         # Find package.json files
+         package_files = []
+         for root, _, files in os.walk(repo_path):
+             if 'package.json' in files:
+                 package_files.append(os.path.join(root, 'package.json'))
+
+         if not package_files:
+             return {
+                 'status': 'no_dependencies',
+                 'message': 'No JavaScript dependency files found.',
+                 'vulnerabilities': [],
+             }
+
+         vulnerabilities = []
+
+         for pkg_file in package_files:
+             pkg_dir = os.path.dirname(pkg_file)
+             try:
+                 # Run npm audit
+                 cmd = [
+                     'npm',
+                     'audit',
+                     '--json',
+                 ]
+
+                 process = subprocess.run(
+                     cmd,
+                     stdout=subprocess.PIPE,
+                     stderr=subprocess.PIPE,
+                     text=True,
+                     check=False,
+                     cwd=pkg_dir,  # Run in the directory containing package.json
+                 )
+
+                 # Parse npm audit output
+                 if process.stdout.strip():
+                     try:
+                         audit_results = json.loads(process.stdout)
+
+                         # Extract vulnerabilities from npm audit results
+                         for vuln_id, vuln_info in audit_results.get('vulnerabilities', {}).items():
+                             vulnerabilities.append({
+                                 'package': vuln_info.get('name', ''),
+                                 'installed_version': vuln_info.get('version', ''),
+                                 'affected_versions': vuln_info.get('range', ''),
+                                 'description': vuln_info.get('overview', ''),
+                                 'severity': vuln_info.get('severity', ''),
+                                 'file': pkg_file,
+                                 'language': 'JavaScript',
+                                 'cwe': vuln_info.get('cwe', ''),
+                                 'recommendation': vuln_info.get('recommendation', ''),
+                             })
+                     except json.JSONDecodeError:
+                         logger.error(f"Error parsing npm audit output: {process.stdout}")
+             except Exception as e:
+                 logger.error(f"Error running npm audit on {pkg_file}: {e}")
+
+         return {
+             'status': 'success',
+             'vulnerabilities': vulnerabilities,
+             'vulnerability_count': len(vulnerabilities),
+             'files_scanned': package_files,
+         }
+
+     def _scan_java_dependencies(self, repo_path):
+         """
+         Scan Java dependencies for known vulnerabilities.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Java dependencies.
+         """
+         logger.info(f"Scanning Java dependencies in {repo_path}")
+
+         # Find pom.xml or build.gradle files
+         dependency_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file in ('pom.xml', 'build.gradle'):
+                     dependency_files.append(os.path.join(root, file))
+
+         if not dependency_files:
+             return {
+                 'status': 'no_dependencies',
+                 'message': 'No Java dependency files found.',
+                 'vulnerabilities': [],
+             }
+
+         # For now, we just return a placeholder since we don't have a direct tool
+         # In a real implementation, you might use OWASP Dependency-Check or similar
+         return {
+             'status': 'not_implemented',
+             'message': 'Java dependency scanning is not fully implemented yet.',
+             'vulnerabilities': [],
+             'files_scanned': dependency_files,
+         }
+
+     def _scan_go_dependencies(self, repo_path):
+         """
+         Scan Go dependencies for known vulnerabilities using govulncheck.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Go dependencies.
+         """
+         logger.info(f"Scanning Go dependencies in {repo_path}")
+
+         # Check if go.mod exists
+         go_mod_path = os.path.join(repo_path, 'go.mod')
+         if not os.path.exists(go_mod_path):
+             return {
+                 'status': 'no_dependencies',
+                 'message': 'No Go dependency files found.',
+                 'vulnerabilities': [],
+             }
+
+         try:
+             # Run govulncheck
+             cmd = [
+                 'govulncheck',
+                 '-json',
+                 './...',
+             ]
+
+             process = subprocess.run(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+                 cwd=repo_path,  # Run in the repository directory
+             )
+
+             # Parse govulncheck output
+             vulnerabilities = []
+             if process.stdout.strip():
+                 for line in process.stdout.splitlines():
+                     try:
+                         result = json.loads(line)
+                         if 'vulnerability' in result:
+                             vuln = result['vulnerability']
+                             vulnerabilities.append({
+                                 'package': vuln.get('package', ''),
+                                 'description': vuln.get('details', ''),
+                                 'severity': 'high',  # govulncheck doesn't provide severity
+                                 'file': go_mod_path,
+                                 'language': 'Go',
+                                 'cve': vuln.get('osv', {}).get('id', ''),
+                                 'affected_versions': vuln.get('osv', {}).get('affected', ''),
+                             })
+                     except json.JSONDecodeError:
+                         continue
+
+             return {
+                 'status': 'success',
+                 'vulnerabilities': vulnerabilities,
+                 'vulnerability_count': len(vulnerabilities),
+                 'files_scanned': [go_mod_path],
+             }
+
+         except Exception as e:
+             logger.error(f"Error running govulncheck: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'vulnerabilities': [],
+             }
+
+     def _scan_rust_dependencies(self, repo_path):
+         """
+         Scan Rust dependencies for known vulnerabilities using cargo-audit.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Rust dependencies.
+         """
+         logger.info(f"Scanning Rust dependencies in {repo_path}")
+
+         # Check if Cargo.toml exists
+         cargo_toml_path = os.path.join(repo_path, 'Cargo.toml')
+         if not os.path.exists(cargo_toml_path):
+             return {
+                 'status': 'no_dependencies',
+                 'message': 'No Rust dependency files found.',
+                 'vulnerabilities': [],
+             }
+
+         try:
+             # Run cargo-audit
+             cmd = [
+                 'cargo',
+                 'audit',
+                 '--json',
+             ]
+
+             process = subprocess.run(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+                 cwd=repo_path,  # Run in the repository directory
+             )
+
+             # Parse cargo-audit output
+             vulnerabilities = []
+             if process.stdout.strip():
+                 try:
+                     audit_results = json.loads(process.stdout)
+
+                     for vuln in audit_results.get('vulnerabilities', {}).get('list', []):
+                         vulnerabilities.append({
+                             'package': vuln.get('package', {}).get('name', ''),
+                             'installed_version': vuln.get('package', {}).get('version', ''),
+                             'description': vuln.get('advisory', {}).get('description', ''),
+                             'severity': vuln.get('advisory', {}).get('severity', ''),
+                             'file': cargo_toml_path,
+                             'language': 'Rust',
+                             'cve': vuln.get('advisory', {}).get('id', ''),
+                         })
+                 except json.JSONDecodeError:
+                     logger.error(f"Error parsing cargo-audit output: {process.stdout}")
+
+             return {
+                 'status': 'success',
+                 'vulnerabilities': vulnerabilities,
+                 'vulnerability_count': len(vulnerabilities),
+                 'files_scanned': [cargo_toml_path],
+             }
+
+         except Exception as e:
+             logger.error(f"Error running cargo-audit: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'vulnerabilities': [],
+             }
+
+     def _scan_python(self, repo_path):
+         """
+         Scan Python code for security vulnerabilities using bandit.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Python code.
+         """
+         logger.info(f"Scanning Python code in {repo_path} for security vulnerabilities")
+
+         # Find Python files
+         python_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.py'):
+                     python_files.append(os.path.join(root, file))
+
+         if not python_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Python files found in the repository.',
+                 'vulnerabilities': [],
+             }
+
+         try:
+             # Run bandit
+             cmd = [
+                 'bandit',
+                 '-r',
+                 '-f', 'json',
+                 repo_path,
+             ]
+
+             process = subprocess.run(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+             )
+
+             # Parse bandit output
+             vulnerabilities = []
+             if process.stdout.strip():
+                 try:
+                     bandit_results = json.loads(process.stdout)
+
+                     for result in bandit_results.get('results', []):
+                         vulnerabilities.append({
+                             'file': result.get('filename', ''),
+                             'line': result.get('line_number', 0),
+                             'code': result.get('code', ''),
+                             'issue': result.get('issue_text', ''),
+                             'severity': result.get('issue_severity', ''),
+                             'confidence': result.get('issue_confidence', ''),
+                             'cwe': result.get('cwe', ''),
+                             'test_id': result.get('test_id', ''),
+                             'test_name': result.get('test_name', ''),
+                             'language': 'Python',
+                         })
+                 except json.JSONDecodeError:
+                     logger.error(f"Error parsing bandit output: {process.stdout}")
+
+             # Group vulnerabilities by severity
+             vulns_by_severity = defaultdict(list)
+             for vuln in vulnerabilities:
+                 severity = vuln.get('severity', 'unknown')
+                 vulns_by_severity[severity].append(vuln)
+
+             return {
+                 'status': 'success',
+                 'vulnerabilities': vulnerabilities,
+                 'vulnerabilities_by_severity': dict(vulns_by_severity),
+                 'vulnerability_count': len(vulnerabilities),
+                 'files_scanned': len(python_files),
+             }
+
+         except Exception as e:
+             logger.error(f"Error running bandit: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'vulnerabilities': [],
+             }
+
+     def _scan_javascript(self, repo_path):
+         """
+         Scan JavaScript/TypeScript code for security vulnerabilities using ESLint's security plugin.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for JavaScript/TypeScript code.
+         """
+         logger.info(f"Scanning JavaScript/TypeScript code in {repo_path} for security vulnerabilities")
+
+         # Find JavaScript/TypeScript files
+         js_files = []
+         for root, _, files in os.walk(repo_path):
+             if 'node_modules' in root:
+                 continue
+             for file in files:
+                 if file.endswith(('.js', '.jsx', '.ts', '.tsx')):
+                     js_files.append(os.path.join(root, file))
+
+         if not js_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No JavaScript/TypeScript files found in the repository.',
+                 'vulnerabilities': [],
+             }
+
+         # For now, we use a simplified approach since NodeJSScan might not be available
+         # In a real implementation, you might use NodeJSScan or similar
+
+         # Create a temporary ESLint configuration file with security rules
+         eslint_config = {
+             "env": {
+                 "browser": True,
+                 "es2021": True,
+                 "node": True
+             },
+             "extends": [
+                 "eslint:recommended",
+                 "plugin:security/recommended"
+             ],
+             "plugins": [
+                 "security"
+             ],
+             "parserOptions": {
+                 "ecmaVersion": 12,
+                 "sourceType": "module",
+                 "ecmaFeatures": {
+                     "jsx": True
+                 }
+             },
+             "rules": {}
+         }
+
+         # Open in text mode ('w'); the binary default of NamedTemporaryFile
+         # would make json.dump fail with a TypeError.
+         with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as temp_config:
+             json.dump(eslint_config, temp_config)
+             temp_config_path = temp_config.name
+
+         try:
+             # Run ESLint with security plugin
+             cmd = [
+                 'npx',
+                 'eslint',
+                 '--config', temp_config_path,
+                 '--format', 'json',
+                 '--plugin', 'security',
+             ] + js_files
+
+             process = subprocess.run(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+             )
+
+             # Parse ESLint output
+             vulnerabilities = []
+             if process.stdout.strip():
+                 try:
+                     eslint_results = json.loads(process.stdout)
+
+                     for result in eslint_results:
+                         file_path = result.get('filePath', '')
+                         for message in result.get('messages', []):
+                             # Only include security-related issues
+                             rule_id = message.get('ruleId', '')
+                             if rule_id and ('security' in rule_id or 'no-eval' in rule_id or 'no-implied-eval' in rule_id):
+                                 vulnerabilities.append({
+                                     'file': file_path,
+                                     'line': message.get('line', 0),
+                                     'column': message.get('column', 0),
+                                     'issue': message.get('message', ''),
+                                     'severity': 'high' if message.get('severity', 0) == 2 else 'medium',
+                                     'rule': rule_id,
+                                     'language': 'JavaScript',
+                                 })
+                 except json.JSONDecodeError:
+                     logger.error(f"Error parsing ESLint output: {process.stdout}")
+
+             # Group vulnerabilities by severity
+             vulns_by_severity = defaultdict(list)
+             for vuln in vulnerabilities:
+                 severity = vuln.get('severity', 'unknown')
+                 vulns_by_severity[severity].append(vuln)
+
+             return {
+                 'status': 'success',
+                 'vulnerabilities': vulnerabilities,
+                 'vulnerabilities_by_severity': dict(vulns_by_severity),
+                 'vulnerability_count': len(vulnerabilities),
+                 'files_scanned': len(js_files),
+             }
+
+         except Exception as e:
+             logger.error(f"Error scanning JavaScript/TypeScript code: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'vulnerabilities': [],
+             }
+
+         finally:
+             # Clean up the temporary configuration file
+             if os.path.exists(temp_config_path):
+                 os.unlink(temp_config_path)
+
+     def _scan_java(self, repo_path):
+         """
+         Scan Java code for security vulnerabilities.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Java code.
+         """
+         logger.info(f"Scanning Java code in {repo_path} for security vulnerabilities")
+
+         # Find Java files
+         java_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.java'):
+                     java_files.append(os.path.join(root, file))
+
+         if not java_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Java files found in the repository.',
+                 'vulnerabilities': [],
+             }
+
+         # For now, we just return a placeholder since we don't have a direct tool
+         # In a real implementation, you might use FindSecBugs or similar
+         return {
+             'status': 'not_implemented',
+             'message': 'Java security scanning is not fully implemented yet.',
+             'vulnerabilities': [],
+             'files_scanned': java_files,
+         }
+
+     def _scan_go(self, repo_path):
+         """
+         Scan Go code for security vulnerabilities using gosec.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Go code.
+         """
+         logger.info(f"Scanning Go code in {repo_path} for security vulnerabilities")
+
+         # Find Go files
+         go_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.go'):
+                     go_files.append(os.path.join(root, file))
+
+         if not go_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Go files found in the repository.',
+                 'vulnerabilities': [],
+             }
+
+         try:
+             # Run gosec
+             cmd = [
+                 'gosec',
+                 '-fmt', 'json',
+                 '-quiet',
+                 './...',
+             ]
+
+             process = subprocess.run(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 check=False,
+                 cwd=repo_path,  # Run in the repository directory
+             )
+
+             # Parse gosec output
+             vulnerabilities = []
+             if process.stdout.strip():
+                 try:
+                     gosec_results = json.loads(process.stdout)
+
+                     for issue in gosec_results.get('Issues', []):
+                         vulnerabilities.append({
+                             'file': issue.get('file', ''),
+                             'line': issue.get('line', ''),
+                             'code': issue.get('code', ''),
+                             'issue': issue.get('details', ''),
+                             'severity': issue.get('severity', ''),
+                             'confidence': issue.get('confidence', ''),
+                             'cwe': issue.get('cwe', {}).get('ID', ''),
+                             'rule_id': issue.get('rule_id', ''),
+                             'language': 'Go',
+                         })
+                 except json.JSONDecodeError:
+                     logger.error(f"Error parsing gosec output: {process.stdout}")
+
+             # Group vulnerabilities by severity
+             vulns_by_severity = defaultdict(list)
+             for vuln in vulnerabilities:
+                 severity = vuln.get('severity', 'unknown')
+                 vulns_by_severity[severity].append(vuln)
+
+             return {
+                 'status': 'success',
+                 'vulnerabilities': vulnerabilities,
+                 'vulnerabilities_by_severity': dict(vulns_by_severity),
+                 'vulnerability_count': len(vulnerabilities),
+                 'files_scanned': len(go_files),
+             }
+
+         except Exception as e:
+             logger.error(f"Error running gosec: {e}")
+             return {
+                 'status': 'error',
+                 'error': str(e),
+                 'vulnerabilities': [],
+             }
+
+     def _scan_rust(self, repo_path):
+         """
+         Scan Rust code for security vulnerabilities.
+
+         Args:
+             repo_path (str): The path to the repository.
+
+         Returns:
+             dict: Scan results for Rust code.
+         """
+         logger.info(f"Scanning Rust code in {repo_path} for security vulnerabilities")
+
+         # Find Rust files
+         rust_files = []
+         for root, _, files in os.walk(repo_path):
+             for file in files:
+                 if file.endswith('.rs'):
+                     rust_files.append(os.path.join(root, file))
+
+         if not rust_files:
+             return {
+                 'status': 'no_files',
+                 'message': 'No Rust files found in the repository.',
+                 'vulnerabilities': [],
+             }
+
+         # For now, we just return a placeholder since we don't have a direct tool
+         # In a real implementation, you might use cargo-audit or similar for code scanning
+         return {
+             'status': 'not_implemented',
+             'message': 'Rust security scanning is not fully implemented yet.',
+             'vulnerabilities': [],
+             'files_scanned': rust_files,
+         }
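A sketch of driving the scanner directly; results depend on which external tools (bandit, safety, npm, govulncheck, cargo-audit, gosec) are actually installed, and the repository path is a placeholder:

from src.services.security_scanner import SecurityScanner

scanner = SecurityScanner()
results = scanner.scan_repository('/tmp/demo-repo', ['Python', 'Go'])  # placeholder path

print(results['dependencies']['vulnerability_count'])
for vuln in results['Python'].get('vulnerabilities', []):
    print(vuln['severity'], vuln['file'], vuln['issue'])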
src/ui/__init__.py ADDED
@@ -0,0 +1 @@
+ # UI Package for Code Review Agent
src/ui/components/__init__.py ADDED
@@ -0,0 +1 @@
+ # UI Components Package for Code Review Agent
src/ui/components/export_manager.py ADDED
@@ -0,0 +1,54 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Export Manager Component
+
+ This module provides the UI component for exporting the code review results in various formats.
+ """
+
+ import gradio as gr
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ def create_export_manager():
+     """
+     Create the export manager component.
+
+     Returns:
+         list: A list of tuples containing (export_button, export_format).
+     """
+     export_buttons = []
+     export_formats = []
+
+     with gr.Group():
+         gr.Markdown("### 📤 Export Results")
+
+         with gr.Row():
+             # PDF Export
+             pdf_btn = gr.Button("Export as PDF", variant="secondary")
+             pdf_format = gr.Textbox(value="pdf", visible=False)
+             export_buttons.append((pdf_btn, pdf_format))
+             export_formats.append(pdf_format)
+
+             # JSON Export
+             json_btn = gr.Button("Export as JSON", variant="secondary")
+             json_format = gr.Textbox(value="json", visible=False)
+             export_buttons.append((json_btn, json_format))
+             export_formats.append(json_format)
+
+             # HTML Export
+             html_btn = gr.Button("Export as HTML", variant="secondary")
+             html_format = gr.Textbox(value="html", visible=False)
+             export_buttons.append((html_btn, html_format))
+             export_formats.append(html_format)
+
+             # CSV Export
+             csv_btn = gr.Button("Export as CSV", variant="secondary")
+             csv_format = gr.Textbox(value="csv", visible=False)
+             export_buttons.append((csv_btn, csv_format))
+             export_formats.append(csv_format)
+
+     return export_buttons
src/ui/components/language_selector.py ADDED
@@ -0,0 +1,45 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Language Selector Component
+
+ This module provides the UI component for selecting programming languages to analyze.
+ """
+
+ import gradio as gr
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ # List of supported programming languages
+ SUPPORTED_LANGUAGES = [
+     "Python", "JavaScript", "TypeScript", "Java",
+     "Go", "Rust", "C++", "C#", "PHP", "Ruby",
+     "Swift", "Kotlin", "Scala", "R", "Shell"
+ ]
+
+
+ def create_language_selector():
+     """
+     Create the language selector component.
+
+     Returns:
+         gr.CheckboxGroup: The language selector component.
+     """
+     with gr.Group():
+         gr.Markdown("### 🔤 Languages (Optional)")
+
+         language_selector = gr.CheckboxGroup(
+             choices=SUPPORTED_LANGUAGES,
+             label="Select languages to analyze",
+             info="Leave empty to auto-detect languages",
+             value=[],
+         )
+
+         gr.Markdown(
+             "*Note: If no languages are selected, the agent will automatically detect languages in the repository.*",
+             elem_classes=["small-text"]
+         )
+
+     return language_selector
src/ui/components/progress_tracker.py ADDED
@@ -0,0 +1,64 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Progress Tracker Component
+
+ This module provides the UI component for tracking the progress of the code review process.
+ """
+
+ import gradio as gr
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ def create_progress_tracker():
+     """
+     Create the progress tracker component.
+
+     Returns:
+         tuple: A tuple containing (overall_progress, status_message, step_progress_dict)
+     """
+     # Overall progress bar
+     overall_progress = gr.Slider(
+         minimum=0,
+         maximum=100,
+         value=0,
+         label="Overall Progress",
+         interactive=False,
+     )
+
+     # Status message
+     status_message = gr.Markdown(
+         "*Initializing...*"
+     )
+
+     # Detailed progress steps
+     steps = [
+         "Repository Cloning",
+         "Language Detection",
+         "Code Analysis",
+         "Security Scanning",
+         "Performance Analysis",
+         "AI Review",
+         "Report Generation"
+     ]
+
+     with gr.Accordion("Detailed Progress", open=False):
+         step_progress = {}
+         for step in steps:
+             with gr.Row(variant="panel"):
+                 with gr.Column(scale=1, min_width=150):
+                     gr.Markdown(f"**{step}**")
+                 with gr.Column(scale=4):
+                     step_progress[step] = gr.Slider(
+                         minimum=0,
+                         maximum=100,
+                         value=0,
+                         label="",
+                         interactive=False,
+                         scale=2
+                     )
+
+     return overall_progress, status_message, step_progress
src/ui/components/repo_input.py ADDED
@@ -0,0 +1,83 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Repository Input Component
+
+ This module provides the UI component for entering a GitHub repository URL.
+ """
+
+ import gradio as gr
+ import re
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ def validate_github_url(url):
+     """
+     Validate that the input is a proper GitHub repository URL.
+
+     Args:
+         url (str): The URL to validate.
+
+     Returns:
+         str or None: Error message if invalid, None if valid.
+     """
+     if not url:
+         return None
+
+     # Basic GitHub URL pattern
+     pattern = r'^https?://github\.com/[\w.-]+/[\w.-]+/?$'
+     if not re.match(pattern, url):
+         return "Please enter a valid GitHub repository URL"
+     return None
+
+
+ def create_repo_input():
+     """
+     Create the repository input component.
+
+     Returns:
+         tuple: (repo_url, github_token, submit_btn) - The repository URL input, GitHub token input, and submit button.
+     """
+     with gr.Group():
+         gr.Markdown("### 📂 GitHub Repository")
+
+         repo_url = gr.Textbox(
+             label="Repository URL",
+             placeholder="https://github.com/username/repository",
+             info="Enter the URL of a GitHub repository",
+         )
+
+         github_token = gr.Textbox(
+             label="GitHub Token (Optional)",
+             placeholder="For private repositories only",
+             info="Required only for private repositories",
+             type="password",
+             visible=True
+         )
+
+         submit_btn = gr.Button(
+             value="Analyze Repository",
+             variant="primary",
+             scale=0,
+         )
+
+         # Add validation for GitHub URL format
+         error_box = gr.Textbox(
+             label="Error",
+             visible=True,
+             interactive=False,
+             container=False,
+             show_label=False
+         )
+
+         repo_url.change(
+             fn=validate_github_url,
+             inputs=[repo_url],
+             outputs=[error_box],
+             show_progress=False
+         )
+
+     return repo_url, github_token, submit_btn
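The validator returns a message only on failure, which is what the error Textbox displays; a quick sketch of the expected behaviour given the pattern above:

from src.ui.components.repo_input import validate_github_url

assert validate_github_url("https://github.com/user/repo") is None      # valid URL -> no error
assert validate_github_url("https://gitlab.com/user/repo") is not None  # non-GitHub host -> error message
assert validate_github_url("") is None                                  # empty input is not flagged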
src/ui/components/results_dashboard.py ADDED
@@ -0,0 +1,108 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Results Dashboard Component
+
+ This module provides the UI component for displaying the code review results.
+ """
+
+ import gradio as gr
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ def create_results_dashboard():
+     """
+     Create the results dashboard component.
+
+     Returns:
+         gr.Group: The results dashboard component group.
+     """
+     with gr.Group(visible=False) as results_group:
+         gr.Markdown("### 📊 Analysis Results")
+
+         # Executive Summary Tab
+         with gr.Tab("Executive Summary"):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     gr.Markdown("#### 📝 Overview")
+                     summary_text = gr.Markdown("")
+
+                 with gr.Column(scale=1):
+                     gr.Markdown("#### 📈 Key Metrics")
+                     # gr.Label takes the value as its first positional argument,
+                     # so the display name goes in the label keyword
+                     with gr.Row():
+                         gr.Label(value="N/A", label="Code Quality Score")
+                     with gr.Row():
+                         gr.Label(value="N/A", label="Security Score")
+                     with gr.Row():
+                         gr.Label(value="N/A", label="Performance Score")
+
+         # Technical Details Tab
+         with gr.Tab("Technical Details"):
+             with gr.Accordion("Repository Structure", open=True):
+                 repo_structure = gr.Markdown("")
+
+             with gr.Accordion("Language Breakdown", open=True):
+                 language_breakdown = gr.BarPlot(
+                     x="Language",
+                     y="Lines of Code",
+                     title="Language Distribution",
+                     tooltip=["Language", "Lines of Code"],
+                     height=300,
+                 )
+
+             with gr.Accordion("Code Quality Issues", open=True):
+                 quality_issues = gr.Dataframe(
+                     headers=["File", "Line", "Issue", "Severity", "Description"],
+                     datatype=["str", "number", "str", "str", "str"],
+                     row_count=10,
+                 )
+
+         # Security Analysis Tab
+         with gr.Tab("Security Analysis"):
+             with gr.Accordion("Vulnerabilities", open=True):
+                 vulnerabilities = gr.Dataframe(
+                     headers=["File", "Line", "Vulnerability", "Severity", "Description", "Recommendation"],
+                     datatype=["str", "number", "str", "str", "str", "str"],
+                     row_count=10,
+                 )
+
+             with gr.Accordion("Dependency Issues", open=True):
+                 dependency_issues = gr.Dataframe(
+                     headers=["Package", "Current Version", "Recommended Version", "Vulnerability", "Severity"],
+                     datatype=["str", "str", "str", "str", "str"],
+                     row_count=10,
+                 )
+
+         # Performance Analysis Tab
+         with gr.Tab("Performance Analysis"):
+             with gr.Accordion("Performance Hotspots", open=True):
+                 performance_hotspots = gr.Dataframe(
+                     headers=["File", "Function", "Issue", "Impact", "Recommendation"],
+                     datatype=["str", "str", "str", "str", "str"],
+                     row_count=10,
+                 )
+
+             with gr.Accordion("Resource Usage", open=True):
+                 resource_usage = gr.BarPlot(
+                     x="Component",
+                     y="Usage",
+                     title="Resource Usage",
+                     tooltip=["Component", "Usage"],
+                     height=300,
+                 )
+
+         # Recommendations Tab
+         with gr.Tab("Recommendations"):
+             with gr.Accordion("High Priority", open=True):
+                 high_priority_recs = gr.Markdown("")
+
+             with gr.Accordion("Medium Priority", open=True):
+                 medium_priority_recs = gr.Markdown("")
+
+             with gr.Accordion("Low Priority", open=True):
+                 low_priority_recs = gr.Markdown("")
+
+     return results_group
src/ui/gradio_app.py ADDED
@@ -0,0 +1,102 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Gradio Application for Code Review Agent
+
+ This module defines the Gradio web interface for the Code Review Agent.
+ It creates a professional UI with components for repository input, language selection,
+ progress tracking, and results display.
+ """
+
+ import os
+ import gradio as gr
+ import logging
+
+ from src.ui.components.repo_input import create_repo_input
+ from src.ui.components.language_selector import create_language_selector
+ from src.ui.components.progress_tracker import create_progress_tracker
+ from src.ui.components.results_dashboard import create_results_dashboard
+ from src.ui.components.export_manager import create_export_manager
+ from src.ui.styles.themes import get_theme
+
+ logger = logging.getLogger(__name__)
+
+
+ def create_gradio_app(agent_manager):
+     """
+     Create and configure the Gradio application.
+
+     Args:
+         agent_manager: The AgentManager instance that handles the business logic.
+
+     Returns:
+         gr.Blocks: The configured Gradio application.
+     """
+     # Load custom CSS
+     css_path = os.path.join(os.path.dirname(__file__), 'styles', 'custom.css')
+     with open(css_path, 'r') as f:
+         custom_css = f.read()
+
+     # Create the Gradio app with custom theme
+     theme = get_theme()
+
+     with gr.Blocks(css=custom_css, theme=theme, title="Code Review Agent") as app:
+         gr.Markdown(
+             """
+             # 🔍 Professional Code Review Agent
+
+             Upload a GitHub repository URL and get comprehensive code analysis with actionable recommendations.
+             """
+         )
+
+         with gr.Row():
+             with gr.Column(scale=3):
+                 # Repository input component
+                 repo_url, github_token, submit_btn = create_repo_input()
+
+                 # Language selector component
+                 selected_languages = create_language_selector()
+
+             with gr.Column(scale=1):
+                 # Information panel
+                 gr.Markdown(
+                     """
+                     ### 📋 Features
+                     - Multi-language support (15+ languages)
+                     - Security vulnerability detection
+                     - Performance analysis
+                     - Code quality metrics
+                     - Actionable recommendations
+                     """
+                 )
+
+         # Progress tracker component
+         with gr.Group(visible=False) as progress_group:
+             gr.Markdown("### ⏳ Analysis Progress")
+             overall_progress, status_message, step_progress = create_progress_tracker()
+
+         # Results dashboard component
+         results_dashboard = create_results_dashboard()
+
+         # Export options component
+         export_buttons = create_export_manager()
+
+         # Set up event handlers
+         submit_btn.click(
+             fn=agent_manager.start_review,
+             inputs=[repo_url, github_token, selected_languages],
+             outputs=[progress_group, overall_progress, status_message, results_dashboard]
+         )
+
+         for export_btn, export_format in export_buttons:
+             export_btn.click(
+                 fn=agent_manager.export_report,
+                 inputs=[results_dashboard, export_format],
+                 outputs=[]
+             )
+
+         # Enable the request queue for real-time progress updates
+         app.queue()
+
+     return app
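Wiring it together, a minimal entry-point sketch; the actual launcher script is not part of the files shown here, so this assumes the default AgentManager constructor and a standard Gradio launch:

from src.core.agent_manager import AgentManager
from src.ui.gradio_app import create_gradio_app

agent_manager = AgentManager()
app = create_gradio_app(agent_manager)
app.launch()  # standard Gradio launch; host/port can be passed here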
src/ui/styles/__init__.py ADDED
@@ -0,0 +1 @@
+ # UI Styles Package for Code Review Agent
src/ui/styles/custom.css ADDED
@@ -0,0 +1,116 @@
+ /* Custom CSS for Code Review Agent */
+
+ /* Global Styles */
+ body {
+     font-family: 'Inter', sans-serif;
+ }
+
+ /* Glass-morphism effect for containers */
+ .glass-container {
+     background: rgba(255, 255, 255, 0.7);
+     backdrop-filter: blur(10px);
+     border-radius: 10px;
+     border: 1px solid rgba(255, 255, 255, 0.2);
+     box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.1);
+ }
+
+ /* Animations */
+ .fade-in {
+     animation: fadeIn 0.5s ease-in-out;
+ }
+
+ @keyframes fadeIn {
+     from { opacity: 0; }
+     to { opacity: 1; }
+ }
+
+ /* Typography */
+ .small-text {
+     font-size: 0.8rem;
+     color: #6b7280;
+ }
+
+ /* Custom Scrollbar */
+ ::-webkit-scrollbar {
+     width: 8px;
+     height: 8px;
+ }
+
+ ::-webkit-scrollbar-track {
+     background: #f1f1f1;
+     border-radius: 4px;
+ }
+
+ ::-webkit-scrollbar-thumb {
+     background: #c1c1c1;
+     border-radius: 4px;
+ }
+
+ ::-webkit-scrollbar-thumb:hover {
+     background: #a1a1a1;
+ }
+
+ /* Progress Bar Styling */
+ .progress-step-complete {
+     color: #10b981;
+     font-weight: bold;
+ }
+
+ .progress-step-current {
+     color: #3b82f6;
+     font-weight: bold;
+ }
+
+ .progress-step-pending {
+     color: #6b7280;
+ }
+
+ /* Results Dashboard Styling */
+ .metric-card {
+     border-radius: 8px;
+     padding: 16px;
+     box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+     margin-bottom: 16px;
+ }
+
+ .metric-card-good {
+     background-color: rgba(16, 185, 129, 0.1);
+     border-left: 4px solid #10b981;
+ }
+
+ .metric-card-warning {
+     background-color: rgba(245, 158, 11, 0.1);
+     border-left: 4px solid #f59e0b;
+ }
+
+ .metric-card-critical {
+     background-color: rgba(239, 68, 68, 0.1);
+     border-left: 4px solid #ef4444;
+ }
+
+ /* Code Snippet Styling */
+ .code-snippet {
+     font-family: 'Fira Code', monospace;
+     background-color: #f8f9fa;
+     border-radius: 4px;
+     padding: 12px;
+     overflow-x: auto;
+     margin: 8px 0;
+     border-left: 3px solid #3b82f6;
+ }
+
+ .code-line-highlight {
+     background-color: rgba(59, 130, 246, 0.1);
+     display: block;
+ }
+
+ /* Responsive Design Adjustments */
+ @media (max-width: 768px) {
+     .hide-on-mobile {
+         display: none;
+     }
+
+     .mobile-full-width {
+         width: 100% !important;
+     }
+ }
src/ui/styles/themes.py ADDED
@@ -0,0 +1,25 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ UI Themes
+
+ This module provides theme configuration for the Gradio interface.
+ """
+
+ import gradio as gr
+
+
+ def get_theme():
+     """
+     Create a custom theme for the Gradio interface.
+
+     Returns:
+         gr.themes.Base: A custom Gradio theme.
+     """
+     # Custom themes are built from gr.themes.Base, which accepts the hue and
+     # font parameters used here
+     return gr.themes.Base(
+         primary_hue="blue",
+         secondary_hue="indigo",
+         neutral_hue="slate",
+         font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"]
+     )
tests/test_agent_manager.py ADDED
@@ -0,0 +1,341 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Unit tests for the Agent Manager
+ """
+
+ import unittest
+ from unittest.mock import patch, MagicMock
+ import os
+ import sys
+ from pathlib import Path
+
+ # Add the project root directory to the Python path
+ project_root = Path(__file__).resolve().parent.parent
+ sys.path.insert(0, str(project_root))
+
+ from src.core.agent_manager import AgentManager
+
+
+ class TestAgentManager(unittest.TestCase):
+     """Test cases for the AgentManager class"""
+
+     def setUp(self):
+         """Set up test fixtures"""
+         # Create mock components
+         self.mock_progress_tracker = MagicMock()
+         self.mock_results_dashboard = MagicMock()
+
+         # Create the agent manager with mocked components
+         with patch('src.core.agent_manager.LanguageDetector'), \
+              patch('src.services.repository_service'), \
+              patch('src.services.code_analyzer.CodeAnalyzer'), \
+              patch('src.services.security_scanner.SecurityScanner'), \
+              patch('src.services.performance_analyzer.PerformanceAnalyzer'), \
+              patch('src.mcp.ai_review.AIReviewService'), \
+              patch('src.services.report_generator.ReportGenerator'):
+
+             self.agent_manager = AgentManager()
+
+         # Replace the UI components with mocks
+         self.agent_manager._progress_tracker = self.mock_progress_tracker
+         self.agent_manager._results_dashboard = self.mock_results_dashboard
+
+     @patch('src.services.repository_service.validate_github_url')
+     @patch('src.services.repository_service.clone_repository')
+     @patch('src.services.repository_service.get_repository_info')
+     @patch('src.core.language_detector.LanguageDetector.detect_languages')
+     @patch('src.core.language_detector.LanguageDetector.get_language_breakdown')
+     def test_start_review(self, mock_get_breakdown, mock_detect_languages,
+                           mock_get_repo_info, mock_clone_repo, mock_validate_url):
+         """Test start_review method"""
+         # Set up the mocks
+         mock_validate_url.return_value = True
+         mock_clone_repo.return_value = "/test/repo"
+         mock_get_repo_info.return_value = {"branch": "main", "commit": "abc123"}
+         mock_detect_languages.return_value = ["Python", "JavaScript"]
+         mock_get_breakdown.return_value = {
+             "Python": {"files": 5, "lines": 500, "percentage": 70},
+             "JavaScript": {"files": 3, "lines": 200, "percentage": 30}
+         }
+
+         # Mock the analysis methods
+         self.agent_manager._analyze_code = MagicMock()
+         self.agent_manager._scan_security = MagicMock()
+         self.agent_manager._analyze_performance = MagicMock()
+         self.agent_manager._perform_ai_review = MagicMock()
+         self.agent_manager._generate_report = MagicMock()
+
+         # Call the method
+         result = self.agent_manager.start_review(
+             repo_url="https://github.com/user/repo",
+             languages=["Python", "JavaScript"],
+             features=["code_analysis", "security_scan", "performance_analysis", "ai_review"]
+         )
+
+         # Verify the result
+         self.assertTrue(result["success"])
+         self.assertEqual(result["repo_path"], "/test/repo")
+
+         # Verify the method calls
+         mock_validate_url.assert_called_once_with("https://github.com/user/repo")
+         mock_clone_repo.assert_called_once()
+         mock_get_repo_info.assert_called_once_with("/test/repo")
+         mock_detect_languages.assert_called_once_with("/test/repo")
+         mock_get_breakdown.assert_called_once_with("/test/repo")
+
+         # Verify the analysis method calls
+         self.agent_manager._analyze_code.assert_called_once()
+         self.agent_manager._scan_security.assert_called_once()
+         self.agent_manager._analyze_performance.assert_called_once()
+         self.agent_manager._perform_ai_review.assert_called_once()
+         self.agent_manager._generate_report.assert_called_once()
+
+         # Verify the progress updates
+         self.assertEqual(self.mock_progress_tracker.update.call_count, 8)  # Initial + 7 steps
+
+     @patch('src.services.repository_service.validate_github_url')
+     def test_start_review_invalid_url(self, mock_validate_url):
+         """Test start_review method with invalid URL"""
+         # Set up the mock
+         mock_validate_url.return_value = False
+
+         # Call the method
+         result = self.agent_manager.start_review(
+             repo_url="invalid_url",
+             languages=["Python"],
+             features=["code_analysis"]
+         )
+
+         # Verify the result
+         self.assertFalse(result["success"])
+         self.assertIn("Invalid GitHub URL", result["error"])
+
+     @patch('src.services.repository_service.validate_github_url')
+     @patch('src.services.repository_service.clone_repository')
+     def test_start_review_clone_error(self, mock_clone_repo, mock_validate_url):
+         """Test start_review method with clone error"""
+         # Set up the mocks
+         mock_validate_url.return_value = True
+         mock_clone_repo.side_effect = Exception("Clone error")
+
+         # Call the method
+         result = self.agent_manager.start_review(
+             repo_url="https://github.com/user/repo",
+             languages=["Python"],
+             features=["code_analysis"]
+         )
+
+         # Verify the result
+         self.assertFalse(result["success"])
+         self.assertIn("Failed to clone repository", result["error"])
+
+     @patch('src.services.code_analyzer.CodeAnalyzer.analyze_code')
+     def test_analyze_code(self, mock_analyze_code):
+         """Test _analyze_code method"""
+         # Set up the mock
+         mock_analyze_code.return_value = {"Python": {"issues": [], "issue_count": 0}}
+
+         # Call the method
+         self.agent_manager._repo_path = "/test/repo"
+         self.agent_manager._languages = ["Python"]
+         self.agent_manager._results = {}
+
+         self.agent_manager._analyze_code()
+
+         # Verify the result
+         self.assertIn("code_analysis", self.agent_manager._results)
149
+ mock_analyze_code.assert_called_once_with("/test/repo", ["Python"])
150
+
151
+ @patch('src.services.security_scanner.SecurityScanner.scan_repository')
152
+ def test_scan_security(self, mock_scan_repo):
153
+ """Test _scan_security method"""
154
+ # Set up the mock
155
+ mock_scan_repo.return_value = {"Python": {"vulnerabilities": [], "vulnerability_count": 0}}
156
+
157
+ # Call the method
158
+ self.agent_manager._repo_path = "/test/repo"
159
+ self.agent_manager._languages = ["Python"]
160
+ self.agent_manager._results = {}
161
+
162
+ self.agent_manager._scan_security()
163
+
164
+ # Verify the result
165
+ self.assertIn("security_scan", self.agent_manager._results)
166
+ mock_scan_repo.assert_called_once_with("/test/repo", ["Python"])
167
+
168
+ @patch('src.services.performance_analyzer.PerformanceAnalyzer.analyze_repository')
169
+ def test_analyze_performance(self, mock_analyze_repo):
170
+ """Test _analyze_performance method"""
171
+ # Set up the mock
172
+ mock_analyze_repo.return_value = {
173
+ "language_results": {"Python": {"issues": [], "issue_count": 0}},
174
+ "hotspots": []
175
+ }
176
+
177
+ # Call the method
178
+ self.agent_manager._repo_path = "/test/repo"
179
+ self.agent_manager._languages = ["Python"]
180
+ self.agent_manager._results = {}
181
+
182
+ self.agent_manager._analyze_performance()
183
+
184
+ # Verify the result
185
+ self.assertIn("performance_analysis", self.agent_manager._results)
186
+ mock_analyze_repo.assert_called_once_with("/test/repo", ["Python"])
187
+
188
+ @patch('src.mcp.ai_review.AIReviewService.is_available')
189
+ @patch('src.mcp.ai_review.AIReviewService.review_repository')
190
+ def test_perform_ai_review(self, mock_review_repo, mock_is_available):
191
+ """Test _perform_ai_review method"""
192
+ # Set up the mocks
193
+ mock_is_available.return_value = True
194
+ mock_review_repo.return_value = {
195
+ "status": "success",
196
+ "reviews": {},
197
+ "summary": "AI review summary"
198
+ }
199
+
200
+ # Call the method
201
+ self.agent_manager._repo_path = "/test/repo"
202
+ self.agent_manager._languages = ["Python"]
203
+ self.agent_manager._results = {}
204
+
205
+ self.agent_manager._perform_ai_review()
206
+
207
+ # Verify the result
208
+ self.assertIn("ai_review", self.agent_manager._results)
209
+ mock_review_repo.assert_called_once()
210
+
211
+ @patch('src.mcp.ai_review.AIReviewService.is_available')
212
+ def test_perform_ai_review_unavailable(self, mock_is_available):
213
+ """Test _perform_ai_review method when AI review is unavailable"""
214
+ # Set up the mock
215
+ mock_is_available.return_value = False
216
+
217
+ # Call the method
218
+ self.agent_manager._repo_path = "/test/repo"
219
+ self.agent_manager._languages = ["Python"]
220
+ self.agent_manager._results = {}
221
+
222
+ self.agent_manager._perform_ai_review()
223
+
224
+ # Verify the result
225
+ self.assertIn("ai_review", self.agent_manager._results)
226
+ self.assertEqual(self.agent_manager._results["ai_review"]["status"], "error")
227
+ self.assertIn("AI review service is not available", self.agent_manager._results["ai_review"]["error"])
228
+
229
+ @patch('src.services.report_generator.ReportGenerator.generate_report')
230
+ def test_generate_report(self, mock_generate_report):
231
+ """Test _generate_report method"""
232
+ # Set up the mock
233
+ mock_generate_report.return_value = {
234
+ "json": "/test/reports/report.json",
235
+ "html": "/test/reports/report.html"
236
+ }
237
+
238
+ # Call the method
239
+ self.agent_manager._repo_name = "repo"
240
+ self.agent_manager._results = {"test": "data"}
241
+
242
+ self.agent_manager._generate_report()
243
+
244
+ # Verify the result
245
+ self.assertIn("report_paths", self.agent_manager._results)
246
+ mock_generate_report.assert_called_once_with("repo", {"test": "data"}, "all")
247
+
248
+ @patch('src.services.report_generator.ReportGenerator.generate_report')
249
+ def test_export_report(self, mock_generate_report):
250
+ """Test export_report method"""
251
+ # Set up the mock
252
+ mock_generate_report.return_value = {
253
+ "json": "/test/reports/report.json"
254
+ }
255
+
256
+ # Call the method
257
+ self.agent_manager._repo_name = "repo"
258
+ self.agent_manager._results = {"test": "data"}
259
+
260
+ result = self.agent_manager.export_report("json")
261
+
262
+ # Verify the result
263
+ self.assertTrue(result["success"])
264
+ self.assertEqual(result["report_path"], "/test/reports/report.json")
265
+ mock_generate_report.assert_called_once_with("repo", {"test": "data"}, "json")
266
+
267
+ @patch('src.services.report_generator.ReportGenerator.generate_report')
268
+ def test_export_report_error(self, mock_generate_report):
269
+ """Test export_report method with error"""
270
+ # Set up the mock
271
+ mock_generate_report.side_effect = Exception("Export error")
272
+
273
+ # Call the method
274
+ self.agent_manager._repo_name = "repo"
275
+ self.agent_manager._results = {"test": "data"}
276
+
277
+ result = self.agent_manager.export_report("json")
278
+
279
+ # Verify the result
280
+ self.assertFalse(result["success"])
281
+ self.assertIn("Failed to export report", result["error"])
282
+
283
+ @patch('src.services.repository_service.clone_repository')
284
+ def test_clone_repository(self, mock_clone_repo):
285
+ """Test _clone_repository method"""
286
+ # Set up the mock
287
+ mock_clone_repo.return_value = "/test/repo"
288
+
289
+ # Call the method
290
+ repo_path = self.agent_manager._clone_repository("https://github.com/user/repo")
291
+
292
+ # Verify the result
293
+ self.assertEqual(repo_path, "/test/repo")
294
+ mock_clone_repo.assert_called_once()
295
+
296
+ def test_update_progress(self):
297
+ """Test _update_progress method"""
298
+ # Call the method
299
+ self.agent_manager._update_progress("Test step", 50, "Test message")
300
+
301
+ # Verify the result
302
+ self.mock_progress_tracker.update.assert_called_once_with(
303
+ "Test step", 50, "Test message"
304
+ )
305
+
306
+ def test_create_progress_tracker(self):
307
+ """Test _create_progress_tracker method"""
308
+ # Mock the gradio components
309
+ with patch('gradio.Markdown'), patch('gradio.Slider'), patch('gradio.Accordion'), patch('gradio.Group'):
310
+ # Call the method
311
+ progress_tracker = self.agent_manager._create_progress_tracker()
312
+
313
+ # Verify the result
314
+ self.assertIsNotNone(progress_tracker)
315
+
316
+ def test_create_results_dashboard(self):
317
+ """Test _create_results_dashboard method"""
318
+ # Mock the gradio components
319
+ with patch('gradio.Markdown'), patch('gradio.Dataframe'), patch('gradio.HighlightedText'), \
320
+ patch('gradio.Code'), patch('gradio.Accordion'), patch('gradio.Tab'), patch('gradio.Tabs'), \
321
+ patch('gradio.Group'):
322
+
323
+ # Call the method
324
+ results_dashboard = self.agent_manager._create_results_dashboard()
325
+
326
+ # Verify the result
327
+ self.assertIsNotNone(results_dashboard)
328
+
329
+ def test_create_error_progress_tracker(self):
330
+ """Test _create_error_progress_tracker method"""
331
+ # Mock the gradio components
332
+ with patch('gradio.Markdown'), patch('gradio.Group'):
333
+ # Call the method
334
+ error_tracker = self.agent_manager._create_error_progress_tracker("Test error")
335
+
336
+ # Verify the result
337
+ self.assertIsNotNone(error_tracker)
338
+
339
+
340
+ if __name__ == "__main__":
341
+ unittest.main()
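Since the suite follows the standard unittest layout (each test module bootstraps sys.path itself), it can be run with stdlib discovery alone; a sketch, assuming the repository root as the working directory:

import unittest

# Discover and run every tests/test_*.py module added in this commit.
suite = unittest.defaultTestLoader.discover("tests")
unittest.TextTestRunner(verbosity=2).run(suite)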
tests/test_ai_review.py ADDED
@@ -0,0 +1,167 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Unit tests for the AI Review Service
6
+ """
7
+
8
+ import unittest
9
+ from unittest.mock import patch, MagicMock, mock_open
10
+ import os
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ # Add the project root directory to the Python path
15
+ project_root = Path(__file__).resolve().parent.parent
16
+ sys.path.insert(0, str(project_root))
17
+
18
+ from src.mcp.ai_review import AIReviewService
19
+
20
+
21
+ class TestAIReviewService(unittest.TestCase):
22
+ """Test cases for the AIReviewService class"""
23
+
24
+ def setUp(self):
25
+ """Set up test fixtures"""
26
+ # Mock environment variables
27
+ self.env_patcher = patch.dict('os.environ', {'ANTHROPIC_API_KEY': 'test_api_key'})
28
+ self.env_patcher.start()
29
+
30
+ # Create the service
31
+ self.service = AIReviewService()
32
+
33
+ def tearDown(self):
34
+ """Tear down test fixtures"""
35
+ self.env_patcher.stop()
36
+
37
+ def test_init(self):
38
+ """Test initialization of the service"""
39
+ self.assertIsNotNone(self.service)
40
+ self.assertEqual(self.service.api_key, 'test_api_key')
41
+ self.assertTrue(self.service.is_available())
42
+
43
+ def test_is_available(self):
44
+ """Test is_available method"""
45
+ # With API key
46
+ self.assertTrue(self.service.is_available())
47
+
48
+ # Without API key
49
+ with patch.dict('os.environ', {}, clear=True):
50
+ service = AIReviewService()
51
+ self.assertFalse(service.is_available())
52
+
53
+ @patch('anthropic.Anthropic')
54
+ def test_review_code(self, mock_anthropic):
55
+ """Test review_code method"""
56
+ # Mock the Anthropic client
57
+ mock_client = MagicMock()
58
+ mock_anthropic.return_value = mock_client
59
+
60
+ # Mock the response
61
+ mock_response = MagicMock()
62
+ mock_content = MagicMock()
63
+ mock_content.text = "# Code Review\n\n## Code Quality\nThe code is well-structured.\n\n## Potential Issues\nLine 10: Variable 'x' is not used."
64
+ mock_response.content = [mock_content]
65
+ mock_client.messages.create.return_value = mock_response
66
+
67
+ # Test the method
68
+ result = self.service.review_code(
69
+ file_path="test.py",
70
+ file_content="def test():\n x = 1\n return 2",
71
+ language="Python"
72
+ )
73
+
74
+ # Verify the result
75
+ self.assertEqual(result['status'], 'success')
76
+ self.assertEqual(result['review_text'], mock_content.text)
77
+ self.assertIn('suggestions', result)
78
+
79
+ @patch('anthropic.Anthropic')
80
+ def test_review_code_error(self, mock_anthropic):
81
+ """Test review_code method with error"""
82
+ # Mock the Anthropic client
83
+ mock_client = MagicMock()
84
+ mock_anthropic.return_value = mock_client
85
+
86
+ # Mock an error
87
+ mock_client.messages.create.side_effect = Exception("API error")
88
+
89
+ # Test the method
90
+ result = self.service.review_code(
91
+ file_path="test.py",
92
+ file_content="def test():\n return 1",
93
+ language="Python"
94
+ )
95
+
96
+ # Verify the result
97
+ self.assertEqual(result['status'], 'error')
98
+ self.assertEqual(result['error'], 'API error')
99
+ self.assertEqual(result['suggestions'], [])
100
+
101
+ def test_review_code_unavailable(self):
102
+ """Test review_code method when service is unavailable"""
103
+ # Create a service without API key
104
+ with patch.dict('os.environ', {}, clear=True):
105
+ service = AIReviewService()
106
+
107
+ # Test the method
108
+ result = service.review_code(
109
+ file_path="test.py",
110
+ file_content="def test():\n return 1",
111
+ language="Python"
112
+ )
113
+
114
+ # Verify the result
115
+ self.assertEqual(result['status'], 'error')
116
+ self.assertIn('AI review service is not available', result['error'])
117
+ self.assertEqual(result['suggestions'], [])
118
+
119
+ @patch('anthropic.Anthropic')
120
+ @patch('builtins.open', new_callable=mock_open, read_data="def test():\n    return 1")
121
+ def test_review_repository(self, mock_open, mock_anthropic):
122
+ """Test review_repository method"""
123
+ # Mock the Anthropic client
124
+ mock_client = MagicMock()
125
+ mock_anthropic.return_value = mock_client
126
+
127
+ # Mock the response for file review
128
+ mock_file_response = MagicMock()
129
+ mock_file_content = MagicMock()
130
+ mock_file_content.text = "# Code Review\n\n## Code Quality\nThe code is well-structured."
131
+ mock_file_response.content = [mock_file_content]
132
+
133
+ # Mock the response for repository summary
134
+ mock_summary_response = MagicMock()
135
+ mock_summary_content = MagicMock()
136
+ mock_summary_content.text = "# Repository Review\n\nOverall, the code quality is good."
137
+ mock_summary_response.content = [mock_summary_content]
138
+
139
+ # Set up the mock to return different responses
140
+ mock_client.messages.create.side_effect = [mock_file_response, mock_summary_response]
141
+
142
+ # Test the method
143
+ result = self.service.review_repository(
144
+ repo_path="/test/repo",
145
+ files=["test.py"],
146
+ languages=["Python"]
147
+ )
148
+
149
+ # Verify the result
150
+ self.assertEqual(result['status'], 'success')
151
+ self.assertIn('reviews', result)
152
+ self.assertIn('test.py', result['reviews'])
153
+ self.assertEqual(result['summary'], mock_summary_content.text)
154
+
155
+ def test_get_language_from_extension(self):
156
+ """Test _get_language_from_extension method"""
157
+ self.assertEqual(self.service._get_language_from_extension(".py"), "Python")
158
+ self.assertEqual(self.service._get_language_from_extension(".js"), "JavaScript")
159
+ self.assertEqual(self.service._get_language_from_extension(".ts"), "TypeScript")
160
+ self.assertEqual(self.service._get_language_from_extension(".java"), "Java")
161
+ self.assertEqual(self.service._get_language_from_extension(".go"), "Go")
162
+ self.assertEqual(self.service._get_language_from_extension(".rs"), "Rust")
163
+ self.assertIsNone(self.service._get_language_from_extension(".unknown"))
164
+
165
+
166
+ if __name__ == "__main__":
167
+ unittest.main()
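test_get_language_from_extension pins _get_language_from_extension to a plain lookup that returns None for unknown extensions; a sketch consistent with those assertions (the real table in src/mcp/ai_review.py may cover more extensions):

# Hypothetical mapping, reconstructed only from the assertions in the test above.
EXTENSION_LANGUAGES = {
    ".py": "Python",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".java": "Java",
    ".go": "Go",
    ".rs": "Rust",
}

def get_language_from_extension(extension):
    # dict.get returns None for unknown extensions, matching assertIsNone above.
    return EXTENSION_LANGUAGES.get(extension)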
tests/test_code_analyzer.py ADDED
@@ -0,0 +1,383 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Unit tests for the Code Analyzer service
6
+ """
7
+
8
+ import unittest
9
+ from unittest.mock import patch, MagicMock, mock_open
10
+ import os
11
+ import sys
12
+ import json
13
+ from pathlib import Path
14
+
15
+ # Add the project root directory to the Python path
16
+ project_root = Path(__file__).resolve().parent.parent
17
+ sys.path.insert(0, str(project_root))
18
+
19
+ from src.services.code_analyzer import CodeAnalyzer
20
+
21
+
22
+ class TestCodeAnalyzer(unittest.TestCase):
23
+ """Test cases for the CodeAnalyzer class"""
24
+
25
+ def setUp(self):
26
+ """Set up test fixtures"""
27
+ self.analyzer = CodeAnalyzer()
28
+ self.test_repo_path = "/test/repo"
29
+
30
+ @patch('os.path.exists')
31
+ @patch('subprocess.run')
32
+ def test_analyze_python_code(self, mock_run, mock_exists):
33
+ """Test analyze_python_code method"""
34
+ # Set up the mocks
35
+ mock_exists.return_value = True
36
+
37
+ # Mock the subprocess.run result
38
+ mock_process = MagicMock()
39
+ mock_process.returncode = 0
40
+ mock_process.stdout = json.dumps({
41
+ "messages": [
42
+ {
43
+ "type": "convention",
44
+ "module": "test_module",
45
+ "obj": "",
46
+ "line": 10,
47
+ "column": 0,
48
+ "path": "test.py",
49
+ "symbol": "missing-docstring",
50
+ "message": "Missing module docstring",
51
+ "message-id": "C0111"
52
+ }
53
+ ]
54
+ })
55
+ mock_run.return_value = mock_process
56
+
57
+ # Mock the file discovery
58
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.py']):
59
+ # Call the method
60
+ result = self.analyzer.analyze_python_code(self.test_repo_path)
61
+
62
+ # Verify the result
63
+ self.assertEqual(len(result['issues']), 1)
64
+ self.assertEqual(result['issue_count'], 1)
65
+ self.assertEqual(result['issues'][0]['type'], 'convention')
66
+ self.assertEqual(result['issues'][0]['file'], 'test.py')
67
+ self.assertEqual(result['issues'][0]['line'], 10)
68
+ self.assertEqual(result['issues'][0]['message'], 'Missing module docstring')
69
+
70
+ @patch('os.path.exists')
71
+ @patch('subprocess.run')
72
+ def test_analyze_javascript_code(self, mock_run, mock_exists):
73
+ """Test analyze_javascript_code method"""
74
+ # Set up the mocks
75
+ mock_exists.return_value = True
76
+
77
+ # Mock the subprocess.run result
78
+ mock_process = MagicMock()
79
+ mock_process.returncode = 0
80
+ mock_process.stdout = json.dumps([
81
+ {
82
+ "filePath": "/test/repo/test.js",
83
+ "messages": [
84
+ {
85
+ "ruleId": "semi",
86
+ "severity": 2,
87
+ "message": "Missing semicolon.",
88
+ "line": 5,
89
+ "column": 20,
90
+ "nodeType": "ExpressionStatement"
91
+ }
92
+ ],
93
+ "errorCount": 1,
94
+ "warningCount": 0,
95
+ "fixableErrorCount": 1,
96
+ "fixableWarningCount": 0
97
+ }
98
+ ])
99
+ mock_run.return_value = mock_process
100
+
101
+ # Mock the file discovery
102
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.js']):
103
+ # Call the method
104
+ result = self.analyzer.analyze_javascript_code(self.test_repo_path)
105
+
106
+ # Verify the result
107
+ self.assertEqual(len(result['issues']), 1)
108
+ self.assertEqual(result['issue_count'], 1)
109
+ self.assertEqual(result['issues'][0]['type'], 'error')
110
+ self.assertEqual(result['issues'][0]['file'], 'test.js')
111
+ self.assertEqual(result['issues'][0]['line'], 5)
112
+ self.assertEqual(result['issues'][0]['message'], 'Missing semicolon.')
113
+
114
+ @patch('os.path.exists')
115
+ @patch('subprocess.run')
116
+ def test_analyze_typescript_code(self, mock_run, mock_exists):
117
+ """Test analyze_typescript_code method"""
118
+ # Set up the mocks
119
+ mock_exists.return_value = True
120
+
121
+ # Mock the subprocess.run results
122
+ # First for ESLint
123
+ eslint_process = MagicMock()
124
+ eslint_process.returncode = 0
125
+ eslint_process.stdout = json.dumps([
126
+ {
127
+ "filePath": "/test/repo/test.ts",
128
+ "messages": [
129
+ {
130
+ "ruleId": "@typescript-eslint/no-unused-vars",
131
+ "severity": 1,
132
+ "message": "'x' is defined but never used.",
133
+ "line": 3,
134
+ "column": 7,
135
+ "nodeType": "Identifier"
136
+ }
137
+ ],
138
+ "errorCount": 0,
139
+ "warningCount": 1,
140
+ "fixableErrorCount": 0,
141
+ "fixableWarningCount": 0
142
+ }
143
+ ])
144
+
145
+ # Then for TSC
146
+ tsc_process = MagicMock()
147
+ tsc_process.returncode = 2 # Error code for TypeScript compiler
148
+ tsc_process.stderr = "test.ts(10,15): error TS2339: Property 'foo' does not exist on type 'Bar'."
149
+
150
+ # Set up the mock to return different values on consecutive calls
151
+ mock_run.side_effect = [eslint_process, tsc_process]
152
+
153
+ # Mock the file discovery
154
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.ts']):
155
+ # Call the method
156
+ result = self.analyzer.analyze_typescript_code(self.test_repo_path)
157
+
158
+ # Verify the result
159
+ self.assertEqual(len(result['issues']), 2) # One from ESLint, one from TSC
160
+ self.assertEqual(result['issue_count'], 2)
161
+
162
+ # Check the ESLint issue
163
+ eslint_issue = next(issue for issue in result['issues'] if issue['source'] == 'eslint')
164
+ self.assertEqual(eslint_issue['type'], 'warning')
165
+ self.assertEqual(eslint_issue['file'], 'test.ts')
166
+ self.assertEqual(eslint_issue['line'], 3)
167
+ self.assertEqual(eslint_issue['message'], "'x' is defined but never used.")
168
+
169
+ # Check the TSC issue
170
+ tsc_issue = next(issue for issue in result['issues'] if issue['source'] == 'tsc')
171
+ self.assertEqual(tsc_issue['type'], 'error')
172
+ self.assertEqual(tsc_issue['file'], 'test.ts')
173
+ self.assertEqual(tsc_issue['line'], 10)
174
+ self.assertEqual(tsc_issue['message'], "Property 'foo' does not exist on type 'Bar'.")
175
+
176
+ @patch('os.path.exists')
177
+ @patch('subprocess.run')
178
+ def test_analyze_java_code(self, mock_run, mock_exists):
179
+ """Test analyze_java_code method"""
180
+ # Set up the mocks
181
+ mock_exists.return_value = True
182
+
183
+ # Mock the subprocess.run result
184
+ mock_process = MagicMock()
185
+ mock_process.returncode = 0
186
+ mock_process.stdout = """
187
+ <?xml version="1.0" encoding="UTF-8"?>
188
+ <pmd version="6.55.0" timestamp="2023-06-01T12:00:00.000">
189
+ <file name="/test/repo/Test.java">
190
+ <violation beginline="10" endline="10" begincolumn="5" endcolumn="20" rule="UnusedLocalVariable" ruleset="Best Practices" class="Test" method="main" variable="unusedVar" externalInfoUrl="https://pmd.github.io/pmd-6.55.0/pmd_rules_java_bestpractices.html#unusedlocalvariable" priority="3">
191
+ Avoid unused local variables such as 'unusedVar'.
192
+ </violation>
193
+ </file>
194
+ </pmd>
195
+ """.strip()  # strip leading whitespace so the XML declaration sits at the start of the document
196
+ mock_run.return_value = mock_process
197
+
198
+ # Mock the file discovery
199
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/Test.java']):
200
+ # Call the method
201
+ result = self.analyzer.analyze_java_code(self.test_repo_path)
202
+
203
+ # Verify the result
204
+ self.assertEqual(len(result['issues']), 1)
205
+ self.assertEqual(result['issue_count'], 1)
206
+ self.assertEqual(result['issues'][0]['type'], 'warning') # Priority 3 maps to warning
207
+ self.assertEqual(result['issues'][0]['file'], 'Test.java')
208
+ self.assertEqual(result['issues'][0]['line'], 10)
209
+ self.assertEqual(result['issues'][0]['message'], "Avoid unused local variables such as 'unusedVar'.")
210
+
211
+ @patch('os.path.exists')
212
+ @patch('subprocess.run')
213
+ def test_analyze_go_code(self, mock_run, mock_exists):
214
+ """Test analyze_go_code method"""
215
+ # Set up the mocks
216
+ mock_exists.return_value = True
217
+
218
+ # Mock the subprocess.run result
219
+ mock_process = MagicMock()
220
+ mock_process.returncode = 0
221
+ mock_process.stdout = json.dumps({
222
+ "Issues": [
223
+ {
224
+ "FromLinter": "gosimple",
225
+ "Text": "S1000: should use a simple channel send/receive instead of select with a single case",
226
+ "Pos": {
227
+ "Filename": "test.go",
228
+ "Line": 15,
229
+ "Column": 2
230
+ },
231
+ "Severity": "warning"
232
+ }
233
+ ]
234
+ })
235
+ mock_run.return_value = mock_process
236
+
237
+ # Call the method
238
+ result = self.analyzer.analyze_go_code(self.test_repo_path)
239
+
240
+ # Verify the result
241
+ self.assertEqual(len(result['issues']), 1)
242
+ self.assertEqual(result['issue_count'], 1)
243
+ self.assertEqual(result['issues'][0]['type'], 'warning')
244
+ self.assertEqual(result['issues'][0]['file'], 'test.go')
245
+ self.assertEqual(result['issues'][0]['line'], 15)
246
+ self.assertEqual(result['issues'][0]['message'], 'S1000: should use a simple channel send/receive instead of select with a single case')
247
+
248
+ @patch('os.path.exists')
249
+ @patch('subprocess.run')
250
+ def test_analyze_rust_code(self, mock_run, mock_exists):
251
+ """Test analyze_rust_code method"""
252
+ # Set up the mocks
253
+ mock_exists.return_value = True
254
+
255
+ # Mock the subprocess.run result
256
+ mock_process = MagicMock()
257
+ mock_process.returncode = 0
258
+ mock_process.stdout = json.dumps({
259
+ "reason": "compiler-message",
260
+ "message": {
261
+ "rendered": "warning: unused variable: `x`\n --> src/main.rs:2:9\n |\n2 | let x = 5;\n | ^ help: if this is intentional, prefix it with an underscore: `_x`\n |\n = note: `#[warn(unused_variables)]` on by default\n\n",
262
+ "children": [],
263
+ "code": {
264
+ "code": "unused_variables",
265
+ "explanation": null
266
+ },
267
+ "level": "warning",
268
+ "message": "unused variable: `x`",
269
+ "spans": [
270
+ {
271
+ "byte_end": 26,
272
+ "byte_start": 25,
273
+ "column_end": 10,
274
+ "column_start": 9,
275
+ "expansion": null,
276
+ "file_name": "src/main.rs",
277
+ "is_primary": true,
278
+ "label": "help: if this is intentional, prefix it with an underscore: `_x`",
279
+ "line_end": 2,
280
+ "line_start": 2,
281
+ "suggested_replacement": "_x",
282
+ "suggestion_applicability": "MachineApplicable",
283
+ "text": [
284
+ {
285
+ "highlight_end": 10,
286
+ "highlight_start": 9,
287
+ "text": " let x = 5;"
288
+ }
289
+ ]
290
+ }
291
+ ]
292
+ }
293
+ })
294
+ mock_run.return_value = mock_process
295
+
296
+ # Call the method
297
+ result = self.analyzer.analyze_rust_code(self.test_repo_path)
298
+
299
+ # Verify the result
300
+ self.assertEqual(len(result['issues']), 1)
301
+ self.assertEqual(result['issue_count'], 1)
302
+ self.assertEqual(result['issues'][0]['type'], 'warning')
303
+ self.assertEqual(result['issues'][0]['file'], 'src/main.rs')
304
+ self.assertEqual(result['issues'][0]['line'], 2)
305
+ self.assertEqual(result['issues'][0]['message'], 'unused variable: `x`')
306
+
307
+ def test_analyze_code(self):
308
+ """Test analyze_code method"""
309
+ # Mock the language-specific analysis methods
310
+ self.analyzer.analyze_python_code = MagicMock(return_value={
311
+ 'issues': [{'type': 'convention', 'file': 'test.py', 'line': 10, 'message': 'Test issue'}],
312
+ 'issue_count': 1
313
+ })
314
+ self.analyzer.analyze_javascript_code = MagicMock(return_value={
315
+ 'issues': [{'type': 'error', 'file': 'test.js', 'line': 5, 'message': 'Test issue'}],
316
+ 'issue_count': 1
317
+ })
318
+
319
+ # Call the method
320
+ result = self.analyzer.analyze_code(self.test_repo_path, ['Python', 'JavaScript'])
321
+
322
+ # Verify the result
323
+ self.assertEqual(len(result), 2) # Two languages
324
+ self.assertIn('Python', result)
325
+ self.assertIn('JavaScript', result)
326
+ self.assertEqual(result['Python']['issue_count'], 1)
327
+ self.assertEqual(result['JavaScript']['issue_count'], 1)
328
+
329
+ # Verify the method calls
330
+ self.analyzer.analyze_python_code.assert_called_once_with(self.test_repo_path)
331
+ self.analyzer.analyze_javascript_code.assert_called_once_with(self.test_repo_path)
332
+
333
+ @patch('os.walk')
334
+ def test_find_files(self, mock_walk):
335
+ """Test _find_files method"""
336
+ # Set up the mock
337
+ mock_walk.return_value = [
338
+ ('/test/repo', ['dir1'], ['file1.py', 'file2.js']),
339
+ ('/test/repo/dir1', [], ['file3.py'])
340
+ ]
341
+
342
+ # Call the method
343
+ python_files = self.analyzer._find_files(self.test_repo_path, '.py')
344
+
345
+ # Verify the result
346
+ self.assertEqual(len(python_files), 2)
347
+ self.assertIn('/test/repo/file1.py', python_files)
348
+ self.assertIn('/test/repo/dir1/file3.py', python_files)
349
+
350
+ @patch('os.path.exists')
351
+ def test_check_tool_availability(self, mock_exists):
352
+ """Test _check_tool_availability method"""
353
+ # Set up the mock
354
+ mock_exists.side_effect = [True, False] # First tool exists, second doesn't
355
+
356
+ # Call the method
357
+ result1 = self.analyzer._check_tool_availability('tool1')
358
+ result2 = self.analyzer._check_tool_availability('tool2')
359
+
360
+ # Verify the result
361
+ self.assertTrue(result1)
362
+ self.assertFalse(result2)
363
+
364
+ @patch('subprocess.run')
365
+ def test_run_command(self, mock_run):
366
+ """Test _run_command method"""
367
+ # Set up the mock
368
+ mock_process = MagicMock()
369
+ mock_process.returncode = 0
370
+ mock_process.stdout = "Test output"
371
+ mock_run.return_value = mock_process
372
+
373
+ # Call the method
374
+ returncode, output = self.analyzer._run_command(['test', 'command'])
375
+
376
+ # Verify the result
377
+ self.assertEqual(returncode, 0)
378
+ self.assertEqual(output, "Test output")
379
+ mock_run.assert_called_once()
380
+
381
+
382
+ if __name__ == "__main__":
383
+ unittest.main()
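test_run_command fixes the helper's contract to a (returncode, stdout) pair from a subprocess call; a minimal sketch under that assumption:

import subprocess

def run_command(command):
    # Run an external analysis tool and return its exit code and captured stdout.
    process = subprocess.run(command, capture_output=True, text=True)
    return process.returncode, process.stdout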
tests/test_language_detector.py ADDED
@@ -0,0 +1,192 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Unit tests for the Language Detector
6
+ """
7
+
8
+ import unittest
9
+ from unittest.mock import patch, MagicMock, mock_open
10
+ import os
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ # Add the project root directory to the Python path
15
+ project_root = Path(__file__).resolve().parent.parent
16
+ sys.path.insert(0, str(project_root))
17
+
18
+ from src.core.language_detector import LanguageDetector
19
+
20
+
21
+ class TestLanguageDetector(unittest.TestCase):
22
+ """Test cases for the LanguageDetector class"""
23
+
24
+ def setUp(self):
25
+ """Set up test fixtures"""
26
+ self.detector = LanguageDetector()
27
+
28
+ # Create a mock repository structure
29
+ self.repo_path = "/test/repo"
30
+ self.mock_files = [
31
+ "/test/repo/main.py",
32
+ "/test/repo/utils.py",
33
+ "/test/repo/static/script.js",
34
+ "/test/repo/static/style.css",
35
+ "/test/repo/src/app.js",
36
+ "/test/repo/src/components/Button.jsx",
37
+ "/test/repo/src/components/Form.tsx",
38
+ "/test/repo/docs/index.html",
39
+ "/test/repo/README.md",
40
+ "/test/repo/package.json",
41
+ "/test/repo/Dockerfile",
42
+ "/test/repo/.gitignore"
43
+ ]
44
+
45
+ def test_get_language_from_extension(self):
46
+ """Test _get_language_from_extension method"""
47
+ # Test common extensions
48
+ self.assertEqual(self.detector._get_language_from_extension(".py"), "Python")
49
+ self.assertEqual(self.detector._get_language_from_extension(".js"), "JavaScript")
50
+ self.assertEqual(self.detector._get_language_from_extension(".jsx"), "JavaScript")
51
+ self.assertEqual(self.detector._get_language_from_extension(".ts"), "TypeScript")
52
+ self.assertEqual(self.detector._get_language_from_extension(".tsx"), "TypeScript")
53
+ self.assertEqual(self.detector._get_language_from_extension(".java"), "Java")
54
+ self.assertEqual(self.detector._get_language_from_extension(".go"), "Go")
55
+ self.assertEqual(self.detector._get_language_from_extension(".rs"), "Rust")
56
+ self.assertEqual(self.detector._get_language_from_extension(".html"), "HTML")
57
+ self.assertEqual(self.detector._get_language_from_extension(".css"), "CSS")
58
+ self.assertEqual(self.detector._get_language_from_extension(".md"), "Markdown")
59
+
60
+ # Test unknown extension
61
+ self.assertEqual(self.detector._get_language_from_extension(".unknown"), "Other")
62
+
63
+ def test_get_language_from_filename(self):
64
+ """Test _get_language_from_filename method"""
65
+ # Test common filenames
66
+ self.assertEqual(self.detector._get_language_from_filename("Dockerfile"), "Dockerfile")
67
+ self.assertEqual(self.detector._get_language_from_filename(".gitignore"), "Git")
68
+ self.assertEqual(self.detector._get_language_from_filename("package.json"), "JSON")
69
+ self.assertEqual(self.detector._get_language_from_filename("README.md"), "Markdown")
70
+
71
+ # Test unknown filename
72
+ self.assertEqual(self.detector._get_language_from_filename("unknown"), None)
73
+
74
+ @patch('os.walk')
75
+ def test_detect_languages(self, mock_walk):
76
+ """Test detect_languages method"""
77
+ # Mock os.walk to return our mock files
78
+ mock_walk.return_value = [
79
+ ("/test/repo", ["static", "src", "docs"], ["main.py", "utils.py", "README.md", "package.json", ".gitignore"]),
80
+ ("/test/repo/static", [], ["script.js", "style.css"]),
81
+ ("/test/repo/src", ["components"], ["app.js"]),
82
+ ("/test/repo/src/components", [], ["Button.jsx", "Form.tsx"]),
83
+ ("/test/repo/docs", [], ["index.html"]),
84
+ ]
85
+
86
+ # Test the method
87
+ languages = self.detector.detect_languages(self.repo_path)
88
+
89
+ # Verify the result
90
+ self.assertIn("Python", languages)
91
+ self.assertIn("JavaScript", languages)
92
+ self.assertIn("TypeScript", languages)
93
+ self.assertIn("HTML", languages)
94
+ self.assertIn("CSS", languages)
95
+ self.assertIn("Markdown", languages)
96
+ self.assertIn("JSON", languages)
97
+ self.assertIn("Git", languages)
98
+
99
+ @patch('os.walk')
100
+ @patch('builtins.open', new_callable=mock_open, read_data="line1\nline2\nline3\n")
101
+ def test_get_language_breakdown(self, mock_file, mock_walk):
102
+ """Test get_language_breakdown method"""
103
+ # Mock os.walk to return our mock files
104
+ mock_walk.return_value = [
105
+ ("/test/repo", ["static", "src"], ["main.py", "utils.py", "README.md"]),
106
+ ("/test/repo/static", [], ["script.js"]),
107
+ ("/test/repo/src", [], ["app.js"]),
108
+ ]
109
+
110
+ # Test the method
111
+ breakdown = self.detector.get_language_breakdown(self.repo_path)
112
+
113
+ # Verify the result
114
+ self.assertIn("Python", breakdown)
115
+ self.assertIn("JavaScript", breakdown)
116
+ self.assertIn("Markdown", breakdown)
117
+
118
+ # Each mocked file counts as 4 lines: three text lines plus the empty line after the trailing newline
119
+ self.assertEqual(breakdown["Python"]["files"], 2)
120
+ self.assertEqual(breakdown["Python"]["lines"], 8) # 2 files * 4 lines
121
+ self.assertEqual(breakdown["JavaScript"]["files"], 2)
122
+ self.assertEqual(breakdown["JavaScript"]["lines"], 8) # 2 files * 4 lines
123
+ self.assertEqual(breakdown["Markdown"]["files"], 1)
124
+ self.assertEqual(breakdown["Markdown"]["lines"], 4) # 1 file * 4 lines
125
+
126
+ # Check percentages
127
+ total_lines = 20 # 5 files * 4 lines
128
+ self.assertEqual(breakdown["Python"]["percentage"], 40) # 8/20 * 100
129
+ self.assertEqual(breakdown["JavaScript"]["percentage"], 40) # 8/20 * 100
130
+ self.assertEqual(breakdown["Markdown"]["percentage"], 20) # 4/20 * 100
131
+
132
+ @patch('os.path.isfile')
133
+ def test_is_binary_file(self, mock_isfile):
134
+ """Test _is_binary_file method"""
135
+ # Mock isfile to always return True
136
+ mock_isfile.return_value = True
137
+
138
+ # Test with text file extensions
139
+ self.assertFalse(self.detector._is_binary_file("test.py"))
140
+ self.assertFalse(self.detector._is_binary_file("test.js"))
141
+ self.assertFalse(self.detector._is_binary_file("test.html"))
142
+ self.assertFalse(self.detector._is_binary_file("test.css"))
143
+ self.assertFalse(self.detector._is_binary_file("test.md"))
144
+
145
+ # Test with binary file extensions
146
+ self.assertTrue(self.detector._is_binary_file("test.png"))
147
+ self.assertTrue(self.detector._is_binary_file("test.jpg"))
148
+ self.assertTrue(self.detector._is_binary_file("test.gif"))
149
+ self.assertTrue(self.detector._is_binary_file("test.pdf"))
150
+ self.assertTrue(self.detector._is_binary_file("test.zip"))
151
+
152
+ # Test with non-existent file
153
+ mock_isfile.return_value = False
154
+ self.assertFalse(self.detector._is_binary_file("nonexistent.py"))
155
+
156
+ @patch('os.path.isdir')
157
+ def test_should_ignore_directory(self, mock_isdir):
158
+ """Test _should_ignore_directory method"""
159
+ # Mock isdir to always return True
160
+ mock_isdir.return_value = True
161
+
162
+ # Test with common directories to ignore
163
+ self.assertTrue(self.detector._should_ignore_directory("/test/repo/node_modules"))
164
+ self.assertTrue(self.detector._should_ignore_directory("/test/repo/.git"))
165
+ self.assertTrue(self.detector._should_ignore_directory("/test/repo/__pycache__"))
166
+ self.assertTrue(self.detector._should_ignore_directory("/test/repo/venv"))
167
+ self.assertTrue(self.detector._should_ignore_directory("/test/repo/.vscode"))
168
+
169
+ # Test with directories not to ignore
170
+ self.assertFalse(self.detector._should_ignore_directory("/test/repo/src"))
171
+ self.assertFalse(self.detector._should_ignore_directory("/test/repo/app"))
172
+ self.assertFalse(self.detector._should_ignore_directory("/test/repo/docs"))
173
+
174
+ # Test with non-existent directory
175
+ mock_isdir.return_value = False
176
+ self.assertFalse(self.detector._should_ignore_directory("/test/repo/nonexistent"))
177
+
178
+ def test_should_ignore_file(self):
179
+ """Test _should_ignore_file method"""
180
+ # Test with common files to ignore
181
+ self.assertTrue(self.detector._should_ignore_file("/test/repo/.DS_Store"))
182
+ self.assertTrue(self.detector._should_ignore_file("/test/repo/Thumbs.db"))
183
+ self.assertTrue(self.detector._should_ignore_file("/test/repo/.env"))
184
+
185
+ # Test with files not to ignore
186
+ self.assertFalse(self.detector._should_ignore_file("/test/repo/main.py"))
187
+ self.assertFalse(self.detector._should_ignore_file("/test/repo/app.js"))
188
+ self.assertFalse(self.detector._should_ignore_file("/test/repo/README.md"))
189
+
190
+
191
+ if __name__ == "__main__":
192
+ unittest.main()
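The breakdown assertions above (8/20 lines → 40, 4/20 → 20) imply per-language file and line counts with percentages taken over the total line count; the arithmetic, sketched with the field names the test checks:

def add_percentages(breakdown):
    # breakdown: {language: {"files": int, "lines": int}}, as asserted in the test above.
    total_lines = sum(stats["lines"] for stats in breakdown.values())
    for stats in breakdown.values():
        stats["percentage"] = round(stats["lines"] / total_lines * 100) if total_lines else 0
    return breakdown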
tests/test_performance_analyzer.py ADDED
@@ -0,0 +1,442 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Unit tests for the Performance Analyzer service
6
+ """
7
+
8
+ import unittest
9
+ from unittest.mock import patch, MagicMock, mock_open
10
+ import os
11
+ import sys
12
+ import re
13
+ from pathlib import Path
14
+
15
+ # Add the project root directory to the Python path
16
+ project_root = Path(__file__).resolve().parent.parent
17
+ sys.path.insert(0, str(project_root))
18
+
19
+ from src.services.performance_analyzer import PerformanceAnalyzer
20
+
21
+
22
+ class TestPerformanceAnalyzer(unittest.TestCase):
23
+ """Test cases for the PerformanceAnalyzer class"""
24
+
25
+ def setUp(self):
26
+ """Set up test fixtures"""
27
+ self.analyzer = PerformanceAnalyzer()
28
+ self.test_repo_path = "/test/repo"
29
+
30
+ def test_analyze_python_performance(self):
31
+ """Test analyze_python_performance method"""
32
+ # Create a sample Python file content with performance issues
33
+ python_code = """
34
+ def slow_function():
35
+ # This is a slow list comprehension with nested loops
36
+ result = [x * y for x in range(1000) for y in range(1000)]
37
+
38
+ # Inefficient string concatenation in a loop
39
+ s = ""
40
+ for i in range(1000):
41
+ s += str(i)
42
+
43
+ # Using a list where a set would be more efficient
44
+ items = [1, 2, 3, 4, 5]
45
+ if 3 in items: # O(n) operation
46
+ print("Found")
47
+ """
48
+
49
+ # Mock the file discovery and reading
50
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.py']), \
51
+ patch('builtins.open', mock_open(read_data=python_code)):
52
+
53
+ # Call the method
54
+ result = self.analyzer.analyze_python_performance(self.test_repo_path)
55
+
56
+ # Verify the result
57
+ self.assertGreater(len(result['issues']), 0)
58
+ self.assertGreater(result['issue_count'], 0)
59
+
60
+ # Check for specific issues
61
+ nested_loop_issue = next((issue for issue in result['issues']
62
+ if 'nested loop' in issue['message'].lower()), None)
63
+ string_concat_issue = next((issue for issue in result['issues']
64
+ if 'string concatenation' in issue['message'].lower()), None)
65
+ list_vs_set_issue = next((issue for issue in result['issues']
66
+ if 'list' in issue['message'].lower() and 'set' in issue['message'].lower()), None)
67
+
68
+ self.assertIsNotNone(nested_loop_issue)
69
+ self.assertIsNotNone(string_concat_issue)
70
+ self.assertIsNotNone(list_vs_set_issue)
71
+
72
+ def test_analyze_javascript_performance(self):
73
+ """Test analyze_javascript_performance method"""
74
+ # Create a sample JavaScript file content with performance issues
75
+ js_code = """
76
+ function slowFunction() {
77
+ // Inefficient DOM manipulation in a loop
78
+ for (let i = 0; i < 1000; i++) {
79
+ document.getElementById('myElement').innerHTML += 'item ' + i;
80
+ }
81
+
82
+ // Memory leak with event listeners
83
+ document.getElementById('button').addEventListener('click', function() {
84
+ console.log('clicked');
85
+ });
86
+
87
+ // Blocking the main thread
88
+ let start = Date.now();
89
+ while (Date.now() - start < 1000) {
90
+ // Busy wait for 1 second
91
+ }
92
+ }
93
+ """
94
+
95
+ # Mock the file discovery and reading
96
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.js']), \
97
+ patch('builtins.open', mock_open(read_data=js_code)):
98
+
99
+ # Call the method
100
+ result = self.analyzer.analyze_javascript_performance(self.test_repo_path)
101
+
102
+ # Verify the result
103
+ self.assertGreater(len(result['issues']), 0)
104
+ self.assertGreater(result['issue_count'], 0)
105
+
106
+ # Check for specific issues
107
+ dom_issue = next((issue for issue in result['issues']
108
+ if 'dom' in issue['message'].lower()), None)
109
+ memory_leak_issue = next((issue for issue in result['issues']
110
+ if 'memory leak' in issue['message'].lower() or 'event listener' in issue['message'].lower()), None)
111
+ blocking_issue = next((issue for issue in result['issues']
112
+ if 'blocking' in issue['message'].lower() or 'main thread' in issue['message'].lower()), None)
113
+
114
+ self.assertIsNotNone(dom_issue)
115
+ self.assertIsNotNone(memory_leak_issue)
116
+ self.assertIsNotNone(blocking_issue)
117
+
118
+ def test_analyze_typescript_performance(self):
119
+ """Test analyze_typescript_performance method"""
120
+ # Create a sample TypeScript file content with performance issues
121
+ ts_code = """
122
+ function slowFunction(): void {
123
+ // Inefficient array operations
124
+ const array: number[] = [];
125
+ for (let i = 0; i < 1000; i++) {
126
+ array.unshift(i); // O(n) operation
127
+ }
128
+
129
+ // Excessive type casting
130
+ let value: any = "123";
131
+ let num: number = <number><any>value;
132
+
133
+ // Inefficient async/await usage
134
+ async function fetchData(): Promise<void> {
135
+ const promises = [];
136
+ for (let i = 0; i < 10; i++) {
137
+ const result = await fetch(`https://api.example.com/data/${i}`); // Sequential fetches
138
+ promises.push(result);
139
+ }
140
+ }
141
+ }
142
+ """
143
+
144
+ # Mock the file discovery and reading
145
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/test.ts']), \
146
+ patch('builtins.open', mock_open(read_data=ts_code)):
147
+
148
+ # Call the method
149
+ result = self.analyzer.analyze_typescript_performance(self.test_repo_path)
150
+
151
+ # Verify the result
152
+ self.assertGreater(len(result['issues']), 0)
153
+ self.assertGreater(result['issue_count'], 0)
154
+
155
+ # Check for specific issues
156
+ array_issue = next((issue for issue in result['issues']
157
+ if 'array' in issue['message'].lower() and 'unshift' in issue['message'].lower()), None)
158
+ type_casting_issue = next((issue for issue in result['issues']
159
+ if 'type casting' in issue['message'].lower()), None)
160
+ async_issue = next((issue for issue in result['issues']
161
+ if 'async' in issue['message'].lower() or 'await' in issue['message'].lower()), None)
162
+
163
+ self.assertIsNotNone(array_issue)
164
+ self.assertIsNotNone(type_casting_issue)
165
+ self.assertIsNotNone(async_issue)
166
+
167
+ def test_analyze_java_performance(self):
168
+ """Test analyze_java_performance method"""
169
+ # Create a sample Java file content with performance issues
170
+ java_code = """
171
+ public class SlowClass {
172
+ public void slowMethod() {
173
+ // Inefficient string concatenation
174
+ String result = "";
175
+ for (int i = 0; i < 1000; i++) {
176
+ result += i; // Creates a new string each time
177
+ }
178
+
179
+ // Using ArrayList where HashSet would be more efficient for lookups
180
+ ArrayList<Integer> list = new ArrayList<>();
181
+ for (int i = 0; i < 1000; i++) {
182
+ list.add(i);
183
+ }
184
+ boolean contains = list.contains(500); // O(n) operation
185
+
186
+ // Excessive object creation
187
+ for (int i = 0; i < 1000; i++) {
188
+ Integer obj = new Integer(i); // Creates 1000 objects
189
+ }
190
+ }
191
+ }
192
+ """
193
+
194
+ # Mock the file discovery and reading
195
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/SlowClass.java']), \
196
+ patch('builtins.open', mock_open(read_data=java_code)):
197
+
198
+ # Call the method
199
+ result = self.analyzer.analyze_java_performance(self.test_repo_path)
200
+
201
+ # Verify the result
202
+ self.assertGreater(len(result['issues']), 0)
203
+ self.assertGreater(result['issue_count'], 0)
204
+
205
+ # Check for specific issues
206
+ string_concat_issue = next((issue for issue in result['issues']
207
+ if 'string concatenation' in issue['message'].lower()), None)
208
+ collection_issue = next((issue for issue in result['issues']
209
+ if 'arraylist' in issue['message'].lower() and 'hashset' in issue['message'].lower()), None)
210
+ object_creation_issue = next((issue for issue in result['issues']
211
+ if 'object creation' in issue['message'].lower()), None)
212
+
213
+ self.assertIsNotNone(string_concat_issue)
214
+ self.assertIsNotNone(collection_issue)
215
+ self.assertIsNotNone(object_creation_issue)
216
+
217
+ def test_analyze_go_performance(self):
218
+ """Test analyze_go_performance method"""
219
+ # Create a sample Go file content with performance issues
220
+ go_code = """
221
+ package main
222
+
223
+ import (
224
+ "fmt"
225
+ "sync"
226
+ )
227
+
228
+ func slowFunction() {
229
+ // Inefficient slice operations
230
+ slice := []int{}
231
+ for i := 0; i < 1000; i++ {
232
+ slice = append(slice, i) // May cause reallocation
233
+ }
234
+
235
+ // Mutex instead of atomic operations
236
+ var mu sync.Mutex
237
+ counter := 0
238
+ for i := 0; i < 1000; i++ {
239
+ mu.Lock()
240
+ counter++
241
+ mu.Unlock()
242
+ }
243
+
244
+ // Inefficient string concatenation
245
+ result := ""
246
+ for i := 0; i < 1000; i++ {
247
+ result += fmt.Sprintf("%d", i) // Creates a new string each time
248
+ }
249
+ }
250
+ """
251
+
252
+ # Mock the file discovery and reading
253
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/main.go']), \
254
+ patch('builtins.open', mock_open(read_data=go_code)):
255
+
256
+ # Call the method
257
+ result = self.analyzer.analyze_go_performance(self.test_repo_path)
258
+
259
+ # Verify the result
260
+ self.assertGreater(len(result['issues']), 0)
261
+ self.assertGreater(result['issue_count'], 0)
262
+
263
+ # Check for specific issues
264
+ slice_issue = next((issue for issue in result['issues']
265
+ if 'slice' in issue['message'].lower() and 'append' in issue['message'].lower()), None)
266
+ mutex_issue = next((issue for issue in result['issues']
267
+ if 'mutex' in issue['message'].lower() or 'atomic' in issue['message'].lower()), None)
268
+ string_concat_issue = next((issue for issue in result['issues']
269
+ if 'string concatenation' in issue['message'].lower()), None)
270
+
271
+ self.assertIsNotNone(slice_issue)
272
+ self.assertIsNotNone(mutex_issue)
273
+ self.assertIsNotNone(string_concat_issue)
274
+
275
+ def test_analyze_rust_performance(self):
276
+ """Test analyze_rust_performance method"""
277
+ # Create a sample Rust file content with performance issues
278
+ rust_code = """
279
+ fn slow_function() {
280
+ // Inefficient string operations
281
+ let mut result = String::new();
282
+ for i in 0..1000 {
283
+ result.push_str(&i.to_string()); // Allocates a new string each time
284
+ }
285
+
286
+ // Excessive cloning
287
+ let data = vec![1, 2, 3, 4, 5];
288
+ let copied = data.clone(); // Clones the entire vector
289
+
290
+ // Inefficient iteration
291
+ let mut sum = 0;
292
+ for i in 0..data.len() {
293
+ sum += data[i]; // Bounds checking on each access
294
+ }
295
+ }
296
+ """
297
+
298
+ # Mock the file discovery and reading
299
+ with patch.object(self.analyzer, '_find_files', return_value=['/test/repo/main.rs']), \
300
+ patch('builtins.open', mock_open(read_data=rust_code)):
301
+
302
+ # Call the method
303
+ result = self.analyzer.analyze_rust_performance(self.test_repo_path)
304
+
305
+ # Verify the result
306
+ self.assertGreater(len(result['issues']), 0)
307
+ self.assertGreater(result['issue_count'], 0)
308
+
309
+ # Check for specific issues
310
+ string_issue = next((issue for issue in result['issues']
311
+ if 'string' in issue['message'].lower()), None)
312
+ clone_issue = next((issue for issue in result['issues']
313
+ if 'clone' in issue['message'].lower()), None)
314
+ iteration_issue = next((issue for issue in result['issues']
315
+ if 'iteration' in issue['message'].lower() or 'bounds checking' in issue['message'].lower()), None)
316
+
317
+ self.assertIsNotNone(string_issue)
318
+ self.assertIsNotNone(clone_issue)
319
+         self.assertIsNotNone(iteration_issue)
+
+     def test_analyze_repository(self):
+         """Test analyze_repository method"""
+         # Mock the language-specific analysis methods
+         self.analyzer.analyze_python_performance = MagicMock(return_value={
+             'issues': [
+                 {'file': 'file1.py', 'line': 10, 'message': 'Inefficient list comprehension'},
+                 {'file': 'file1.py', 'line': 20, 'message': 'Inefficient string concatenation'}
+             ],
+             'issue_count': 2
+         })
+         self.analyzer.analyze_javascript_performance = MagicMock(return_value={
+             'issues': [
+                 {'file': 'file1.js', 'line': 15, 'message': 'DOM manipulation in loop'}
+             ],
+             'issue_count': 1
+         })
+
+         # Call the method
+         result = self.analyzer.analyze_repository(self.test_repo_path, ['Python', 'JavaScript'])
+
+         # Verify the result
+         self.assertEqual(len(result['language_results']), 2)  # Two languages
+         self.assertIn('Python', result['language_results'])
+         self.assertIn('JavaScript', result['language_results'])
+         self.assertEqual(result['language_results']['Python']['issue_count'], 2)
+         self.assertEqual(result['language_results']['JavaScript']['issue_count'], 1)
+
+         # Check hotspots
+         self.assertEqual(len(result['hotspots']), 1)  # One file with multiple issues
+         self.assertEqual(result['hotspots'][0]['file'], 'file1.py')
+         self.assertEqual(result['hotspots'][0]['issue_count'], 2)
+
+         # Verify the method calls
+         self.analyzer.analyze_python_performance.assert_called_once_with(self.test_repo_path)
+         self.analyzer.analyze_javascript_performance.assert_called_once_with(self.test_repo_path)
+
+     def test_identify_hotspots(self):
+         """Test _identify_hotspots method"""
+         # Create sample language results
+         language_results = {
+             'Python': {
+                 'issues': [
+                     {'file': 'file1.py', 'line': 10, 'message': 'Issue 1'},
+                     {'file': 'file1.py', 'line': 20, 'message': 'Issue 2'},
+                     {'file': 'file2.py', 'line': 5, 'message': 'Issue 3'}
+                 ],
+                 'issue_count': 3
+             },
+             'JavaScript': {
+                 'issues': [
+                     {'file': 'file1.js', 'line': 15, 'message': 'Issue 4'},
+                     {'file': 'file3.js', 'line': 25, 'message': 'Issue 5'},
+                     {'file': 'file3.js', 'line': 30, 'message': 'Issue 6'}
+                 ],
+                 'issue_count': 3
+             }
+         }
+
+         # Call the method
+         hotspots = self.analyzer._identify_hotspots(language_results)
+
+         # Verify the result
+         self.assertEqual(len(hotspots), 2)  # Two files with multiple issues
+
+         # Find the hotspots by file
+         file1_py_hotspot = next((h for h in hotspots if h['file'] == 'file1.py'), None)
+         file3_js_hotspot = next((h for h in hotspots if h['file'] == 'file3.js'), None)
+
+         self.assertIsNotNone(file1_py_hotspot)
+         self.assertIsNotNone(file3_js_hotspot)
+         self.assertEqual(file1_py_hotspot['issue_count'], 2)
+         self.assertEqual(file3_js_hotspot['issue_count'], 2)
+
+     @patch('os.walk')
+     def test_find_files(self, mock_walk):
+         """Test _find_files method"""
+         # Set up the mock
+         mock_walk.return_value = [
+             ('/test/repo', ['dir1'], ['file1.py', 'file2.js']),
+             ('/test/repo/dir1', [], ['file3.py'])
+         ]
+
+         # Call the method
+         python_files = self.analyzer._find_files(self.test_repo_path, '.py')
+
+         # Verify the result
+         self.assertEqual(len(python_files), 2)
+         self.assertIn('/test/repo/file1.py', python_files)
+         self.assertIn('/test/repo/dir1/file3.py', python_files)
+
+     def test_analyze_file_with_patterns(self):
+         """Test _analyze_file_with_patterns method"""
+         # Create sample file content and patterns
+         file_content = """
+ def slow_function():
+     # This is a slow list comprehension
+     result = [x * y for x in range(1000) for y in range(1000)]
+
+     # Inefficient string concatenation
+     s = ""
+     for i in range(1000):
+         s += str(i)
+ """
+
+         patterns = [
+             (re.compile(r'\[.*for.*for.*\]', re.MULTILINE), "Nested list comprehension can be inefficient"),
+             (re.compile(r'\s+s\s\+=\s', re.MULTILINE), "String concatenation in a loop is inefficient")
+         ]
+
+         # Call the method
+         issues = self.analyzer._analyze_file_with_patterns('/test/repo/test.py', file_content, patterns)
+
+         # Verify the result
+         self.assertEqual(len(issues), 2)  # Two patterns matched
+         self.assertEqual(issues[0]['file'], 'test.py')  # Should be relative path
+         self.assertEqual(issues[1]['file'], 'test.py')
+         self.assertIn('Nested list comprehension', issues[0]['message'])
+         self.assertIn('String concatenation', issues[1]['message'])
+
+
+ if __name__ == "__main__":
+     unittest.main()
tests/test_report_generator.py ADDED
@@ -0,0 +1,274 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Unit tests for the Report Generator Service
+ """
+
+ import unittest
+ from unittest.mock import patch, MagicMock, mock_open
+ import os
+ import sys
+ import json
+ from pathlib import Path
+
+ # Add the project root directory to the Python path
+ project_root = Path(__file__).resolve().parent.parent
+ sys.path.insert(0, str(project_root))
+
+ from src.services.report_generator import ReportGenerator
+
+
+ class TestReportGenerator(unittest.TestCase):
+     """Test cases for the ReportGenerator class"""
+
+     def setUp(self):
+         """Set up test fixtures"""
+         # Create a temporary output directory for testing
+         self.test_output_dir = "test_reports"
+         self.generator = ReportGenerator(output_dir=self.test_output_dir)
+
+         # Sample test data
+         self.repo_name = "test-repo"
+         self.test_results = {
+             "repository_info": {
+                 "branch": "main",
+                 "commit": "abc123",
+                 "remote_url": "https://github.com/test/test-repo",
+                 "size": 1024,
+                 "file_count": 10
+             },
+             "language_breakdown": {
+                 "Python": {"files": 5, "lines": 500, "percentage": 70},
+                 "JavaScript": {"files": 3, "lines": 200, "percentage": 30}
+             },
+             "code_analysis": {
+                 "Python": {
+                     "issue_count": 3,
+                     "issues": [
+                         {"severity": "high", "issue": "Unused variable", "file": "test.py", "line": 10, "description": "Variable 'x' is not used"},
+                         {"severity": "medium", "issue": "Missing docstring", "file": "test.py", "line": 5, "description": "Function missing docstring"}
+                     ]
+                 },
+                 "JavaScript": {
+                     "issue_count": 2,
+                     "issues": [
+                         {"severity": "medium", "issue": "Unused variable", "file": "test.js", "line": 15, "description": "Variable 'y' is not used"}
+                     ]
+                 }
+             },
+             "security_scan": {
+                 "Python": {
+                     "vulnerability_count": 1,
+                     "vulnerabilities": [
+                         {"severity": "critical", "issue": "SQL Injection", "file": "db.py", "line": 25, "description": "Unsanitized SQL query"}
+                     ]
+                 },
+                 "JavaScript": {
+                     "vulnerability_count": 0,
+                     "vulnerabilities": []
+                 }
+             },
+             "performance_analysis": {
+                 "language_results": {
+                     "Python": {
+                         "issue_count": 2,
+                         "issues": [
+                             {"issue": "Inefficient loop", "file": "test.py", "line": 20, "description": "Use list comprehension instead"}
+                         ]
+                     }
+                 },
+                 "hotspots": [
+                     {"file": "test.py", "language": "Python", "issue_count": 2}
+                 ]
+             },
+             "ai_review": {
+                 "reviews": {
+                     "test.py": {
+                         "status": "success",
+                         "review_text": "Code review for test.py",
+                         "suggestions": [
+                             {"section": "Code Quality", "line": 10, "description": "Variable 'x' is not used", "details": "Remove unused variable"}
+                         ]
+                     }
+                 },
+                 "summary": "Overall, the code quality is good but there are some issues to address."
+             }
+         }
+
+     def tearDown(self):
+         """Tear down test fixtures"""
+         # Clean up the test output directory
+         if os.path.exists(self.test_output_dir):
+             for file in os.listdir(self.test_output_dir):
+                 os.remove(os.path.join(self.test_output_dir, file))
+             os.rmdir(self.test_output_dir)
+
+     def test_init(self):
+         """Test initialization of the generator"""
+         self.assertIsNotNone(self.generator)
+         self.assertEqual(self.generator.output_dir, self.test_output_dir)
+         self.assertTrue(os.path.exists(self.test_output_dir))
+
+     @patch('builtins.open', new_callable=mock_open)
+     @patch('json.dump')
+     def test_generate_json_report(self, mock_json_dump, mock_file_open):
+         """Test _generate_json_report method"""
+         # Call the method
+         report_content = {"test": "content"}
+         report_path = self.generator._generate_json_report("test_report", report_content)
+
+         # Verify the result
+         expected_path = os.path.join(self.test_output_dir, "test_report.json")
+         self.assertEqual(report_path, expected_path)
+         mock_file_open.assert_called_once_with(expected_path, "w", encoding="utf-8")
+         mock_json_dump.assert_called_once()
+
+     @patch('builtins.open', new_callable=mock_open)
+     @patch('markdown.markdown')
+     def test_generate_html_report(self, mock_markdown, mock_file_open):
+         """Test _generate_html_report method"""
+         # Mock markdown conversion
+         mock_markdown.return_value = "<h1>Test</h1>"
+
+         # Call the method
+         report_content = {"metadata": {"repository_name": "test-repo"}}
+         report_path = self.generator._generate_html_report("test_report", report_content)
+
+         # Verify the result
+         expected_path = os.path.join(self.test_output_dir, "test_report.html")
+         self.assertEqual(report_path, expected_path)
+         mock_file_open.assert_called_once_with(expected_path, "w", encoding="utf-8")
+         mock_markdown.assert_called_once()
+
+     @patch('pdfkit.from_file')
+     @patch('os.remove')
+     def test_generate_pdf_report(self, mock_remove, mock_pdfkit):
+         """Test _generate_pdf_report method"""
+         # Mock the HTML report generation
+         with patch.object(self.generator, '_generate_html_report') as mock_html_report:
+             mock_html_report.return_value = os.path.join(self.test_output_dir, "test_report_temp.html")
+
+             # Call the method
+             report_content = {"test": "content"}
+             report_path = self.generator._generate_pdf_report("test_report", report_content)
+
+             # Verify the result
+             expected_path = os.path.join(self.test_output_dir, "test_report.pdf")
+             self.assertEqual(report_path, expected_path)
+             mock_html_report.assert_called_once_with("test_report_temp", report_content)
+             mock_pdfkit.assert_called_once_with(
+                 os.path.join(self.test_output_dir, "test_report_temp.html"),
+                 expected_path
+             )
+             mock_remove.assert_called_once_with(os.path.join(self.test_output_dir, "test_report_temp.html"))
+
+     @patch('builtins.open', new_callable=mock_open)
+     @patch('csv.DictWriter')
+     def test_generate_csv_report(self, mock_csv_writer, mock_file_open):
+         """Test _generate_csv_report method"""
+         # Mock CSV writer
+         mock_writer = MagicMock()
+         mock_csv_writer.return_value = mock_writer
+
+         # Call the method
+         report_content = {
+             "code_quality": {"issues_by_language": {}},
+             "security": {"vulnerabilities_by_language": {}},
+             "performance": {"issues_by_language": {}},
+             "ai_review": {"file_reviews": {}}
+         }
+         report_path = self.generator._generate_csv_report("test_report", report_content)
+
+         # Verify the result
+         expected_path = os.path.join(self.test_output_dir, "test_report.csv")
+         self.assertEqual(report_path, expected_path)
+         mock_file_open.assert_called_once_with(expected_path, "w", newline="", encoding="utf-8")
+         mock_writer.writeheader.assert_called_once()
+         mock_writer.writerows.assert_called_once()
+
+     def test_calculate_summary_metrics(self):
+         """Test _calculate_summary_metrics method"""
+         # Call the method
+         metrics = self.generator._calculate_summary_metrics(self.test_results)
+
+         # Verify the result
+         self.assertEqual(metrics["total_files"], 10)
+         self.assertEqual(metrics["repository_size"], 1024)
+         self.assertEqual(metrics["total_code_issues"], 5)  # 3 Python + 2 JavaScript
+         self.assertEqual(metrics["critical_code_issues"], 1)  # 1 high severity issue
+         self.assertEqual(metrics["total_vulnerabilities"], 1)  # 1 Python vulnerability
+         self.assertEqual(metrics["critical_vulnerabilities"], 1)  # 1 critical vulnerability
+         self.assertEqual(metrics["total_performance_issues"], 2)  # 2 Python performance issues
+         self.assertEqual(metrics["performance_hotspots"], 1)  # 1 hotspot
+         self.assertIn("overall_score", metrics)
+         self.assertIn("quality_rating", metrics)
+
+     def test_extract_top_issues(self):
+         """Test _extract_top_issues method"""
+         # Call the method
+         top_issues = self.generator._extract_top_issues(self.test_results["code_analysis"])
+
+         # Verify the result
+         self.assertEqual(len(top_issues), 3)  # Total issues in the test data
+         self.assertEqual(top_issues[0]["severity"], "high")  # First issue should be high severity
+
+     def test_extract_critical_vulnerabilities(self):
+         """Test _extract_critical_vulnerabilities method"""
+         # Call the method
+         critical_vulns = self.generator._extract_critical_vulnerabilities(self.test_results["security_scan"])
+
+         # Verify the result
+         self.assertEqual(len(critical_vulns), 1)  # Only one vulnerability in the test data
+         self.assertEqual(critical_vulns[0]["severity"], "critical")
+
+     def test_generate_recommendations(self):
+         """Test _generate_recommendations method"""
+         # Call the method
+         recommendations = self.generator._generate_recommendations(self.test_results)
+
+         # Verify the result
+         self.assertIn("high_priority", recommendations)
+         self.assertIn("medium_priority", recommendations)
+         self.assertIn("low_priority", recommendations)
+         self.assertEqual(len(recommendations["high_priority"]), 1)  # One critical security vulnerability
+         self.assertGreaterEqual(len(recommendations["medium_priority"]), 1)  # At least one high code issue
+
+     @patch('os.path.exists')
+     @patch('os.listdir')
+     def test_generate_report(self, mock_listdir, mock_exists):
+         """Test generate_report method"""
+         # Mock the report generation methods
+         with patch.object(self.generator, '_create_report_content') as mock_create_content, \
+              patch.object(self.generator, '_generate_json_report') as mock_json_report, \
+              patch.object(self.generator, '_generate_html_report') as mock_html_report, \
+              patch.object(self.generator, '_generate_pdf_report') as mock_pdf_report, \
+              patch.object(self.generator, '_generate_csv_report') as mock_csv_report:
+
+             # Set up the mocks
+             mock_create_content.return_value = {"test": "content"}
+             mock_json_report.return_value = "json_path"
+             mock_html_report.return_value = "html_path"
+             mock_pdf_report.return_value = "pdf_path"
+             mock_csv_report.return_value = "csv_path"
+
+             # Call the method with all formats
+             report_paths = self.generator.generate_report(self.repo_name, self.test_results, "all")
+
+             # Verify the result
+             self.assertEqual(report_paths["json"], "json_path")
+             self.assertEqual(report_paths["html"], "html_path")
+             self.assertEqual(report_paths["pdf"], "pdf_path")
+             self.assertEqual(report_paths["csv"], "csv_path")
+             mock_create_content.assert_called_once_with(self.repo_name, self.test_results)
+
+             # Call the method with specific format
+             report_paths = self.generator.generate_report(self.repo_name, self.test_results, "json")
+
+             # Verify the result
+             self.assertEqual(len(report_paths), 1)
+             self.assertEqual(report_paths["json"], "json_path")
+
+
+ if __name__ == "__main__":
+     unittest.main()
tests/test_repository_service.py ADDED
@@ -0,0 +1,226 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Unit tests for the Repository Service
+ """
+
+ import unittest
+ from unittest.mock import patch, MagicMock
+ import os
+ import sys
+ import shutil
+ from pathlib import Path
+
+ # Add the project root directory to the Python path
+ project_root = Path(__file__).resolve().parent.parent
+ sys.path.insert(0, str(project_root))
+
+ from src.services.repository_service import (
+     validate_github_url,
+     normalize_github_url,
+     extract_repo_name,
+     clone_repository,
+     get_repository_info,
+     cleanup_repository,
+     cleanup_all_repositories
+ )
+
+
+ class TestRepositoryService(unittest.TestCase):
+     """Test cases for the repository service functions"""
+
+     def setUp(self):
+         """Set up test fixtures"""
+         self.test_repo_dir = "test_repos"
+         os.makedirs(self.test_repo_dir, exist_ok=True)
+
+     def tearDown(self):
+         """Tear down test fixtures"""
+         if os.path.exists(self.test_repo_dir):
+             shutil.rmtree(self.test_repo_dir)
+
+     def test_validate_github_url(self):
+         """Test validate_github_url function"""
+         # Valid URLs
+         self.assertTrue(validate_github_url("https://github.com/user/repo"))
+         self.assertTrue(validate_github_url("https://github.com/user/repo.git"))
+         self.assertTrue(validate_github_url("git@github.com:user/repo.git"))
+         self.assertTrue(validate_github_url("https://github.com/user/repo-with-dash"))
+         self.assertTrue(validate_github_url("https://github.com/user/repo_with_underscore"))
+
+         # Invalid URLs
+         self.assertFalse(validate_github_url("https://gitlab.com/user/repo"))
+         self.assertFalse(validate_github_url("https://github.com"))
+         self.assertFalse(validate_github_url("https://github.com/user"))
+         self.assertFalse(validate_github_url("not a url"))
+
+     def test_normalize_github_url(self):
+         """Test normalize_github_url function"""
+         # HTTPS URLs
+         self.assertEqual(
+             normalize_github_url("https://github.com/user/repo"),
+             "https://github.com/user/repo.git"
+         )
+         self.assertEqual(
+             normalize_github_url("https://github.com/user/repo.git"),
+             "https://github.com/user/repo.git"
+         )
+
+         # SSH URLs
+         self.assertEqual(
+             normalize_github_url("git@github.com:user/repo.git"),
+             "https://github.com/user/repo.git"
+         )
+         self.assertEqual(
+             normalize_github_url("git@github.com:user/repo"),
+             "https://github.com/user/repo.git"
+         )
+
+         # URLs with trailing slashes
+         self.assertEqual(
+             normalize_github_url("https://github.com/user/repo/"),
+             "https://github.com/user/repo.git"
+         )
+
+         # Invalid URLs should return None
+         self.assertIsNone(normalize_github_url("https://gitlab.com/user/repo"))
+         self.assertIsNone(normalize_github_url("not a url"))
+
+     def test_extract_repo_name(self):
+         """Test extract_repo_name function"""
+         self.assertEqual(extract_repo_name("https://github.com/user/repo"), "repo")
+         self.assertEqual(extract_repo_name("https://github.com/user/repo.git"), "repo")
+         self.assertEqual(extract_repo_name("git@github.com:user/repo.git"), "repo")
+         self.assertEqual(extract_repo_name("https://github.com/user/repo-with-dash"), "repo-with-dash")
+
+         # Invalid URLs should return None
+         self.assertIsNone(extract_repo_name("https://github.com"))
+         self.assertIsNone(extract_repo_name("not a url"))
+
+     @patch('git.Repo.clone_from')
+     def test_clone_repository(self, mock_clone_from):
+         """Test clone_repository function"""
+         # Mock the Git clone operation
+         mock_repo = MagicMock()
+         mock_clone_from.return_value = mock_repo
+
+         # Test with default branch
+         repo_path = clone_repository(
+             "https://github.com/user/repo",
+             output_dir=self.test_repo_dir
+         )
+
+         # Verify the result
+         expected_path = os.path.join(self.test_repo_dir, "repo")
+         self.assertEqual(repo_path, expected_path)
+         mock_clone_from.assert_called_once()
+
+         # Test with specific branch
+         mock_clone_from.reset_mock()
+         repo_path = clone_repository(
+             "https://github.com/user/repo",
+             branch="dev",
+             output_dir=self.test_repo_dir
+         )
+
+         # Verify the result
+         self.assertEqual(repo_path, expected_path)
+         mock_clone_from.assert_called_once()
+
+         # Test with invalid URL
+         with self.assertRaises(ValueError):
+             clone_repository(
+                 "not a url",
+                 output_dir=self.test_repo_dir
+             )
+
+     @patch('git.Repo')
+     @patch('os.path.getsize')
+     @patch('os.walk')
+     def test_get_repository_info(self, mock_walk, mock_getsize, mock_repo):
+         """Test get_repository_info function"""
+         # Mock the Git repository
+         mock_repo_instance = MagicMock()
+         mock_repo.return_value = mock_repo_instance
+
+         # Mock the active branch
+         mock_branch = MagicMock()
+         mock_branch.name = "main"
+         mock_repo_instance.active_branch = mock_branch
+
+         # Mock the head commit
+         mock_commit = MagicMock()
+         mock_commit.hexsha = "abc123"
+         mock_repo_instance.head.commit = mock_commit
+
+         # Mock the remote URL
+         mock_remote = MagicMock()
+         mock_remote.url = "https://github.com/user/repo.git"
+         mock_repo_instance.remotes.origin = mock_remote
+
+         # Mock the repository size
+         mock_getsize.return_value = 1024
+
+         # Mock the file count
+         mock_walk.return_value = [
+             ("/test/repo", ["dir1"], ["file1.py", "file2.py"]),
+             ("/test/repo/dir1", [], ["file3.py"])
+         ]
+
+         # Test the function
+         repo_info = get_repository_info("/test/repo")
+
+         # Verify the result
+         self.assertEqual(repo_info["branch"], "main")
+         self.assertEqual(repo_info["commit"], "abc123")
+         self.assertEqual(repo_info["remote_url"], "https://github.com/user/repo.git")
+         self.assertEqual(repo_info["size"], 1024)
+         self.assertEqual(repo_info["file_count"], 3)
+
+     @patch('shutil.rmtree')
+     @patch('os.path.exists')
+     def test_cleanup_repository(self, mock_exists, mock_rmtree):
+         """Test cleanup_repository function"""
+         # Mock the path exists check
+         mock_exists.return_value = True
+
+         # Test the function
+         cleanup_repository("/test/repo")
+
+         # Verify the result
+         mock_exists.assert_called_once_with("/test/repo")
+         mock_rmtree.assert_called_once_with("/test/repo")
+
+         # Test with non-existent path
+         mock_exists.reset_mock()
+         mock_rmtree.reset_mock()
+         mock_exists.return_value = False
+
+         cleanup_repository("/test/repo")
+
+         mock_exists.assert_called_once_with("/test/repo")
+         mock_rmtree.assert_not_called()
+
+     @patch('os.listdir')
+     @patch('os.path.isdir')
+     @patch('shutil.rmtree')
+     def test_cleanup_all_repositories(self, mock_rmtree, mock_isdir, mock_listdir):
+         """Test cleanup_all_repositories function"""
+         # Mock the directory listing
+         mock_listdir.return_value = ["repo1", "repo2", "file.txt"]
+
+         # Mock the isdir check
+         mock_isdir.side_effect = lambda path: path.endswith("repo1") or path.endswith("repo2")
+
+         # Test the function
+         cleanup_all_repositories(self.test_repo_dir)
+
+         # Verify the result
+         mock_listdir.assert_called_once_with(self.test_repo_dir)
+         self.assertEqual(mock_isdir.call_count, 3)  # Called for each item in the directory
+         self.assertEqual(mock_rmtree.call_count, 2)  # Called for each directory
+
+
+ if __name__ == "__main__":
+     unittest.main()
tests/test_security_scanner.py ADDED
@@ -0,0 +1,420 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ """
+ Unit tests for the Security Scanner service
+ """
+
+ import unittest
+ from unittest.mock import patch, MagicMock, mock_open
+ import os
+ import sys
+ import json
+ from pathlib import Path
+
+ # Add the project root directory to the Python path
+ project_root = Path(__file__).resolve().parent.parent
+ sys.path.insert(0, str(project_root))
+
+ from src.services.security_scanner import SecurityScanner
+
+
+ class TestSecurityScanner(unittest.TestCase):
+     """Test cases for the SecurityScanner class"""
+
+     def setUp(self):
+         """Set up test fixtures"""
+         self.scanner = SecurityScanner()
+         self.test_repo_path = "/test/repo"
+
+     @patch('os.path.exists')
+     @patch('subprocess.run')
+     def test_scan_python_dependencies(self, mock_run, mock_exists):
+         """Test scan_python_dependencies method"""
+         # Set up the mocks
+         mock_exists.return_value = True
+
+         # Mock the requirements.txt file
+         with patch('builtins.open', mock_open(read_data="requests==2.25.1\ndjango==2.2.0\n")):
+             # Mock the subprocess.run result
+             mock_process = MagicMock()
+             mock_process.returncode = 0
+             mock_process.stdout = json.dumps({
+                 "vulnerabilities": [
+                     {
+                         "package_name": "django",
+                         "vulnerable_spec": "<2.2.28",
+                         "installed_version": "2.2.0",
+                         "description": "Django before 2.2.28 has a potential directory traversal via ../ in the file name.",
+                         "id": "CVE-2022-34265",
+                         "cvss_v3_score": "7.5"
+                     }
+                 ]
+             })
+             mock_run.return_value = mock_process
+
+             # Call the method
+             result = self.scanner.scan_python_dependencies(self.test_repo_path)
+
+             # Verify the result
+             self.assertEqual(len(result['vulnerabilities']), 1)
+             self.assertEqual(result['vulnerability_count'], 1)
+             self.assertEqual(result['vulnerabilities'][0]['package'], 'django')
+             self.assertEqual(result['vulnerabilities'][0]['installed_version'], '2.2.0')
+             self.assertEqual(result['vulnerabilities'][0]['vulnerability_id'], 'CVE-2022-34265')
+             self.assertEqual(result['vulnerabilities'][0]['severity'], 'high')  # 7.5 maps to high
+
+     @patch('os.path.exists')
+     @patch('subprocess.run')
+     def test_scan_javascript_dependencies(self, mock_run, mock_exists):
+         """Test scan_javascript_dependencies method"""
+         # Set up the mocks
+         mock_exists.return_value = True
+
+         # Mock the subprocess.run result
+         mock_process = MagicMock()
+         mock_process.returncode = 0
+         mock_process.stdout = json.dumps({
+             "vulnerabilities": {
+                 "lodash": [
+                     {
+                         "name": "lodash",
+                         "severity": "high",
+                         "via": [
+                             {
+                                 "source": 1065,
+                                 "name": "lodash",
+                                 "dependency": "lodash",
+                                 "title": "Prototype Pollution",
+                                 "url": "https://npmjs.com/advisories/1065",
+                                 "severity": "high",
+                                 "range": "<4.17.12"
+                             }
+                         ],
+                         "effects": [],
+                         "range": "<4.17.12",
+                         "nodes": ["node_modules/lodash"],
+                         "fixAvailable": True
+                     }
+                 ]
+             }
+         })
+         mock_run.return_value = mock_process
+
+         # Call the method
+         result = self.scanner.scan_javascript_dependencies(self.test_repo_path)
+
+         # Verify the result
+         self.assertEqual(len(result['vulnerabilities']), 1)
+         self.assertEqual(result['vulnerability_count'], 1)
+         self.assertEqual(result['vulnerabilities'][0]['package'], 'lodash')
+         self.assertEqual(result['vulnerabilities'][0]['severity'], 'high')
+         self.assertEqual(result['vulnerabilities'][0]['title'], 'Prototype Pollution')
+
+     @patch('os.path.exists')
+     @patch('subprocess.run')
+     def test_scan_go_dependencies(self, mock_run, mock_exists):
+         """Test scan_go_dependencies method"""
+         # Set up the mocks
+         mock_exists.return_value = True
+
+         # Mock the subprocess.run result
+         mock_process = MagicMock()
+         mock_process.returncode = 0
+         mock_process.stdout = json.dumps({
+             "Vulns": [
+                 {
+                     "ID": "GO-2020-0015",
+                     "Details": "Improper certificate validation in crypto/x509",
+                     "Affected": [
+                         {
+                             "Module": {
+                                 "Path": "golang.org/x/crypto",
+                                 "Versions": [
+                                     {
+                                         "Fixed": "v0.0.0-20200221170555-0f29369cfe45"
+                                     }
+                                 ]
+                             },
+                             "Packages": [
+                                 {
+                                     "Path": "golang.org/x/crypto/cryptobyte",
+                                     "Symbols": ["String.ReadASN1"]
+                                 }
+                             ]
+                         }
+                     ],
+                     "References": [
+                         {
+                             "Type": "FIX",
+                             "URL": "https://go.dev/cl/219877"
+                         },
+                         {
+                             "Type": "REPORT",
+                             "URL": "https://go.dev/issue/36837"
+                         },
+                         {
+                             "Type": "WEB",
+                             "URL": "https://nvd.nist.gov/vuln/detail/CVE-2020-7919"
+                         }
+                     ],
+                     "Description": "Due to improper bounds checking, maliciously crafted X.509 certificates can cause a panic in certificate verification.",
+                     "CVEs": ["CVE-2020-7919"],
+                     "Severity": "MODERATE"
+                 }
+             ]
+         })
+         mock_run.return_value = mock_process
+
+         # Call the method
+         result = self.scanner.scan_go_dependencies(self.test_repo_path)
+
+         # Verify the result
+         self.assertEqual(len(result['vulnerabilities']), 1)
+         self.assertEqual(result['vulnerability_count'], 1)
+         self.assertEqual(result['vulnerabilities'][0]['package'], 'golang.org/x/crypto')
+         self.assertEqual(result['vulnerabilities'][0]['vulnerability_id'], 'GO-2020-0015')
+         self.assertEqual(result['vulnerabilities'][0]['severity'], 'medium')  # MODERATE maps to medium
+
+     @patch('os.path.exists')
+     @patch('subprocess.run')
+     def test_scan_rust_dependencies(self, mock_run, mock_exists):
+         """Test scan_rust_dependencies method"""
+         # Set up the mocks
+         mock_exists.return_value = True
+
+         # Mock the subprocess.run result
+         mock_process = MagicMock()
+         mock_process.returncode = 0
+         mock_process.stdout = json.dumps({
+             "vulnerabilities": {
+                 "RUSTSEC-2020-0071": {
+                     "advisory": {
+                         "id": "RUSTSEC-2020-0071",
+                         "package": "smallvec",
+                         "title": "Buffer overflow in SmallVec::insert_many",
+                         "description": "Affected versions of smallvec did not properly calculate capacity when inserting multiple elements, which could result in a buffer overflow.",
+                         "date": "2020-12-02",
+                         "aliases": ["CVE-2021-25900"],
+                         "categories": ["memory-corruption"],
+                         "keywords": ["buffer-overflow", "heap-overflow"],
+                         "cvss": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
+                         "related": []
+                     },
+                     "versions": {
+                         "patched": [">=1.6.1"],
+                         "unaffected": ["<1.0.0"]
+                     },
+                     "affected": {
+                         "arch": [],
+                         "os": [],
+                         "functions": ["smallvec::SmallVec::insert_many"]
+                     }
+                 }
+             },
+             "warnings": []
+         })
+         mock_run.return_value = mock_process
+
+         # Call the method
+         result = self.scanner.scan_rust_dependencies(self.test_repo_path)
+
+         # Verify the result
+         self.assertEqual(len(result['vulnerabilities']), 1)
+         self.assertEqual(result['vulnerability_count'], 1)
+         self.assertEqual(result['vulnerabilities'][0]['package'], 'smallvec')
+         self.assertEqual(result['vulnerabilities'][0]['vulnerability_id'], 'RUSTSEC-2020-0071')
+         self.assertEqual(result['vulnerabilities'][0]['title'], 'Buffer overflow in SmallVec::insert_many')
+         self.assertEqual(result['vulnerabilities'][0]['severity'], 'critical')  # CVSS 9.8 maps to critical
+
+     @patch('os.path.exists')
+     @patch('subprocess.run')
+     def test_scan_python_code(self, mock_run, mock_exists):
+         """Test scan_python_code method"""
+         # Set up the mocks
+         mock_exists.return_value = True
+
+         # Mock the subprocess.run result
+         mock_process = MagicMock()
+         mock_process.returncode = 0
+         mock_process.stdout = json.dumps({
+             "results": [
+                 {
+                     "filename": "test.py",
+                     "line_number": 42,
+                     "issue_severity": "HIGH",
+                     "issue_confidence": "HIGH",
+                     "issue_text": "Possible hardcoded password: 'super_secret'",
+                     "test_id": "B105",
+                     "test_name": "hardcoded_password_string"
+                 }
+             ]
+         })
+         mock_run.return_value = mock_process
+
+         # Mock the file discovery
+         with patch.object(self.scanner, '_find_files', return_value=['/test/repo/test.py']):
+             # Call the method
+             result = self.scanner.scan_python_code(self.test_repo_path)
+
+             # Verify the result
+             self.assertEqual(len(result['vulnerabilities']), 1)
+             self.assertEqual(result['vulnerability_count'], 1)
+             self.assertEqual(result['vulnerabilities'][0]['file'], 'test.py')
+             self.assertEqual(result['vulnerabilities'][0]['line'], 42)
+             self.assertEqual(result['vulnerabilities'][0]['severity'], 'high')
+             self.assertEqual(result['vulnerabilities'][0]['message'], "Possible hardcoded password: 'super_secret'")
+
+     @patch('os.path.exists')
+     @patch('subprocess.run')
+     def test_scan_javascript_code(self, mock_run, mock_exists):
+         """Test scan_javascript_code method"""
+         # Set up the mocks
+         mock_exists.return_value = True
+
+         # Mock the subprocess.run result
+         mock_process = MagicMock()
+         mock_process.returncode = 0
+         mock_process.stdout = json.dumps([
+             {
+                 "filePath": "/test/repo/test.js",
+                 "messages": [
+                     {
+                         "ruleId": "security/detect-eval-with-expression",
+                         "severity": 2,
+                         "message": "eval() with variable content can allow an attacker to run arbitrary code.",
+                         "line": 10,
+                         "column": 1,
+                         "nodeType": "CallExpression"
+                     }
+                 ],
+                 "errorCount": 1,
+                 "warningCount": 0,
+                 "fixableErrorCount": 0,
+                 "fixableWarningCount": 0
+             }
+         ])
+         mock_run.return_value = mock_process
+
+         # Mock the file discovery
+         with patch.object(self.scanner, '_find_files', return_value=['/test/repo/test.js']):
+             # Call the method
+             result = self.scanner.scan_javascript_code(self.test_repo_path)
+
+             # Verify the result
+             self.assertEqual(len(result['vulnerabilities']), 1)
+             self.assertEqual(result['vulnerability_count'], 1)
+             self.assertEqual(result['vulnerabilities'][0]['file'], 'test.js')
+             self.assertEqual(result['vulnerabilities'][0]['line'], 10)
+             self.assertEqual(result['vulnerabilities'][0]['severity'], 'high')  # Severity 2 maps to high
+             self.assertEqual(result['vulnerabilities'][0]['message'], "eval() with variable content can allow an attacker to run arbitrary code.")
+
+     def test_scan_repository(self):
+         """Test scan_repository method"""
+         # Mock the language-specific scanning methods
+         self.scanner.scan_python_dependencies = MagicMock(return_value={
+             'vulnerabilities': [{'package': 'django', 'vulnerability_id': 'CVE-2022-34265', 'severity': 'high'}],
+             'vulnerability_count': 1
+         })
+         self.scanner.scan_python_code = MagicMock(return_value={
+             'vulnerabilities': [{'file': 'test.py', 'line': 42, 'severity': 'high'}],
+             'vulnerability_count': 1
+         })
+         self.scanner.scan_javascript_dependencies = MagicMock(return_value={
+             'vulnerabilities': [{'package': 'lodash', 'severity': 'high'}],
+             'vulnerability_count': 1
+         })
+         self.scanner.scan_javascript_code = MagicMock(return_value={
+             'vulnerabilities': [{'file': 'test.js', 'line': 10, 'severity': 'high'}],
+             'vulnerability_count': 1
+         })
+
+         # Call the method
+         result = self.scanner.scan_repository(self.test_repo_path, ['Python', 'JavaScript'])
+
+         # Verify the result
+         self.assertEqual(len(result), 2)  # Two languages
+         self.assertIn('Python', result)
+         self.assertIn('JavaScript', result)
+
+         # Check Python results
+         self.assertEqual(result['Python']['dependency_vulnerabilities']['vulnerability_count'], 1)
+         self.assertEqual(result['Python']['code_vulnerabilities']['vulnerability_count'], 1)
+         self.assertEqual(result['Python']['total_vulnerabilities'], 2)
+
+         # Check JavaScript results
+         self.assertEqual(result['JavaScript']['dependency_vulnerabilities']['vulnerability_count'], 1)
+         self.assertEqual(result['JavaScript']['code_vulnerabilities']['vulnerability_count'], 1)
+         self.assertEqual(result['JavaScript']['total_vulnerabilities'], 2)
+
+         # Verify the method calls
+         self.scanner.scan_python_dependencies.assert_called_once_with(self.test_repo_path)
+         self.scanner.scan_python_code.assert_called_once_with(self.test_repo_path)
+         self.scanner.scan_javascript_dependencies.assert_called_once_with(self.test_repo_path)
+         self.scanner.scan_javascript_code.assert_called_once_with(self.test_repo_path)
+
+     @patch('os.walk')
+     def test_find_files(self, mock_walk):
+         """Test _find_files method"""
+         # Set up the mock
+         mock_walk.return_value = [
+             ('/test/repo', ['dir1'], ['file1.py', 'file2.js']),
+             ('/test/repo/dir1', [], ['file3.py'])
+         ]
+
+         # Call the method
+         python_files = self.scanner._find_files(self.test_repo_path, '.py')
+
+         # Verify the result
+         self.assertEqual(len(python_files), 2)
+         self.assertIn('/test/repo/file1.py', python_files)
+         self.assertIn('/test/repo/dir1/file3.py', python_files)
+
+     @patch('os.path.exists')
+     def test_check_tool_availability(self, mock_exists):
+         """Test _check_tool_availability method"""
+         # Set up the mock
+         mock_exists.side_effect = [True, False]  # First tool exists, second doesn't
+
+         # Call the method
+         result1 = self.scanner._check_tool_availability('tool1')
+         result2 = self.scanner._check_tool_availability('tool2')
+
+         # Verify the result
+         self.assertTrue(result1)
+         self.assertFalse(result2)
+
+     @patch('subprocess.run')
+     def test_run_command(self, mock_run):
+         """Test _run_command method"""
+         # Set up the mock
+         mock_process = MagicMock()
+         mock_process.returncode = 0
+         mock_process.stdout = "Test output"
+         mock_run.return_value = mock_process
+
+         # Call the method
+         returncode, output = self.scanner._run_command(['test', 'command'])
+
+         # Verify the result
+         self.assertEqual(returncode, 0)
+         self.assertEqual(output, "Test output")
+         mock_run.assert_called_once()
+
+     def test_map_cvss_to_severity(self):
+         """Test _map_cvss_to_severity method"""
+         # Call the method with different CVSS scores
+         low = self.scanner._map_cvss_to_severity(3.5)
+         medium = self.scanner._map_cvss_to_severity(5.5)
+         high = self.scanner._map_cvss_to_severity(8.0)
+         critical = self.scanner._map_cvss_to_severity(9.5)
+
+         # Verify the results
+         self.assertEqual(low, 'low')
+         self.assertEqual(medium, 'medium')
+         self.assertEqual(high, 'high')
+         self.assertEqual(critical, 'critical')
+
+
+ if __name__ == "__main__":
+     unittest.main()