File size: 7,540 Bytes
90c062f
 
 
 
314cdcc
90c062f
 
 
 
 
 
 
dba755a
b485e94
90c062f
b485e94
 
90c062f
 
 
f660680
90c062f
 
62e5f0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90c062f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62e5f0b
90c062f
 
b485e94
04ceab8
b485e94
dba755a
b485e94
 
 
 
 
 
 
dba755a
b485e94
90c062f
b485e94
 
 
 
 
 
 
 
 
90c062f
b485e94
 
 
90c062f
 
087c4c8
90c062f
f660680
90c062f
 
 
 
4a6dde5
8a1273c
90c062f
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import json 
import gradio as gr
import os
import requests
from huggingface_hub import AsyncInferenceClient
# NOTE(review): `json` and `requests` are imported but not referenced in the
# visible code — verify before removing (file may be used elsewhere).

# Hugging Face access token and TGI endpoint URL, read from the environment.
HF_TOKEN = os.getenv('HF_TOKEN')
api_url = os.getenv('API_URL')
# NOTE(review): `headers` is not referenced anywhere in the visible code;
# presumably left over from a direct `requests` call — confirm before removing.
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
# Async client used by predict() below to stream tokens from the TGI endpoint.
client = AsyncInferenceClient(api_url)


# Instruction prefix prepended to every user message before it is sent to the
# model; the user's code is appended after "### Input:".
system_prompt = """
### Instruction:
Refactor the provided Python code to improve its maintainability and efficiency and reduce complexity. Include the refactored code along with the comments on the changes made for improving the metrics.
### Input:

"""
# UI text rendered by the Gradio ChatInterface.
title = "Python Refactoring"
# Fixed typo "constrainst" -> "constraints" and broken spacing around "Run ,".
description = """
Please give it 4 to 5 minutes for the model to load and run. Consider using Python code with less than 120 lines of code due to GPU constraints.
"""
# Hides Gradio toast popups.
# NOTE(review): `css` is defined but never passed to the interface below — confirm intent.
css = """.toast-wrap { display: none !important } """
# Three sample inputs for the chat UI, each a poorly-structured Python snippet
# for the model to refactor. The strings are passed to the model verbatim, so
# their contents (including the deliberate smells) must not be edited.
examples=[ [""" 
import sys
import os
import someDatabaseLib

# Global variables
config = {"db": "localhost", "user": "admin", "password": "admin"}
connection = None

def dbConnect():
    global connection
    try:
        connection = someDatabaseLib.connect(config["db"], config["user"], config["password"])
    except Exception as e:
        print(e)
        sys.exit(1)

def fetchData():
    global connection
    if connection is None:
        print("Not connected to DB")
        return None
    try:
        cursor = connection.cursor()
        cursor.execute("SELECT * FROM someTable WHERE someColumn='someValue'")
        return cursor.fetchall()
    except Exception as e:
        print("Failed to fetch data: ", e)
        return None

def processData(data):
    if data is None:
        print("No data provided")
        return None
    result = []
    for row in data:
        # Processing logic here
        result.append(row)
    return result

def main():
    dbConnect()
    data = fetchData()
    if data is None:
        print("No data fetched")
        sys.exit(1)
    processedData = processData(data)
    print("Data processed")

if __name__ == "__main__":
    main()

# Additional functions and logic mixed together without clear separation or modularisation
def someOtherFunction():
    pass

# Hardcoded paths and configuration details
path_to_files = "/path/to/some/files"
for file_name in os.listdir(path_to_files):
    with open(os.path.join(path_to_files, file_name), 'r') as file:
        data = file.read()
        # Do something with the data

# Poor error handling and mixing of concerns (e.g., UI logic with business logic)
def userInterfaceFunction():
    choice = input("Enter your choice: ")
    if choice == "1":
        print("User chose 1")
        # Proceed with option 1
    elif choice == "2":
        print("User chose 2")
        # Proceed with option 2
    else:
        print("Invalid choice")

# Direct database access mixed with business logic without any abstraction layer
def directDBAccess():
    global config
    try:
        conn = someDatabaseLib.connect(config["db"], config["user"], config["password"])
        cursor = conn.cursor()
        cursor.execute("UPDATE someTable SET someColumn='newValue' WHERE anotherColumn='value'")
    except Exception as e:
        print("Database operation failed: ", e)

# Mixing of different levels of abstraction, lack of consistent error handling, and no use of classes or functions to encapsulate related operations

"""] ,
# Example 2: single function with mixed aggregation/ranking concerns.
                         ["""
def analyze_sales_data(sales_records):
    active_sales = filter(lambda record: record['status'] == 'active', sales_records)
    sales_by_category = {}
    for record in active_sales:
        category = record['category']
        total_sales = record['units_sold'] * record['price_per_unit']
        if category not in sales_by_category:
            sales_by_category[category] = {'total_sales': 0, 'total_units': 0}
        sales_by_category[category]['total_sales'] += total_sales
        sales_by_category[category]['total_units'] += record['units_sold']
    average_sales_data = []
    for category, data in sales_by_category.items():
        average_sales = data['total_sales'] / data['total_units']
        sales_by_category[category]['average_sales'] = average_sales
        average_sales_data.append((category, average_sales))
    average_sales_data.sort(key=lambda x: x[1], reverse=True)
    for rank, (category, _) in enumerate(average_sales_data, start=1):
        sales_by_category[category]['rank'] = rank
    return sales_by_category
"""] , 
# Example 3: evaluation helpers with sparse structure and missing docstrings.
["""  
import pandas as pd
import re
import ast
from code_bert_score import score
import numpy as np
def preprocess_code(source_text):
   
    def remove_comments_and_docstrings(source_code):
        source_code = re.sub(r'#.*', '', source_code)
        source_code = re.sub(r'(\'\'\'(.*?)\'\'\'|\"\"\"(.*?)\"\"\")', '', source_code, flags=re.DOTALL)
        return source_code
    pattern = r"```python\s+(.+?)\s+```"
    matches = re.findall(pattern, source_text, re.DOTALL)
    code_to_process = '\n'.join(matches) if matches else source_text
    cleaned_code = remove_comments_and_docstrings(code_to_process)
    return cleaned_code
def evaluate_dataframe(df):
   
    results = {'P': [], 'R': [], 'F1': [], 'F3': []}
    for index, row in df.iterrows():
        try:
            cands = [preprocess_code(row['generated_text'])]
            refs = [preprocess_code(row['output'])]
            P, R, F1, F3 = score(cands, refs, lang='python')
            results['P'].append(P[0])
            results['R'].append(R[0])
            results['F1'].append(F1[0])
            results['F3'].append(F3[0])
        except Exception as e:
            print(f"Error processing row {index}: {e}")
            for key in results.keys():
                results[key].append(None)
    df_metrics = pd.DataFrame(results)
    return df_metrics
def evaluate_dataframe_multiple_runs(df, runs=3):
    
    all_results = []
    for run in range(runs):
        df_metrics = evaluate_dataframe(df)
        all_results.append(df_metrics)
    # Calculate mean and std deviation of metrics across runs
    df_metrics_mean = pd.concat(all_results).groupby(level=0).mean()
    df_metrics_std = pd.concat(all_results).groupby(level=0).std()
    return df_metrics_mean, df_metrics_std
""" ] ]


# Stream text - stream tokens with InferenceClient from TGI
async def predict(message, chatbot,  temperature=0.1, max_new_tokens=4096, top_p=0.6, repetition_penalty=1.15,):
    """Stream a refactoring response from the TGI endpoint, token by token.

    Args:
        message: User-supplied Python code to refactor.
        chatbot: Chat history injected by gr.ChatInterface (not used here).
        temperature: Sampling temperature; clamped to a minimum of 1e-2.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty applied to repeated tokens.

    Yields:
        The accumulated response text after each newly received token, so the
        UI can render a growing message.
    """
    # Clamp temperature away from zero — presumably the backend rejects
    # near-zero values; behavior matches the original `if` guard.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    # Build the full prompt: system instruction + user code + closing tag.
    input_prompt = f"{system_prompt}{message} [/INST] "

    stream = await client.text_generation(
        prompt=input_prompt,
        max_new_tokens=max_new_tokens,
        stream=True,
        best_of=1,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        repetition_penalty=repetition_penalty,
    )
    partial_message = ""
    async for token in stream:
        partial_message += token
        yield partial_message
        



# Wire predict() into a Gradio chat UI and serve it. queue() is required so
# the async generator can stream partial messages to the client.
gr.ChatInterface(
    predict,
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(lines=10,  label="Python Code" , placeholder="Enter or Paste your Python code here..."),
    title=title,
    description=description,
    theme="abidlabs/Lime",
    examples=examples,
    cache_examples=False,  # examples are long snippets; don't precompute model responses
    submit_btn = "Submit_code",
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
).queue().launch()