garvit2205 committed on
Commit
ef1b4ea
·
verified ·
1 Parent(s): 35fdb19

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -0
app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import os
3
+ from sentence_transformers import SentenceTransformer
4
+ import gradio as gr
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from groq import Groq
7
+
8
+
9
# Load variables from a local .env file (if any) into the process environment,
# then read the Groq API key from it. `api` is None when the variable is unset.
load_dotenv()
api = os.environ.get("groq_api_key")
12
+
13
# Natural-language schema descriptions, one per table. They are embedded for
# similarity search and later pasted verbatim into the LLM prompt.
_STUDENT_METADATA = """

Table: student
Columns:
- student_id: an integer representing the unique ID of a student.
- first_name: a string containing the first name of the student.
- last_name: a string containing the last name of the student.
- date_of_birth: a date representing the student's birthdate.
- email: a string for the student's email address.
- phone_number: a string for the student's contact number.
- major: a string representing the student's major field of study.
- year_of_enrollment: an integer for the year the student enrolled.
- course_id: an integer representing the ID of the course the student is taking (foreign key referencing course_info.course_id).
"""

_PROFESSOR_METADATA = """

Table: professor
Columns:
- professor_id: an integer representing the unique ID of a professor.
- first_name: a string containing the first name of the professor.
- last_name: a string containing the last name of the professor.
- email: a string for the professor's email address.
- department: a string for the department the professor belongs to.
- position: a string representing the professor's job title.
- salary: a float representing the professor's salary.
- date_of_joining: a date for when the professor joined the college.
"""

_COURSE_METADATA = """

Table: course_info
Columns:
- course_id: an integer representing the unique ID of the course.
- course_name: a string containing the course's name.
- course_code: a string for the course's unique code.
- instructor_id: an integer representing the ID of the professor teaching the course (foreign key referencing professor.professor_id).
- department: a string for the department offering the course.
- credits: an integer representing the course credits.
- semester: a string for the semester when the course is offered.
"""

# Filled on first use so the embedding model is loaded once per process
# instead of once per user query.
_METADATA_CACHE = {}


def create_metadata_embeddings():
    """Return the table-description embeddings together with their inputs.

    The sentence-embedding model is loaded and the three table descriptions
    are encoded only on the first call; later calls return the same cached
    tuple. Callers should treat the returned objects as read-only.

    Returns:
        A 5-tuple ``(embeddings, model, student, professor, course)`` where
        ``embeddings`` is the result of encoding the descriptions in the
        order student, professor, course; ``model`` is the
        ``SentenceTransformer`` used to encode them; and the last three
        items are the description strings themselves.
    """
    if "value" not in _METADATA_CACHE:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        metadata_list = [_STUDENT_METADATA, _PROFESSOR_METADATA, _COURSE_METADATA]
        embeddings = model.encode(metadata_list)
        _METADATA_CACHE["value"] = (
            embeddings,
            model,
            _STUDENT_METADATA,
            _PROFESSOR_METADATA,
            _COURSE_METADATA,
        )
    return _METADATA_CACHE["value"]
64
+
65
+
66
def find_best_fit(embeddings, model, user_query, student, professor, course,
                  threshold_ratio=0.8):
    """Pick the table descriptions that are relevant to ``user_query``.

    The query is embedded with ``model`` and compared (cosine similarity)
    against ``embeddings``. The best-scoring description is always selected;
    any other description is selected too when its score exceeds
    ``threshold_ratio`` times the best score.

    Args:
        embeddings: Embeddings of the table descriptions; rows are expected
            in the order student, professor, course.
        model: Object with an ``encode(list_of_str)`` method that produced
            ``embeddings``.
        user_query: The natural-language request from the user.
        student: Description text of the student table.
        professor: Description text of the professor table.
        course: Description text of the course_info table.
        threshold_ratio: Fraction of the best score another table must
            exceed to be included. Defaults to 0.8.

    Returns:
        The selected description texts concatenated in the order student,
        professor, course. Only schema text is returned; no scores or other
        diagnostics are mixed into it, because the result is fed to the LLM.
    """
    descriptions = (student, professor, course)
    query_embedding = model.encode([user_query])
    scores = cosine_similarity(query_embedding, embeddings)[0]

    best_index = scores.argmax()
    cutoff = scores[best_index] * threshold_ratio

    # The explicit `index == best_index` test keeps the top match even when
    # the best score is zero or negative, where `score > cutoff` alone would
    # select nothing and leave the prompt without any schema.
    selected = [
        text
        for index, (text, score) in enumerate(zip(descriptions, scores))
        if index == best_index or score > cutoff
    ]
    return "".join(selected)
83
+
84
+
85
# Fixed instructions sent as the system message of every request.
_SYSTEM_PROMPT = """
You are a versatile SQL query generator capable of handling natural language queries that may involve a single table or multiple tables joined together. Your task is to accurately interpret user intent and generate valid SQL queries based on the provided metadata.

Rules:
Dynamic Table Handling: Support both single-table and multi-table queries, ensuring correctness in joins, filters, and aggregations.
Metadata Validation: Always validate the query against the provided table names, columns, and data types to ensure it is accurate and relevant.
User Intent: Precisely understand the user's requirements, such as filtering, sorting, aggregations, grouping, or joins, as expressed in natural language.
Joins: If the query involves multiple tables, use appropriate join conditions based on foreign keys or shared columns provided in the metadata.
SQL Syntax: Generate SQL queries in standard SQL syntax, ensuring compatibility with most relational database systems.

Output Format:
Output only the SQL query. Do not include any explanations, comments, or additional text.
Ensure the entire query is formatted in a single line for simplicity.

Input Format:
User Query: The natural language description of the required SQL query.
Table Metadata: The structure of the relevant tables, including table names, column names, data types, and relationships (if applicable).

Output Format:
A single-line SQL query that adheres to the rules and matches the user's intent.
You are ready to generate SQL queries based on the user input and provided table metadata.
"""


def create_prompt(user_query, table_metadata):
    """Build the system and user messages for the SQL-generation request.

    Args:
        user_query: The natural-language request typed by the user.
        table_metadata: Description text of the tables relevant to the query.

    Returns:
        A ``(system_prompt, user_prompt)`` tuple. The system prompt is the
        fixed instruction text; the user prompt embeds both arguments under
        the ``User Query:`` and ``Table Metadata:`` labels.
    """
    user_prompt = f"\nUser Query: {user_query}\nTable Metadata: {table_metadata}\n"
    return _SYSTEM_PROMPT, user_prompt
116
+
117
+
118
def generate_output(system_prompt, user_prompt):
    """Ask the Groq chat model for a SQL query and validate the reply.

    Args:
        system_prompt: Instruction text sent as the system message.
        user_prompt: Text sent as the user message (query plus metadata).

    Returns:
        The model's reply with surrounding whitespace removed when it starts
        with ``select`` (case-insensitive); otherwise the fixed message
        ``"Can't perform the task at the moment."``.
    """
    # Use the key read from the environment at module level; no `userdata`
    # object exists in this module.
    client = Groq(api_key=api)
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        model="llama3-70b-8192",
    )

    # Guard against a missing content field and tolerate leading whitespace
    # or newlines before the SELECT keyword.
    reply = (chat_completion.choices[0].message.content or "").strip()

    # Only SELECT statements are passed through; anything else is replaced
    # with a fixed refusal message.
    if reply[:6].lower() == "select":
        return reply
    return "Can't perform the task at the moment."
133
+
134
def response(user_query):
    """Turn a natural-language request into the text shown in the UI.

    Runs the full pipeline: obtain the table-description embeddings, select
    the descriptions relevant to ``user_query``, build the prompts and return
    whatever ``generate_output`` produces for them.
    """
    vectors, encoder, student_meta, professor_meta, course_meta = (
        create_metadata_embeddings()
    )
    relevant_metadata = find_best_fit(
        vectors, encoder, user_query, student_meta, professor_meta, course_meta
    )
    sys_msg, usr_msg = create_prompt(user_query, relevant_metadata)
    return generate_output(sys_msg, usr_msg)
144
+
145
+
146
# Text shown under the title in the web UI so users know which tables and
# columns they can ask about.
desc = """
There are three tables in the database:

Student Table:
This table contains information about students, including the student's unique ID, first name, last name, date of birth, email address, phone number, major field of study, year of enrollment, and the ID of the course they are enrolled in. The course_id serves as a foreign key referencing the course_info table, linking each student to a single course.

Professor Table:
This table includes details about professors, such as the professor's unique ID, first name, last name, email address, department, job position, salary, and date of joining.

Course Info Table:
This table stores details about courses, including the course's unique ID, name, course code, instructor ID, department offering the course, number of credits, and the semester in which the course is offered. The instructor_id is a foreign key referencing the professor table, associating each course with the professor who teaches it.

"""

# Single text box in, single text box out; `response` does all the work.
demo = gr.Interface(
    fn=response,
    inputs=gr.Textbox(label="Please provide the natural language query"),
    outputs=gr.Textbox(label="SQL Query"),
    title="SQL Query generator",
    description=desc,
)

# `share` is a boolean flag; pass True rather than the string "True".
demo.launch(share=True)