import copy
import json
import os
import random
import re
from collections import Counter
from datetime import date

import gradio as gr
import pandas as pd
import supabase
from websockets import asyncio

###### OG FUNCTIONS TO GENERATE SCHEDULES ######

# CONSTANTS
NAME_COL = 'Juggler_Name'
NUM_WORKSHOPS_COL = 'Num_Workshops'
AVAIL_COL = 'Availability'
DESCRIP_COL = 'Workshop_Descriptions'
DELIMITER = ';'


class Schedule:
    """A mapping of timeslot -> list of instructors, plus running totals.

    Attributes:
        timeslots: dict whose keys are timeslot labels and whose values are
            lists of the people teaching in that slot. The dict passed to the
            constructor is stored as-is (not copied) and mutated in place.
        num_timeslots_filled: number of timeslots with at least one instructor.
        total_num_workshops: total number of (person, timeslot) assignments.
    """

    def __init__(self, timeslots: dict):
        self.timeslots = timeslots
        self.num_timeslots_filled = sum(
            1 for instructors in timeslots.values() if len(instructors) > 0
        )
        self.total_num_workshops = sum(
            len(instructors) for instructors in timeslots.values()
        )

    def add(self, person: str, time: str):
        """Assign `person` to `time` and update the counters.

        Raises:
            KeyError: if `time` is not a known timeslot. The lookup happens
                before any counter changes, so the object stays consistent.
        """
        slot = self.timeslots[time]
        if not slot:
            self.num_timeslots_filled += 1
        slot.append(person)
        self.total_num_workshops += 1

    def remove(self, person: str, time: str):
        """Remove one occurrence of `person` from `time` and update the counters.

        Raises:
            KeyError: if `time` is not a known timeslot.
            ValueError: if `person` is not in that slot. The list removal
                happens before any counter changes, so a failed removal
                leaves the totals untouched.
        """
        slot = self.timeslots[time]
        slot.remove(person)
        self.total_num_workshops -= 1
        if not slot:
            self.num_timeslots_filled -= 1

    def print(self):
        """Print the counters followed by one line per timeslot."""
        print(f"# timeslots filled: {self.num_timeslots_filled}")
        print(f"# workshops: {self.total_num_workshops}")
        for time, instructors in self.timeslots.items():
            print(f"{time}: {', '.join(instructors)}")


# Returns True if the person can teach during the slot, and False otherwise
def can_teach(person: str, slot: list, capacity: int) -> bool:
    """Return True if `slot` has room left and `person` is not already in it.

    Args:
        person: the instructor being considered.
        slot: the people already teaching in the timeslot.
        capacity: maximum number of simultaneous workshops in one timeslot.
    """
    # A full slot takes nobody, and no one can teach two workshops at once.
    return len(slot) < capacity and person not in slot


# Extracts relevant information from the df with availability and puts it into a useable format
def convert_df(df, num_timeslots: int):
    """Turn the form-response DataFrame into the structures the search uses.

    Args:
        df: DataFrame with the NAME_COL, AVAIL_COL and NUM_WORKSHOPS_COL
            columns; AVAIL_COL holds DELIMITER-separated timeslot labels.
        num_timeslots: total number of distinct timeslots.

    Returns:
        dict with three keys:
            'people': list of names, each repeated once per workshop that
                person wants to teach, ordered so people who want fewer
                workshops come first.
            'availability': dict of name -> list of stripped timeslot labels,
                ordered so the least-available people come first.
            'completely_available': names whose availability lists exactly
                `num_timeslots` entries.
    """
    # Key: person's name. Value: a list of their availability
    availability = {}
    # Key: person's name. Value: how many workshops they want to teach
    pref_dict = {}
    # Instructors who can teach anytime
    completely_available = []

    # Iterate the columns directly so this works whatever the DataFrame index is.
    rows = zip(df[NAME_COL], df[AVAIL_COL], df[NUM_WORKSHOPS_COL])
    for name, raw_avail, num_workshops in rows:
        slots = raw_avail.split(DELIMITER)
        if len(slots) == num_timeslots:
            completely_available.append(name)
        else:
            # NOTE(review): the flattened source does not show how far this
            # `else` extends. It is reconstructed here so fully-available
            # instructors are reported separately and are NOT added to
            # `availability`/`people` -- confirm against the original layout.
            availability[name] = [slot.strip() for slot in slots]
            pref_dict[name] = num_workshops

    # The idea is to start with people who are the LEAST available to teach,
    # then put the more available instructors into the remaining slots.
    # sorted() is stable, so ties keep their original row order.
    least_available_first = sorted(availability, key=lambda k: len(availability[k]))
    new_avail_dict = {name: availability[name] for name in least_available_first}

    # People who want to teach less come first; anyone teaching several
    # workshops appears in the list once per workshop.
    people = []
    for name, number in sorted(pref_dict.items(), key=lambda item: item[1]):
        people.extend([name] * int(number))

    return {
        'people': people,
        'availability': new_avail_dict,
        'completely_available': completely_available,
    }


# Makes a dictionary where each key is a timeslot and each value is a list.
# If there's no partial schedule, each list will be empty.
# If there's a partial schedule, each list will include the people teaching during that slot.
def initialize_timeslots(df) -> dict: all_timeslots = set() availability = df[AVAIL_COL] for elem in availability: curr_list = elem.split(DELIMITER) for inner in curr_list: all_timeslots.add(inner.strip()) to_return = {} for slot in all_timeslots: to_return[slot] = [] return to_return # Recursive function that generates all possible schedules def find_all_schedules(people: list, availability: dict, schedule_obj: Schedule, capacity: int, schedules: list, max_timeslots_list: list, max_workshops_list: list) -> None: if schedule_obj.num_timeslots_filled > max_timeslots_list[0] or schedule_obj.num_timeslots_filled == max_timeslots_list[0]: schedules.append(copy.deepcopy(schedule_obj)) max_timeslots_list[0] = schedule_obj.num_timeslots_filled # Keep track of total number of workshops taught if schedule_obj.total_num_workshops > max_workshops_list[0] or schedule_obj.total_num_workshops == max_workshops_list[0]: max_workshops_list[0] = schedule_obj.total_num_workshops # Base case if len(people) == 0: return # Recursive cases person = people[0] for time in availability[person]: if can_teach(person, schedule_obj.timeslots[time], capacity): # Choose (put that person in that timeslot) schedule_obj.add(person, time) # Explore (assign everyone else to timeslots based on that decision) if len(people) == 1: find_all_schedules([], availability, schedule_obj, capacity, schedules, max_timeslots_list, max_workshops_list) else: find_all_schedules(people[1:len(people)], availability, schedule_obj, capacity, schedules, max_timeslots_list, max_workshops_list) # Unchoose (remove that person from the timeslot) schedule_obj.remove(person, time) # NOTE: this will not generate a full timeslot, but could still lead to a good schedule else: if len(people) == 1: find_all_schedules([], availability, schedule_obj, capacity, schedules, max_timeslots_list, max_workshops_list) else: find_all_schedules(people[1:len(people)], availability, schedule_obj, capacity, schedules, max_timeslots_list, 
max_workshops_list) return # Puts the schedule in the correct order def my_sort(curr_sched: dict, og_slots: list): # example {'4 pm': ['logan', 'andrew'], '1 pm': ['graham', 'joyce'], '3 pm': ['logan', 'dan'], '2 pm': ['graham', 'dan']} to_return = {} for elem in og_slots: if elem in curr_sched: to_return[elem] = curr_sched[elem] else: to_return[elem] = [] return to_return # Makes an organized DataFrame given a list of schedules def make_df(schedules: list, descrip_dict: dict, og_slots: list): all_times = [] all_instructors = [] count = 1 for i in range (len(schedules)): curr_sched = schedules[i] #sorted_dict = dict(sorted(curr_sched.items(), key=lambda item: item[0])) sorted_dict = my_sort(curr_sched, og_slots) curr_times = sorted_dict.keys() curr_instructors = sorted_dict.values() # Include an empty row between schedules if count != 1: all_times.append("") all_instructors.append("") if len(schedules) > 1 or len(schedules) == 1: all_times.append(f"Schedule #{count}") all_instructors.append("") count += 1 for slot in curr_times: all_times.append(slot) for instructors in curr_instructors: if len(descrip_dict) == 0: all_instructors.append("; ". 
join(instructors)) if len(descrip_dict) > 0: big_str = "" for person in instructors: if person in descrip_dict: descrip = descrip_dict[person] else: descrip = "Workshop" # {descrip} is a list bc they want to teach multiple workshops if '\n' in descrip: new_str = f"\n\n- {person}:\n{descrip}" else: new_str = f"\n\n- {person}: {descrip}" big_str += new_str all_instructors.append(big_str.strip()) if len(curr_instructors) == 0: all_instructors.append('N/A') new_df = pd.DataFrame({ "Schedule": all_times, "Instructor(s)": all_instructors }) new_df['Instructor(s)'] = new_df['Instructor(s)'].astype(str) return new_df, count - 1 # Makes a dictionary where each key is the instructor's name and # the value is the workshop(s) they're teaching def get_description_dict(df): new_dict = {} for row in range(len(df)): name = df.loc[row, NAME_COL] new_dict[name] = df.loc[row, DESCRIP_COL] return new_dict # Classifies schedules into two categories: complete and incomplete: # Complete = everyone is teaching desired number of timeslots and each timeslot has at least one workshop # NOTE: I'm using "valid" instead of "complete" as a variable name so that I don't mix it up # Incomplete = not complete def classify_schedules(people: list, schedules: list, partial_names: list, total_timeslots: int, max_timeslots_filled: int) -> tuple: valid_schedules = [] # Key: score # Value: schedules with that score incomplete_schedules = {} # Get frequency of items in the list # Key: person # Value: number of workshops they WANT to teach pref_dict = Counter(people) pref_dict.update(Counter(partial_names)) all_names = pref_dict.keys() ## Evaluate each schedule ## overall_max = 0 # changes throughout the function for sched in schedules: if sched.num_timeslots_filled != max_timeslots_filled: continue # Key: person # Value: how many workshops they're ACTUALLY teaching in this schedule freq_dict = {} for name in all_names: freq_dict[name] = 0 for timeslot, instructor_list in sched.timeslots.items(): for 
instructor in instructor_list: if instructor in freq_dict: freq_dict[instructor] += 1 else: print("there is a serious issue!!!!") # See if everyone is teaching their desired number of workshops everyone_is_teaching = True for teacher, freq in freq_dict.items(): if freq != pref_dict[teacher]: #print(f"teacher: {teacher}. preference: {pref_dict[teacher]}. actual frequency: {freq}") everyone_is_teaching = False break filled_all_timeslots = (sched.num_timeslots_filled == total_timeslots) if everyone_is_teaching and filled_all_timeslots: valid_schedules.append(sched) else: # No need to add to incomplete_schedules if there's at least one valid schedule if len(valid_schedules) > 0: continue #print(f"teaching desired number of timeslots: {everyone_is_teaching}. At least one workshop per slot: {filled_all_timeslots}.\n{sched}\n") if sched.num_timeslots_filled > overall_max or sched.num_timeslots_filled == overall_max: overall_max = sched.num_timeslots_filled if sched.num_timeslots_filled not in incomplete_schedules: incomplete_schedules[sched.num_timeslots_filled] = [] incomplete_schedules[sched.num_timeslots_filled].append(sched) if len(valid_schedules) > 0: return valid_schedules, [] else: return [], incomplete_schedules[overall_max] # Parameters: schedules that have the max number of timeslots filled # Max number of workshops taught in filled timeslots # Returns: a list of all schedules that have the max number of workshops # To make it less overwhelming, it will return {cutoff} randomly def get_best_schedules(schedules: list, cutoff: str, max_workshops: int) -> list: cutoff = int(cutoff) seen = [] best_schedules = [] for sched in schedules: if sched.total_num_workshops != max_workshops: continue if sched in seen: continue else: seen.append(sched) best_schedules.append(sched.timeslots) if cutoff == -1: return best_schedules else: if len(best_schedules) > cutoff: # Sample without replacement return random.sample(best_schedules, cutoff) else: return best_schedules # Big 
# wrapper function that calls the other functions
def main(df, capacity: int, num_results: int, og_slots: list):
    """Generate schedules from the form responses and write them to schedule.csv.

    Args:
        df: DataFrame of form responses.
        capacity: maximum number of workshops per timeslot.
        num_results: how many schedules to return; -1 returns all of them.
        og_slots: the timeslot labels in display order.

    Returns:
        (results message, schedule DataFrame, path of the CSV that was written)
    """
    descrip_dict = get_description_dict(df)
    partial_names = []
    timeslots = initialize_timeslots(df)
    total_timeslots = len(timeslots)
    print(total_timeslots)
    schedules = []
    schedule_obj = Schedule(timeslots)

    # Convert the df with everyone's availability to a usable format
    converted = convert_df(df, total_timeslots)
    people = converted['people']
    availability = converted['availability']
    completely_available = converted['completely_available']
    print(', '.join(people))
    print(availability)
    print(f"These instructors are completely avaialable: {', '.join(completely_available)}")

    # Get the bare minimum of workshops that will be taught
    distinct_slots = {slot for slots in availability.values() for slot in slots}
    num_distinct_slots = len(distinct_slots)
    print(num_distinct_slots)

    # One-element lists so the recursive search can update them in place.
    max_timeslots_list = [num_distinct_slots]
    max_workshops_list = [num_distinct_slots]
    find_all_schedules(people, availability, schedule_obj, capacity, schedules, max_timeslots_list, max_workshops_list)
    valid_schedules, decent_schedules = classify_schedules(people, schedules, partial_names, total_timeslots, max_timeslots_list[0])

    # Prefer complete schedules; fall back to the best incomplete ones.
    found_complete = len(valid_schedules) > 0
    candidates = valid_schedules if found_complete else decent_schedules
    best_schedules = get_best_schedules(candidates, num_results, max_workshops_list[0])
    new_df, count = make_df(best_schedules, descrip_dict, og_slots)

    if count == 0:
        results = "I wasn't able to make any schedules from these responses."
    elif found_complete:
        if count == 1:
            results = "Good news! I was able to make a complete schedule."
        else:
            results = "Good news! I was able to make multiple complete schedules."
    elif count == 1:
        results = "Here is the best option."
    else:
        results = "Here are the best options."

    path = _output_path("schedule.csv")
    new_df.to_csv(path, index=False)
    return results, new_df, path


def _output_path(filename: str) -> str:
    """Return the absolute path of `filename` inside the current working directory."""
    return os.path.join(os.path.abspath(os.getcwd()), filename)


##### ALL THE NEW STUFF WITH SUPABASE ETC. #####

### CONSTANTS ###
# NAME_COL, NUM_WORKSHOPS_COL, AVAIL_COL, DESCRIP_COL and DELIMITER are
# defined once at the top of the file and are not repeated here.
EMAIL_COL = 'Email'
ALERT_TIME = None  # leave warnings on screen indefinitely
FORM_NOT_FOUND = 'Form not found'
INCORRECT_PASSWORD = "The password is incorrect. Please check the password and try again. If you don't remember your password, please email jugglinggym@gmail.com."
NO_RESPONSES = 'No one has filled out the form yet.'
NUM_ROWS = 1
NUM_COLS_SCHEDULES = 2
NUM_COLS_ALL_RESPONSES = 4
NUM_RESULTS = 10  # randomly get {NUM_RESULTS} results

# NOTE(review): `theme` is defined but never passed to Gradio below -- confirm
# whether that is intentional before wiring it in.
theme = gr.themes.Soft(
    primary_hue="cyan",
    secondary_hue="pink",
    font=[gr.themes.GoogleFont('sans-serif'), 'ui-sans-serif', 'system-ui', 'Montserrat'],
)

### Connect to Supabase ###
# Prefer the URL / API_KEY environment variables. The literals are kept only
# as a fallback so existing deployments keep working.
# TODO: move the key out of the source entirely, then rotate it.
URL = os.environ.get('URL', 'https://ubngctgvhjgxkvimdmri.supabase.co')
API_KEY = os.environ.get('API_KEY', 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InVibmdjdGd2aGpneGt2aW1kbXJpIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MzQ5MjAwOTQsImV4cCI6MjA1MDQ5NjA5NH0.NtGdfP8GYNuYdPdsaLW5GjgfB0_7Q1kNBIDJtPhO8nY')
client = supabase.create_client(URL, API_KEY)


### DEFINE FUNCTIONS ###

## Multi-purpose functions ##

def standardize(schedule_name: str):
    """Return a lowercased and stripped version of the schedule name.

    Returns:
        str
    """
    return schedule_name.lower().strip()


def _fetch_form(schedule_name: str, *columns: str):
    """Return the first 'Forms' row matching the standardized name, or None.

    Only the requested `columns` are selected.
    """
    response = client.table('Forms').select(*columns).eq('form_name', standardize(schedule_name)).execute()
    rows = response.data
    return rows[0] if len(rows) > 0 else None


## Functions to manage/generate schedules ##

def make_visible(schedule_name: str, password: str):
    """Use the name and password to get the form, then reveal the page controls.

    Returns (in output order):
        gr.Button: corresponds to find_form_btn
        gr.Column: corresponds to all_responses_group
        gr.Column: generate_schedules_explanation
        gr.Row: corresponds to generate_btns
        gr.Column: corresponds to open_close_btn_col
        gr.Button: corresponds to open_close_btn

    When validation fails, a warning is shown and six blank components are
    returned, which leaves the page unchanged.
    """
    skip_output = gr.Button(), gr.Column(), gr.Column(), gr.Row(), gr.Column(), gr.Button()

    if not schedule_name:
        gr.Warning('Please enter the form name.', ALERT_TIME)
        return skip_output
    if not password:
        gr.Warning('Please enter the password.', ALERT_TIME)
        return skip_output

    form = _fetch_form(schedule_name, 'password', 'status')
    if form is None:
        gr.Warning(f"There is no form called \"{schedule_name}\". Please check the spelling and try again.", ALERT_TIME)
        return skip_output
    if password != form['password']:
        gr.Warning(INCORRECT_PASSWORD, ALERT_TIME)
        return skip_output

    # status -> (notice shown to the user, label for the open/close button)
    status_ui = {
        'open': ('Btw, the form is currently OPEN.', "Close Form"),
        'closed': ('Btw, the form is currently CLOSED.', "Open Form"),
    }
    if form['status'] not in status_ui:
        # Without this branch the function would return None and Gradio would
        # fail trying to fill six outputs.
        gr.Warning(f"The form \"{schedule_name}\" has an unexpected status.", ALERT_TIME)
        return skip_output

    notice, toggle_label = status_ui[form['status']]
    gr.Info('', ALERT_TIME, title=notice)
    return (
        gr.Button(variant='secondary'),
        gr.Column(visible=True),
        gr.Column(visible=True),
        gr.Row(visible=True),
        gr.Column(visible=True),
        gr.Button(toggle_label, visible=True),
    )


def make_blank_schedule():
    """Make a blank schedule that we can return to prevent things from breaking.

    The blank schedule is also written to schedule.csv.

    Returns:
        tuple with 3 elements:
            0: str indicating that the form wasn't found
            1: the DataFrame
            2: the path to the DataFrame
    """
    df = pd.DataFrame({
        'Schedule': [],
        'Instructors': []
    })
    path = _output_path("schedule.csv")
    df.to_csv(path, index=False)
    return FORM_NOT_FOUND, df, path


def get_df_from_db(schedule_name: str, password: str):
    """Get the form responses from Supabase and convert them to a DataFrame.

    Returns:
        if found: a dictionary with three keys: capacity (int), df (DataFrame), and slots (list)
        if not found, or the password is wrong: FORM_NOT_FOUND (a warning is shown first)
    """
    form = _fetch_form(schedule_name, 'password', 'capacity', 'responses', 'slots')
    if form is None:
        gr.Warning(f"There is no form called \"{schedule_name}\". Please check the spelling and try again.", ALERT_TIME)
        return FORM_NOT_FOUND
    if password != form['password']:
        gr.Warning(INCORRECT_PASSWORD, ALERT_TIME)
        return FORM_NOT_FOUND

    # 'responses' is decoded with json.loads, so it is presumably stored as a
    # JSON string. Treat a missing/empty value as "no responses yet".
    raw_responses = form['responses']
    df = pd.DataFrame(json.loads(raw_responses)) if raw_responses else pd.DataFrame()
    return {'capacity': form['capacity'], 'df': df, 'slots': form['slots']}


def get_all_responses(schedule_name: str, password: str):
    """Put all of the form responses into a DataFrame and write it to a CSV.

    Returns:
        gr.DataFrame showing the responses
        gr.File pointing at the CSV
    """
    res = get_df_from_db(schedule_name, password)
    form_found = not (res == FORM_NOT_FOUND)
    df = res['df'] if form_found else None

    if df is None or len(df) == 0:
        # A DataFrame built from zero responses has no columns at all, so
        # indexing AVAIL_COL would fail; use an empty frame with the headers.
        df = pd.DataFrame({
            NAME_COL: [],
            EMAIL_COL: [],
            NUM_WORKSHOPS_COL: [],
            AVAIL_COL: [],
            DESCRIP_COL: []
        })
        # get_df_from_db already warned if the form itself wasn't found.
        if form_found:
            gr.Warning('', ALERT_TIME, title=NO_RESPONSES)
    else:
        # Put a space after each delimiter so the availability reads better.
        df[AVAIL_COL] = [elem.replace(DELIMITER, f"{DELIMITER} ") for elem in df[AVAIL_COL].to_list()]

    path = _output_path("all responses.csv")
    df.to_csv(path, index=False)
    return gr.DataFrame(df, visible=True), gr.File(path, visible=True)


def _generate_schedules(schedule_name: str, password: str, num_results: int):
    """Shared body of the two generate-schedule wrappers.

    `num_results` is passed straight to main(); -1 asks for every schedule.
    """
    res = get_df_from_db(schedule_name, password)
    if res == FORM_NOT_FOUND:
        # Return blank schedule (should be impossible to get to this condition btw)
        message, schedule_df, path = make_blank_schedule()
        gr.Warning(FORM_NOT_FOUND, ALERT_TIME)
    elif len(res['df']) == 0:
        gr.Warning('', ALERT_TIME, title=NO_RESPONSES)
        _, schedule_df, path = make_blank_schedule()
        # The form exists, so don't report "Form not found" in the results box.
        message = NO_RESPONSES
    else:
        gr.Info('', ALERT_TIME, title='Working on generating schedules! Please DO NOT click anything on this page.')
        message, schedule_df, path = main(res['df'], res['capacity'], num_results, res['slots'])
        gr.Info('', ALERT_TIME, title=message)
    return gr.Textbox(message), gr.DataFrame(schedule_df, visible=True), gr.File(path, visible=True)


def generate_schedules_wrapper_subset_results(schedule_name: str, password: str):
    """Generate the best possible schedules and return a random subset of them.

    (The same as generate_schedules_wrapper_all_results, except that this
    function only returns NUM_RESULTS of them. It is a separate function so
    each Gradio button has its own callback.)

    Returns:
        gr.Textbox with the results message
        gr.DataFrame with the schedules
        gr.File with the path to the CSV
    """
    return _generate_schedules(schedule_name, password, NUM_RESULTS)


def generate_schedules_wrapper_all_results(schedule_name: str, password: str):
    """Generate the best possible schedules and return ALL of them.

    (The same as generate_schedules_wrapper_subset_results, except that this
    function returns every result. It is a separate function so each Gradio
    button has its own callback.)

    Returns:
        gr.Textbox with the results message
        gr.DataFrame with the schedules
        gr.File with the path to the CSV
    """
    return _generate_schedules(schedule_name, password, -1)


def toggle_btn(schedule_name: str, password: str):
    """Open/close a form and relabel the button to match the new state.

    Returns:
        gr.Button (a blank one, leaving the button unchanged, when the form
        isn't found or the password is wrong)

    Raises:
        gr.Error: if the stored status is neither 'open' nor 'closed'.
    """
    form = _fetch_form(schedule_name, 'password', 'status')
    if form is None:
        gr.Warning('', ALERT_TIME, title=f"There was no form called \"{schedule_name}\". Please check the spelling and try again.")
        return gr.Button()
    if password != form['password']:
        gr.Warning(INCORRECT_PASSWORD, ALERT_TIME)
        # The only output is a button, so return a button rather than a string.
        return gr.Button()

    # current status -> (new status, confirmation message, new button label)
    transitions = {
        'open': ('closed', "The form was closed successfully!", 'Open Form'),
        'closed': ('open', "The form was opened successfully!", 'Close Form'),
    }
    if form['status'] not in transitions:
        # gr.Error is an exception: it is only shown to the user when raised.
        raise gr.Error('An unexpected error has occurred.', duration=ALERT_TIME)

    new_status, confirmation, new_label = transitions[form['status']]
    client.table('Forms').update({'status': new_status}).eq('form_name', standardize(schedule_name)).execute()
    gr.Info('', ALERT_TIME, title=confirmation)
    return gr.Button(new_label)


### GRADIO ###
with gr.Blocks() as demo:
    ### VIEW FORM RESULTS ###
    with gr.Tab('View Form Results'):
        with gr.Column() as btn_group:
            schedule_name = gr.Textbox(label="Form Name")
            # Mask the password while it is typed.
            password = gr.Textbox(label="Password", type="password")
            find_form_btn = gr.Button('Find Form', variant='primary')

        # 1. Get all responses
        with gr.Column(visible=False) as all_responses_col:
            gr.Markdown('# Download All Form Responses')
            gr.Markdown("Download everyone's responses to the form.")
            all_responses_btn = gr.Button('Download All Form Responses', variant='primary')
            with gr.Row() as all_responses_output_row:
                df_out = gr.DataFrame(
                    row_count=(NUM_ROWS, "dynamic"),
                    col_count=(NUM_COLS_ALL_RESPONSES, "dynamic"),
                    headers=[NAME_COL, NUM_WORKSHOPS_COL, AVAIL_COL, DESCRIP_COL],
                    wrap=True,
                    scale=4,
                    visible=False,
                )
                file_out = gr.File(label="Downloadable file", scale=1, visible=False)
            all_responses_btn.click(fn=get_all_responses, inputs=[schedule_name, password], outputs=[df_out, file_out])

        # 2. Generate schedules
        with gr.Column(visible=False) as generate_schedules_explanation_col:
            gr.Markdown('# Create Schedules based on Everyone\'s Preferences.')
        with gr.Row(visible=False) as generate_btns_row:
            generate_ten_results_btn = gr.Button('Generate a Subset of Schedules', variant='primary', visible=True)
            generate_all_results_btn = gr.Button('Generate All Possible Schedules', visible=True)
        with gr.Row(visible=True) as generated_schedules_output:
            text_out = gr.Textbox(label='Results')
            generated_df_out = gr.DataFrame(
                row_count=(NUM_ROWS, "dynamic"),
                col_count=(NUM_COLS_SCHEDULES, "dynamic"),
                headers=["Schedule", "Instructors"],
                wrap=True,
                scale=3,
                visible=False,
            )
            generated_file_out = gr.File(label="Downloadable schedule file", scale=1, visible=False)
        generate_ten_results_btn.click(
            fn=generate_schedules_wrapper_subset_results,
            inputs=[schedule_name, password],
            outputs=[text_out, generated_df_out, generated_file_out],
            api_name='generate_random_schedules',
        )
        generate_all_results_btn.click(
            fn=generate_schedules_wrapper_all_results,
            inputs=[schedule_name, password],
            outputs=[text_out, generated_df_out, generated_file_out],
            api_name='generate_all_schedules',
        )

        # 3. Open/close button
        with gr.Column(visible=False) as open_close_btn_col:
            gr.Markdown('# Open or Close Form')
            open_close_btn = gr.Button(variant='primary')
        open_close_btn.click(fn=toggle_btn, inputs=[schedule_name, password], outputs=[open_close_btn])

        find_form_btn.click(
            fn=make_visible,
            inputs=[schedule_name, password],
            outputs=[find_form_btn, all_responses_col, generate_schedules_explanation_col, generate_btns_row, open_close_btn_col, open_close_btn],
        )

# NOTE(review): this allows Gradio to serve files from the whole working
# directory, which presumably also holds this source file. Consider writing
# the CSVs to a dedicated sub-directory and allowing only that.
directory = os.path.abspath(os.getcwd())
allowed = directory #+ "/schedules"
demo.launch(allowed_paths=[allowed], show_error=True)