prthm11 committed on
Commit
3134223
·
verified ·
1 Parent(s): 80dc18c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -65
app.py CHANGED
@@ -2096,39 +2096,38 @@ def upscale_image(image: Image.Image, scale: int = 2) -> Image.Image:
2096
  except Exception as e:
2097
  logger.error(f"❌ Error during image upscaling: {str(e)}")
2098
  return image
2099
-
2100
- def create_sb3_archive(project_folder, project_id):
2101
  """
2102
  Zips the project folder and renames it to an .sb3 file.
2103
-
2104
  Args:
2105
- project_folder (str): The path to the directory containing the project.json and assets.
2106
  project_id (str): The unique ID for the project, used for naming the .sb3 file.
2107
-
2108
  Returns:
2109
- str: The path to the created .sb3 file, or None if an error occurred.
2110
  """
2111
- # output_filename = os.path.join("outputs", project_id)
2112
- # output_filename = OUTPUT_DIR / project_id
2113
- output_filename = GEN_PROJECT_DIR / project_id
2114
  zip_path = None
2115
  sb3_path = None
2116
  try:
2117
- zip_path = shutil.make_archive(output_filename, 'zip', root_dir=project_folder)
 
 
2118
  logger.info(f"Project folder zipped to: {zip_path}")
2119
-
2120
  # 2. Rename the .zip file to .sb3
2121
- sb3_path = f"{output_filename}.sb3"
2122
  os.rename(zip_path, sb3_path)
2123
  logger.info(f"Renamed {zip_path} to {sb3_path}")
2124
 
2125
  return sb3_path
2126
  except Exception as e:
2127
- logger.error(f"Error creating SB3 archive for {project_id}: {e}")
2128
  # Clean up any partial files if an error occurs
2129
- if zip_path and os.path.exists(zip_path):
2130
  os.remove(zip_path)
2131
- if sb3_path and os.path.exists(sb3_path):
2132
  os.remove(sb3_path)
2133
  return None
2134
 
@@ -2143,28 +2142,29 @@ def download_sb3(project_id):
2143
  Allows users to download the generated .sb3 Scratch project file.
2144
  """
2145
  sb3_filename = f"{project_id}.sb3"
2146
- sb3_filepath = os.path.join("generated_projects", sb3_filename)
2147
 
2148
  try:
2149
- if os.path.exists(sb3_filepath):
2150
  logger.info(f"Serving SB3 file for project ID: {project_id}")
2151
- # send_from_directory serves the file and handles content-disposition for download
2152
  return send_from_directory(
2153
- directory="generated_projects",
2154
  path=sb3_filename,
2155
- as_attachment=True, # This makes the browser download the file
2156
- download_name=sb3_filename # This sets the filename for the download
2157
  )
2158
  else:
2159
- logger.warning(f"SB3 file not found for ID: {project_id}")
2160
  return jsonify({"error": "Scratch project file not found"}), 404
2161
  except Exception as e:
2162
- logger.error(f"Error serving SB3 file for ID {project_id}: {e}")
2163
  return jsonify({"error": "Failed to retrieve Scratch project file"}), 500
2164
 
2165
  # API endpoint
2166
  @app.route('/process_pdf', methods=['POST'])
2167
  def process_pdf():
 
 
2168
  try:
2169
  logger.info("Received request to process PDF.")
2170
  if 'pdf_file' not in request.files:
@@ -2176,72 +2176,80 @@ def process_pdf():
2176
  return jsonify({"error": "Empty filename"}), 400
2177
 
2178
  # ================================================= #
2179
- # Generate Random UUID for project folder name #
2180
  # ================================================= #
2181
  project_id = str(uuid.uuid4()).replace('-', '')
2182
- # project_folder = os.path.join("outputs", f"{project_id}")
2183
- project_folder = OUTPUT_DIR / project_id
2184
 
2185
  # =========================================================================== #
2186
- # Create empty json in project_{random_id} folder #
2187
  # =========================================================================== #
2188
- #os.makedirs(project_folder, exist_ok=True)
2189
-
 
 
2190
  # Save the uploaded PDF temporarily
2191
  filename = secure_filename(pdf_file.filename)
2192
- temp_dir = tempfile.mkdtemp()
2193
- saved_pdf_path = os.path.join(temp_dir, filename)
2194
  pdf_file.save(saved_pdf_path)
2195
 
2196
- # logger.info(f"Created project folder: {project_folder}")
2197
  logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
2198
 
2199
  # Extract & process
2200
- json_path = None
2201
- output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)
 
2202
 
2203
  # Check extracted_sprites.json for "scratch block" in any 'name'
2204
- extracted_dir = os.path.join(JSON_DIR, os.path.splitext(filename)[0])
2205
- extracted_sprites_json = os.path.join(extracted_dir, "extracted_sprites.json")
2206
-
2207
- if not os.path.exists(extracted_sprites_json):
 
 
 
 
 
 
 
 
 
 
 
2208
  return jsonify({"error": "No extracted_sprites.json found"}), 500
2209
 
2210
- with open(extracted_sprites_json, 'r') as f:
2211
  sprite_data = json.load(f)
2212
 
2213
- project_output = similarity_matching(output_path, project_folder)
2214
- logger.info("Received request to process PDF.")
 
2215
 
2216
  with open(project_output, 'r') as f:
2217
  project_skeleton = json.load(f)
2218
 
2219
-
2220
- images = convert_from_path(saved_pdf_path, dpi=300)
2221
- print(type)
2222
  page = images[0]
2223
- # img_base64 = base64.b64encode(images).decode("utf-8")
2224
  buf = BytesIO()
2225
  page.save(buf, format="PNG")
2226
  img_bytes = buf.getvalue()
2227
  img_b64 = base64.b64encode(img_bytes).decode("utf-8")
2228
- #image_paths = await convert_pdf_to_images_async(saved_pdf_path)
2229
 
2230
- #updating logic here [Dev Patel]
2231
- initial_state_dict = {
2232
- "project_json": project_skeleton,
2233
- "description": "The pseudo code for the script",
2234
- "project_id": project_id,
2235
- "project_image": img_b64,
2236
- "action_plan": {},
2237
- "pseudo_code": {},
2238
- "temporary_node": {},
2239
- }
2240
 
2241
- final_state_dict = app_graph.invoke(initial_state_dict) # Pass dictionary
2242
 
2243
- final_project_json = final_state_dict['project_json'] # Access as dict
2244
- # final_project_json = project_skeleton
2245
 
2246
  # Save the *final* filled project JSON, overwriting the skeleton
2247
  with open(project_output, "w") as f:
@@ -2252,17 +2260,43 @@ def process_pdf():
2252
  sb3_file_path = create_sb3_archive(project_folder, project_id)
2253
  if sb3_file_path:
2254
  logger.info(f"Successfully created SB3 file: {sb3_file_path}")
2255
- # Instead of returning the local path, return a URL to the download endpoint
2256
- download_url = f"https://prthm11-scratch-vlm-v1.hf.space/download_sb3/{project_id}"
2257
  print(f"DOWNLOAD_URL: {download_url}")
2258
- return jsonify({"message": "Procesed PDF and Game sb3 generated successfully", "project_id": project_id, "download_url": download_url})
2259
  else:
2260
  return jsonify(error="Failed to create SB3 archive"), 500
2261
 
2262
  except Exception as e:
2263
  logger.error(f"Error during processing the pdf workflow for project ID {project_id}: {e}", exc_info=True)
2264
- return jsonify({"error": f"❌ Failed to process PDF: {str(e)}"}), 500
2265
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2266
  if __name__ == '__main__':
2267
- # os.makedirs("outputs", exist_ok=True) #== commented by P
2268
  app.run(host='0.0.0.0', port=7860, debug=True)
 
2096
  except Exception as e:
2097
  logger.error(f"❌ Error during image upscaling: {str(e)}")
2098
  return image
2099
+
2100
def create_sb3_archive(project_folder: Path, project_id: str) -> Path | None:
    """
    Zip the project folder and publish it as ``<project_id>.sb3``.

    The archive is first written as ``GEN_PROJECT_DIR/<project_id>.zip`` by
    ``shutil.make_archive`` and then moved to
    ``GEN_PROJECT_DIR/<project_id>.sb3``.

    Args:
        project_folder (Path): Directory containing the project.json and assets;
            its contents become the root of the archive.
        project_id (str): Unique ID for the project, used to name the .sb3 file.

    Returns:
        Path | None: Path to the created .sb3 file, or None if any step failed
        (the error is logged with its traceback).
    """
    # make_archive takes the base name WITHOUT extension and appends ".zip" itself.
    output_base_name = GEN_PROJECT_DIR / project_id
    sb3_path = GEN_PROJECT_DIR / f"{project_id}.sb3"
    zip_path = None
    try:
        zip_path = Path(
            shutil.make_archive(str(output_base_name), 'zip', root_dir=str(project_folder))
        )
        logger.info(f"Project folder zipped to: {zip_path}")

        # os.replace (unlike os.rename) overwrites an existing destination on
        # every platform, so re-generating an archive does not fail on Windows.
        os.replace(zip_path, sb3_path)
        logger.info(f"Renamed {zip_path} to {sb3_path}")

        return sb3_path
    except Exception as e:
        logger.error(f"Error creating SB3 archive for {project_id}: {e}", exc_info=True)
        # Only the intermediate .zip is ours to clean up: a failed os.replace
        # never creates sb3_path, so anything found there was not produced by
        # this call and must not be deleted.
        if zip_path is not None:
            try:
                zip_path.unlink(missing_ok=True)
            except OSError as cleanup_error:
                # Cleanup is best-effort; never let it mask the "return None" contract.
                logger.warning(f"Could not remove partial archive {zip_path}: {cleanup_error}")
        return None
2133
 
 
2142
  Allows users to download the generated .sb3 Scratch project file.
2143
  """
2144
  sb3_filename = f"{project_id}.sb3"
2145
+ sb3_filepath = GEN_PROJECT_DIR / sb3_filename # Use Path object consistent with creation
2146
 
2147
  try:
2148
+ if sb3_filepath.exists():
2149
  logger.info(f"Serving SB3 file for project ID: {project_id}")
 
2150
  return send_from_directory(
2151
+ directory=GEN_PROJECT_DIR, # Pass the Path object here
2152
  path=sb3_filename,
2153
+ as_attachment=True,
2154
+ download_name=sb3_filename
2155
  )
2156
  else:
2157
+ logger.warning(f"SB3 file not found for ID: {project_id} at {sb3_filepath}")
2158
  return jsonify({"error": "Scratch project file not found"}), 404
2159
  except Exception as e:
2160
+ logger.error(f"Error serving SB3 file for ID {project_id}: {e}", exc_info=True)
2161
  return jsonify({"error": "Failed to retrieve Scratch project file"}), 500
2162
 
2163
  # API endpoint
2164
  @app.route('/process_pdf', methods=['POST'])
2165
  def process_pdf():
2166
+ project_id = None # Initialize project_id outside try-block for error logging
2167
+ project_folder = None
2168
  try:
2169
  logger.info("Received request to process PDF.")
2170
  if 'pdf_file' not in request.files:
 
2176
  return jsonify({"error": "Empty filename"}), 400
2177
 
2178
  # ================================================= #
2179
+ # Generate Random UUID for project folder name #
2180
  # ================================================= #
2181
  project_id = str(uuid.uuid4()).replace('-', '')
2182
+ project_folder = OUTPUT_DIR / project_id # Use Path object
 
2183
 
2184
  # =========================================================================== #
2185
+ # Create empty json in project_{random_id} folder #
2186
  # =========================================================================== #
2187
+ # THIS WAS COMMENTED OUT - CRITICAL FIX!
2188
+ project_folder.mkdir(parents=True, exist_ok=True)
2189
+ logger.info(f"Created project folder: {project_folder}")
2190
+
2191
  # Save the uploaded PDF temporarily
2192
  filename = secure_filename(pdf_file.filename)
2193
+ temp_dir = Path(tempfile.mkdtemp()) # Use Path for temp dir
2194
+ saved_pdf_path = temp_dir / filename
2195
  pdf_file.save(saved_pdf_path)
2196
 
 
2197
  logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
2198
 
2199
  # Extract & process
2200
+ # Ensure extract_images_from_pdf can handle Path objects or convert before passing
2201
+ json_path = None # As per original code, json_path is None
2202
+ extracted_output_dir, result = extract_images_from_pdf(saved_pdf_path, json_path)
2203
 
2204
  # Check extracted_sprites.json for "scratch block" in any 'name'
2205
+ # Ensure JSON_DIR is a Path object or use os.path.join consistently
2206
+ extracted_sprites_json = JSON_DIR / Path(filename).stem / "extracted_sprites.json"
2207
+
2208
+ # NOTE: The original `extracted_dir = os.path.join(JSON_DIR, os.path.splitext(filename)[0])`
2209
+ # and `extracted_sprites_json = os.path.join(extracted_dir, "extracted_sprites.json")`
2210
+ # implies `extract_images_from_pdf` puts stuff in JSON_DIR.
2211
+ # Ensure `extract_images_from_pdf` actually creates this path.
2212
+ # For this example, I'm assuming `extracted_output_dir` (from `extract_images_from_pdf`)
2213
+ # contains the `extracted_sprites.json`. Adjust based on your `extract_images_from_pdf`
2214
+ # implementation. Let's use `extracted_output_dir` if it's correct.
2215
+ extracted_sprites_json_path = extracted_output_dir / "extracted_sprites.json"
2216
+
2217
+
2218
+ if not extracted_sprites_json_path.exists():
2219
+ logger.error(f"No extracted_sprites.json found at {extracted_sprites_json_path}")
2220
  return jsonify({"error": "No extracted_sprites.json found"}), 500
2221
 
2222
+ with open(extracted_sprites_json_path, 'r') as f:
2223
  sprite_data = json.load(f)
2224
 
2225
+ # similarity_matching should return the path to the project.json within project_folder
2226
+ project_output = similarity_matching(extracted_output_dir, project_folder)
2227
+ logger.info("Similarity matching completed.") # Removed duplicate "Received request to process PDF."
2228
 
2229
  with open(project_output, 'r') as f:
2230
  project_skeleton = json.load(f)
2231
 
2232
+ images = convert_from_path(saved_pdf_path, dpi=300)
2233
+ # print(type) # This `print(type)` line seems like a leftover debug statement and will print the `type` built-in function.
 
2234
  page = images[0]
 
2235
  buf = BytesIO()
2236
  page.save(buf, format="PNG")
2237
  img_bytes = buf.getvalue()
2238
  img_b64 = base64.b64encode(img_bytes).decode("utf-8")
 
2239
 
2240
+ # initial_state_dict = {
2241
+ # "project_json": project_skeleton,
2242
+ # "description": "The pseudo code for the script",
2243
+ # "project_id": project_id,
2244
+ # "project_image": img_b64,
2245
+ # "action_plan": {},
2246
+ # "pseudo_code": {},
2247
+ # "temporary_node": {},
2248
+ # }
 
2249
 
2250
+ # final_state_dict = app_graph.invoke(initial_state_dict)
2251
 
2252
+ final_project_json = project_skeleton#final_state_dict['project_json']
 
2253
 
2254
  # Save the *final* filled project JSON, overwriting the skeleton
2255
  with open(project_output, "w") as f:
 
2260
  sb3_file_path = create_sb3_archive(project_folder, project_id)
2261
  if sb3_file_path:
2262
  logger.info(f"Successfully created SB3 file: {sb3_file_path}")
2263
+ download_url = f"/download_sb3/{project_id}" # Use relative path for internal Flask app
 
2264
  print(f"DOWNLOAD_URL: {download_url}")
2265
+ return jsonify({"message": "Processed PDF and Game sb3 generated successfully", "project_id": project_id, "download_url": download_url})
2266
  else:
2267
  return jsonify(error="Failed to create SB3 archive"), 500
2268
 
2269
  except Exception as e:
2270
  logger.error(f"Error during processing the pdf workflow for project ID {project_id}: {e}", exc_info=True)
2271
+ return jsonify({"error": f"❌ Failed to process PDF: {str(e)}"}), 500
2272
+ finally:
2273
+ # Clean up temporary PDF directory
2274
+ if 'temp_dir' in locals() and temp_dir.exists():
2275
+ shutil.rmtree(temp_dir)
2276
+ logger.info(f"Cleaned up temporary directory: {temp_dir}")
2277
+ # Optionally, clean up the main project_folder if an error occurred before SB3 creation
2278
+ # (Be careful with this if you want to inspect failed project folders)
2279
+ # if project_folder and project_folder.exists() and sb3_file_path is None:
2280
+ # shutil.rmtree(project_folder)
2281
+ # logger.info(f"Cleaned up partial project folder: {project_folder}")
2282
+
2283
@app.route('/list_projects', methods=['GET'])
def list_projects():
    """
    Debug endpoint: report every generated .sb3 file with its download URL.

    Returns a JSON object with a ``generated_sb3_files`` list (HTTP 200), or a
    JSON error payload (HTTP 500) if the directory scan raises.
    """
    try:
        # One entry per regular file in GEN_PROJECT_DIR whose suffix is ".sb3";
        # the file stem doubles as the project ID used by /download_sb3.
        listing = [
            {
                "project_id": entry.stem,
                "download_url": f"/download_sb3/{entry.stem}",
                "filename": entry.name,
            }
            for entry in GEN_PROJECT_DIR.iterdir()
            if entry.suffix == '.sb3' and entry.is_file()
        ]
        return jsonify({"generated_sb3_files": listing}), 200
    except Exception as e:
        logger.error(f"Error listing projects: {e}", exc_info=True)
        return jsonify({"error": "Failed to list generated projects"}), 500
2300
+
2301
  if __name__ == '__main__':
 
2302
  app.run(host='0.0.0.0', port=7860, debug=True)