katsukiai committed on
Commit
630bd17
·
verified ·
1 Parent(s): 7af91bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -55
app.py CHANGED
@@ -3,95 +3,115 @@ import logging
3
  import os
4
  import datetime
5
  import gradio as gr
6
- from huggingface_hub import HfApi, HfFolder
7
- # Set up logging
8
- logging.basicConfig(level=logging.INFO)
9
 
 
 
10
  logger = logging.getLogger(__name__)
11
 
12
- # Define the function to convert text to JSON
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def text_to_json(text):
14
- lines = text.strip().split('\n')
15
  data = [{"text": line} for line in lines]
16
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
17
  filename = f"output_{timestamp}.json"
18
- with open(filename, "a") as f:
 
19
  json.dump(data, f, indent=4)
 
 
20
  return filename
21
 
22
- # Define the function to generate and upload the JSON file
23
  def generate_and_upload(text):
24
  try:
25
- if not text:
26
  raise ValueError("Text input is empty.")
27
 
28
  logger.info(f"Received text input: {text}")
29
-
30
- # Convert text to JSON and save to file
31
- json_file = text_to_json(text)
32
- logger.info(f"JSON file created: {json_file}")
33
-
34
- # Authenticate with Hugging Face Hub
35
- api = HfApi()
36
- token = os.environ['HUGGINGFACE_API_TOKEN']
37
- if token is None:
 
 
38
  raise ValueError("Hugging Face API token not found. Please set HUGGINGFACE_API_TOKEN environment variable.")
39
-
40
- # Upload the file to the dataset repository
 
41
  repo_id = "katsukiai/DeepFocus-X3"
42
  upload_info = api.upload_file(
43
  path_or_fileobj=json_file,
44
- path_in_repo="convert/"+os.path.basename(json_file),
45
  repo_id=repo_id,
46
  repo_type="dataset",
47
  token=token
48
  )
49
- logger.info(f"Upload info: {upload_info}")
50
- message = f"Upload successful! Filename: {os.path.basename(json_file)}"
51
- return message, json_file
52
- except Exception as e:
53
- logger.error(f"Error uploading file: {e}")
54
- return f"Error: {e}", None
55
 
 
 
 
56
 
 
57
 
 
 
 
58
 
59
- # Create the Gradio interface
60
  with gr.Blocks() as demo:
61
  with gr.Tab("About"):
62
  gr.Markdown("""
63
- # Text to JSON uploader
64
- This app allows you to input text, convert it to JSON format, and upload it to the Hugging Face dataset repository.
65
-
66
- ## Instructions
67
- 1. Enter your text in the "Generate" tab.
68
- 2. Click the "Generate and Upload" button.
69
- 3. Download the JSON file if desired.
70
- 4. Check the message for upload status.
71
-
72
- ## Requirements
73
- - Hugging Face API token set as environment variable `HUGGINGFACE_API_TOKEN`.
74
-
75
- ## Obtaining Hugging Face API Token
76
- 1. Log in to your Hugging Face account.
77
- 2. Go to your profile settings.
78
- 3. Generate a new token or use an existing one.
79
- 4. Set the token as an environment variable named `HUGGINGFACE_API_TOKEN`.
80
-
81
- ## Setting Environment Variable
82
- - **Windows**: Set it in System Properties > Advanced > Environment Variables.
83
- - **macOS/Linux**: Add `export HUGGINGFACE_API_TOKEN=your_token` to your shell profile (e.g., `.bashrc`, `.zshrc`).
84
  """)
85
-
86
  with gr.Tab("Generate"):
87
  text_input = gr.Textbox(label="Enter text")
88
  output_message = gr.Textbox(label="Status message")
89
- json_file_downloader = gr.File(label="Download JSON", interactive=False)
90
  generate_button = gr.Button("Generate and Upload")
91
- generate_button.click(fn=generate_and_upload, inputs=text_input, outputs=[output_message, json_file_downloader])
92
-
93
- # Launch the Gradio app
94
- demo.launch()
95
-
96
 
 
 
 
 
 
97
 
 
 
 
3
  import os
4
  import datetime
5
  import gradio as gr
6
+ import torch
7
+ from transformers import AutoModelForCausalLM, AutoTokenizer
8
+ from huggingface_hub import HfApi
9
 
10
# Set up logging
# Configure root logging once at import time; timestamped format applies to all handlers.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Load DeepSeek-V3 model and tokenizer for CPU
# NOTE(review): verify the repo id — the Hub model is published as "deepseek-ai/DeepSeek-V3"
# (different casing), and it is a very large model; loading it fully in float32 on CPU
# may exceed this machine's RAM. Confirm before relying on this at startup.
MODEL_NAME = "deepseek-ai/deepseek-v3"
logger.info(f"Loading model: {MODEL_NAME} (CPU mode)")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# float32 + device_map="cpu" forces full-precision, CPU-only inference (no GPU required).
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float32, device_map="cpu")
20
# Function to process text with DeepSeek-V3
def process_text_with_model(text):
    """Run *text* through the module-level DeepSeek-V3 model and return the decoded output.

    Args:
        text: Prompt string to feed the causal language model.

    Returns:
        The generated sequence decoded to a string, special tokens stripped.
        Note: the decoded output includes the prompt tokens as well, since the
        full output sequence (outputs[0]) is decoded.
    """
    logger.info("Processing text with DeepSeek-V3 model (CPU)...")
    inputs = tokenizer(text, return_tensors="pt").to("cpu")  # Ensures CPU usage
    # Inference only: no_grad avoids building an autograd graph (significant CPU-memory win).
    with torch.no_grad():
        # Bug fix: max_length=200 capped prompt+generation combined, so prompts near or
        # over 200 tokens produced little or no new text. max_new_tokens bounds only the
        # generated continuation.
        outputs = model.generate(**inputs, max_new_tokens=200)
    processed_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return processed_text
27
+
28
# Function to convert text to JSON
def text_to_json(text):
    """Write *text* to a timestamped JSON file, one record per input line.

    Args:
        text: Raw text; each line becomes one {"text": line} record.

    Returns:
        The name of the JSON file that was written (in the current directory).
    """
    # splitlines() also handles \r\n and \r line endings, unlike split("\n").
    lines = text.strip().splitlines()
    data = [{"text": line} for line in lines]
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"output_{timestamp}.json"

    with open(filename, "w") as f:
        json.dump(data, f, indent=4)

    # Bug fix: this previously logged the literal text "(unknown)" instead of the filename.
    logger.info(f"JSON file created: {filename}")
    return filename
40
 
41
# Function to generate JSON and upload to Hugging Face
def generate_and_upload(text):
    """Process *text* with the model, save the result as JSON, and upload it to the dataset repo.

    Args:
        text: User-provided input text from the Gradio textbox.

    Returns:
        A (status_message, json_file_path) tuple matching the Gradio outputs.
        On failure the path is None and the message describes the error.
    """
    try:
        if not text.strip():
            raise ValueError("Text input is empty.")

        logger.info(f"Received text input: {text}")

        # Process text with DeepSeek-V3
        processed_text = process_text_with_model(text)
        logger.info(f"Processed text: {processed_text}")

        # Convert processed text to JSON
        json_file = text_to_json(processed_text)

        # Get Hugging Face API token
        token = os.getenv("HUGGINGFACE_API_TOKEN")
        if not token:
            raise ValueError("Hugging Face API token not found. Please set HUGGINGFACE_API_TOKEN environment variable.")

        # Upload file to Hugging Face
        api = HfApi()
        repo_id = "katsukiai/DeepFocus-X3"
        upload_info = api.upload_file(
            path_or_fileobj=json_file,
            path_in_repo=f"convert/{os.path.basename(json_file)}",
            repo_id=repo_id,
            repo_type="dataset",
            token=token
        )

        logger.info(f"File uploaded successfully: {upload_info}")

        # Bug fix: the file used to be deleted here and None returned for the file slot,
        # which left the "Download JSON" gr.File output permanently empty despite the UI
        # promising a download. Keep the file and hand its path back so the component
        # can serve it.
        return f"Upload successful! Filename: {os.path.basename(json_file)}", json_file

    except Exception as e:
        # Top-level UI boundary: log the full traceback, surface a readable message.
        logger.exception("Error while generating/uploading JSON")
        return f"Error: {str(e)}", None
83
 
84
# Create Gradio UI
with gr.Blocks() as demo:
    # Static help/about tab.
    with gr.Tab("About"):
        gr.Markdown("""
        # Text Processor with DeepSeek-V3 (CPU)
        - Processes text with DeepSeek-V3 Transformer
        - Converts output to JSON
        - Uploads to Hugging Face

        ## Instructions:
        1. Enter text in the "Generate" tab.
        2. Click "Generate and Upload."
        3. Download JSON if needed.
        4. Check upload status.

        ## Requirements:
        - **Runs on CPU** (No GPU required).
        - **Hugging Face API Token** (`HUGGINGFACE_API_TOKEN`) must be set.
        """)

    # Main workflow tab: text input -> generate_and_upload -> (status message, JSON file).
    with gr.Tab("Generate"):
        text_input = gr.Textbox(label="Enter text")
        output_message = gr.Textbox(label="Status message")
        # NOTE(review): interactive=True renders this as an *upload* widget even though it
        # is only ever used as an output; the previous revision used interactive=False —
        # confirm this change was intentional.
        json_file_downloader = gr.File(label="Download JSON", interactive=True)
        generate_button = gr.Button("Generate and Upload")

        # Wire the button to the handler; outputs map positionally to the
        # (message, file_path) tuple returned by generate_and_upload.
        generate_button.click(
            fn=generate_and_upload,
            inputs=text_input,
            outputs=[output_message, json_file_downloader]
        )

# Launch Gradio app
demo.launch()