reshinthadith
/

local-repo-coder-v0

@@ -39,17 +39,62 @@ The model generates the repository in the following format, Code to parse it and
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import fire
-def main(model_path:str="./models_dir/repo_coder_v1"):
-    input_prompt =  "###Instruction: {prompt}".format(prompt="Generate a small python repo for matplotlib to visualize timeseries data to read from timeseries.csv file using pandas.")
     def load_model(model_path):
         """
         Load the model and tokenizer from the specified path.
         """
         tokenizer = AutoTokenizer.from_pretrained(model_path)
-        model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto")
         return model, tokenizer
@@ -57,12 +102,25 @@ def main(model_path:str="./models_dir/repo_coder_v1"):
     print(f"Loaded model from {model_path}.")
     input = tokenizer(input_prompt, return_tensors="pt").to(model.device)
-    print(input)
     with torch.no_grad():
         output = model.generate(**input, max_length=1024, do_sample=True, temperature=0.9, top_p=0.95, top_k=50)
-        output_text = tokenizer.decode(output[0], skip_special_tokens=True)
-        print(f"Generated text: {output_text}")
 if __name__ == "__main__":
     fire.Fire(main)
 ```

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import fire
+from pathlib import Path
+import os
+import re
def generate_repo_from_string(input_str: str, output_dir: str) -> None:
    """
    Parse the <output> markup in a string and write the files it describes.

    The first <output>...</output> section is located, and every <fileN> or
    <bashfile> entry inside it is written below ``output_dir``:

      <file1>path/to/file.ext<content>...file content...</content></file1>
      <bashfile>script.sh<content>...script content...</content></bashfile>

    All entries are validated before anything is written, so a rejected
    entry never leaves a half-generated repository behind.

    Args:
        input_str: The full string containing <output> markup.
        output_dir: Directory where files will be created. Missing parent
            directories are created; existing files are overwritten.

    Raises:
        ValueError: If no <output> section is present, or if an entry's file
            name is empty or resolves to a location outside ``output_dir``
            (absolute path, ``..`` components, or a symlink escaping it).
    """
    # Extract the content inside <output>...</output>
    out_match = re.search(r"<output>(.*?)</output>", input_str, re.DOTALL)
    if not out_match:
        raise ValueError("No <output> section found in input.")
    output_section = out_match.group(1)

    # file1, file2, file3, ... and bashfile; \1 forces the closing tag to
    # match the opening one.
    pattern = re.compile(
        r"<(file\d+|bashfile)>([^<]+?)<content>(.*?)</content></\1>",
        re.DOTALL,
    )

    # The markup is generated text, so file names are untrusted: resolve each
    # target and make sure it stays strictly inside the output directory.
    root = os.path.realpath(output_dir)
    planned = []
    for _tag, filename, content in pattern.findall(output_section):
        name = filename.strip()
        file_path = os.path.join(output_dir, name)
        resolved = os.path.realpath(file_path)
        try:
            inside = os.path.commonpath([root, resolved]) == root
        except ValueError:
            # commonpath raises when the paths cannot be compared
            # (e.g. different drives on Windows) -- treat as an escape.
            inside = False
        if not inside or resolved == root:
            raise ValueError(
                f"Refusing to write outside the output directory: {name!r}"
            )
        planned.append((file_path, content))

    for file_path, content in planned:
        # Ensure parent directory exists
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        # Drop exactly one leading newline (the one that follows <content>);
        # any further blank lines belong to the file itself.
        if content.startswith("\n"):
            content = content[1:]
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)

    print(f"Repository generated at: {output_dir}")
def main(model_path: str = "./models_dir/repo_coder_v1",
         prompt: str = "Generate a small python repo for matplotlib to visualize timeseries data to read from timeseries.csv file using polars.",
         output_path: str = "./output_dir/demo2"):
    """
    Generate a repository from a prompt and write it to disk.

    Loads the causal LM and tokenizer from ``model_path``, samples a
    completion for ``prompt``, parses the completion with
    ``generate_repo_from_string`` into ``output_path``, and prints the
    resulting directory tree.

    Args:
        model_path: Path passed to ``from_pretrained`` for both the model
            and the tokenizer.
        prompt: Instruction text; it is wrapped as ``###Instruction: <prompt>``.
        output_path: Directory the generated files are written to. It is
            created (including parents) if missing.
    """
    input_prompt = "###Instruction: {prompt}".format(prompt=prompt)

    def load_model(model_path):
        """
        Load the model and tokenizer from the specified path.
        """
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        # Use the first GPU when one is available; otherwise fall back to CPU
        # instead of failing on machines without CUDA.
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto").to(device)
        model.eval()
        return model, tokenizer

    # Bind the loaded objects in this scope; generation below needs both.
    model, tokenizer = load_model(model_path)
    print(f"Loaded model from {model_path}.")

    # Named `inputs` rather than `input` to avoid shadowing the builtin.
    inputs = tokenizer(input_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_length=1024, do_sample=True, temperature=0.9, top_p=0.95, top_k=50)
    generated_code_repo = tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Generated code repo: {generated_code_repo}")

    Path(output_path).mkdir(parents=True, exist_ok=True)
    generate_repo_from_string(generated_code_repo, output_path)

    def list_files(startpath):
        """Print the directory tree under ``startpath``, indented by depth."""
        for root, dirs, files in os.walk(startpath):
            # Depth = number of path separators below the starting directory.
            level = root.replace(startpath, '').count(os.sep)
            indent = ' ' * 4 * (level)
            print('{}{}/'.format(indent, os.path.basename(root)))
            subindent = ' ' * 4 * (level + 1)
            for f in files:
                print('{}{}'.format(subindent, f))

    list_files(output_path)
 # When run as a script, hand main() to python-fire so it is invoked from the command line.
 if __name__ == "__main__":
     fire.Fire(main)
 ```