Spaces:
Running
Running
Added recursion
Browse files
app.py
CHANGED
@@ -32,7 +32,7 @@ def scrape_and_convert(url, depth):
|
|
32 |
|
33 |
# If depth > 0, extract links and process them
|
34 |
if current_depth > 0:
|
35 |
-
links = LinkExtractor.scrape_url(url, link_type=LinkType.INTERNAL)
|
36 |
for link in links:
|
37 |
markdown_content += f"\n\n## Extracted from: {link}\n"
|
38 |
markdown_content += recursive_scrape(link, current_depth - 1)
|
@@ -50,8 +50,8 @@ def scrape_and_convert(url, depth):
|
|
50 |
iface = gr.Interface(
|
51 |
fn=scrape_and_convert,
|
52 |
inputs=[
|
53 |
-
gr.Textbox(label="Enter URL"),
|
54 |
-
gr.Slider(minimum=0, maximum=3, step=1, label="Search Depth (0 = Only main page)")
|
55 |
],
|
56 |
outputs=gr.Code(label="Markdown Output", language="markdown"),
|
57 |
title="RAGScraper with Recursive Depth",
|
|
|
32 |
|
33 |
# If depth > 0, extract links and process them
|
34 |
if current_depth > 0:
|
35 |
+
links = LinkExtractor.scrape_url(url, link_type=LinkType.INTERNAL, depth=current_depth, visited_urls=visited_urls)
|
36 |
for link in links:
|
37 |
markdown_content += f"\n\n## Extracted from: {link}\n"
|
38 |
markdown_content += recursive_scrape(link, current_depth - 1)
|
|
|
50 |
iface = gr.Interface(
|
51 |
fn=scrape_and_convert,
|
52 |
inputs=[
|
53 |
+
gr.Textbox(label="Enter URL", placeholder="https://example.com"),
|
54 |
+
gr.Slider(minimum=0, maximum=3, step=1, value=0, label="Search Depth (0 = Only main page)")
|
55 |
],
|
56 |
outputs=gr.Code(label="Markdown Output", language="markdown"),
|
57 |
title="RAGScraper with Recursive Depth",
|