Update README.md
Browse files
README.md
CHANGED
@@ -105,7 +105,7 @@ from openai import OpenAI
|
|
105 |
from PIL import Image
|
106 |
from typhoon_ocr.ocr_utils import render_pdf_to_base64png, get_anchor_text
|
107 |
|
108 |
-
|
109 |
"default": lambda base_text: (f"Below is an image of a document page along with its dimensions. "
|
110 |
f"Simply return the markdown representation of this document, presenting tables in markdown format as they naturally appear.\n"
|
111 |
f"If the document contains images, use a placeholder like dummy.png for each image.\n"
|
@@ -128,7 +128,7 @@ def get_prompt(prompt_name: str) -> Callable[[str], str]:
|
|
128 |
:param prompt_name: The identifier for the desired prompt.
|
129 |
:return: A callable that takes the base text and returns the system prompt as a string.
|
130 |
"""
|
131 |
-
return
|
132 |
|
133 |
|
134 |
|
@@ -209,7 +209,7 @@ print(text_output[0])
|
|
209 |
This model only works with the specific prompts defined below, where `{base_text}` refers to information extracted from the PDF metadata using the `get_anchor_text` function from the `typhoon-ocr` package. It will not function correctly with any other prompts.
|
210 |
|
211 |
```python
|
212 |
-
|
213 |
"default": lambda base_text: (f"Below is an image of a document page along with its dimensions. "
|
214 |
f"Simply return the markdown representation of this document, presenting tables in markdown format as they naturally appear.\n"
|
215 |
f"If the document contains images, use a placeholder like dummy.png for each image.\n"
|
|
|
105 |
from PIL import Image
|
106 |
from typhoon_ocr.ocr_utils import render_pdf_to_base64png, get_anchor_text
|
107 |
|
108 |
+
PROMPTS = {
|
109 |
"default": lambda base_text: (f"Below is an image of a document page along with its dimensions. "
|
110 |
f"Simply return the markdown representation of this document, presenting tables in markdown format as they naturally appear.\n"
|
111 |
f"If the document contains images, use a placeholder like dummy.png for each image.\n"
|
|
|
128 |
:param prompt_name: The identifier for the desired prompt.
|
129 |
:return: A callable that takes the base text and returns the system prompt as a string.
|
130 |
"""
|
131 |
+
return PROMPTS.get(prompt_name, lambda x: "Invalid PROMPT_NAME provided.")
|
132 |
|
133 |
|
134 |
|
|
|
209 |
This model only works with the specific prompts defined below, where `{base_text}` refers to information extracted from the PDF metadata using the `get_anchor_text` function from the `typhoon-ocr` package. It will not function correctly with any other prompts.
|
210 |
|
211 |
```python
|
212 |
+
PROMPTS = {
|
213 |
"default": lambda base_text: (f"Below is an image of a document page along with its dimensions. "
|
214 |
f"Simply return the markdown representation of this document, presenting tables in markdown format as they naturally appear.\n"
|
215 |
f"If the document contains images, use a placeholder like dummy.png for each image.\n"
|