olmOCR-2-7B-1025-INT4
This is a 4-bit quantized version of allenai/olmOCR-2-7B-1025, created using the Auto-Round framework.
This model is designed for efficient inference on consumer hardware like NVIDIA RTX GPUs, where the original BF16 or FP8 models may not be feasible.
Model Details
- Base Model: allenai/olmOCR-2-7B-1025
- Quantization Method: Auto-Round
- Precision: 4-bit
- Group Size: 128
- Scheme: Symmetric
Note: Quantization reduces memory and computational costs but may result in a slight accuracy degradation compared to the original model.
How to Use
Transformers
For standard usage with the transformers library, please follow the code examples provided on the original model card.
vLLM (Recommended)
For high-performance inference and deployment, we recommend using vLLM. We also provide a standalone script for efficiently processing multi-page PDF documents. This script operates independently and does not require the official olmOCR toolkit, offering a lightweight and fast way to perform OCR on entire documents.
import base64
import sys
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

import fitz
import openai
import requests
# OpenAI-compatible API client. The api_key and base_url values below are
# placeholders: replace "ip:port" with the address of the server that hosts
# the model (presumably a vLLM OpenAI-compatible endpoint, per the README text).
client = openai.OpenAI(api_key="sk-", base_url="http://ip:port/v1")
# Model name sent with every chat-completion request.
model = "winninghealth/olmOCR-2-7B-1025-INT4"
# Instruction text sent together with each page image.
# NOTE(review): the sentence about figures ends at "markdown syntax" with no
# syntax example after it, so the prompt may have been truncated -- confirm
# against the upstream olmOCR prompt before relying on it.
build_no_anchoring_v4_yaml_prompt = "Attached is one page of a document that you must process. Just return the plain text representation of this document as if you were reading it naturally. Convert equations to LateX and tables to HTML.\nIf there are any figures or charts, label them with the following markdown syntax \nReturn your output as markdown."
def render_pdf_to_base64png(doc: fitz.Document, page_num: int, target_longest_image_dim: int = 2048) -> str:
    """Render one PDF page to a PNG image and return it base64-encoded.

    Args:
        doc: An open PyMuPDF document.
        page_num: 1-based page number; it is converted to PyMuPDF's 0-based
            index internally.
        target_longest_image_dim: Size the longer page side is scaled to. The
            same zoom factor is applied to both axes, so the aspect ratio is
            preserved.

    Returns:
        The PNG bytes of the rendered page as a base64 string.
    """
    pdf_page = doc[page_num - 1]
    bounds = pdf_page.rect

    # One uniform scale factor, derived from the longer side of the page.
    scale = target_longest_image_dim / max(bounds.width, bounds.height)

    pixmap = pdf_page.get_pixmap(matrix=fitz.Matrix(scale, scale))
    png_bytes = pixmap.tobytes("png")
    return base64.b64encode(png_bytes).decode("utf-8")
def get_image_base64_from_url(image_url, timeout=30):
    """Download an image and return its raw bytes base64-encoded.

    Args:
        image_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on an unresponsive
            server, which would hang the whole script.

    Returns:
        The response body as a base64 string.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(image_url, timeout=timeout)
    response.raise_for_status()
    return base64.b64encode(response.content).decode("utf-8")
def ocr_page_with_nanonets_s(img_base64):
    """Send one page image to the OCR model and return the text it produces.

    Args:
        img_base64: Base64-encoded PNG data of a single page; it is embedded
            in a ``data:image/png;base64,...`` URL inside the request.

    Returns:
        The message content of the first completion choice. An empty string
        is returned when the server sends no text content (``None``), so the
        result can always be written to a text file.
    """
    page_image = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{img_base64}"},
    }
    instruction = {
        "type": "text",
        "text": build_no_anchoring_v4_yaml_prompt,
    }
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": [page_image, instruction]}],
        temperature=0.0,
        max_tokens=15000,  # max 16192
    )
    content = response.choices[0].message.content
    # The API types message content as optional; normalise a missing value.
    return content if content is not None else ""
# PyMuPDF is documented as not safe for concurrent use from multiple threads,
# and process_page runs on ThreadPoolExecutor workers that all share a single
# Document. Page rendering is therefore serialized with this lock; the OCR
# requests, which dominate the run time, still overlap.
_RENDER_LOCK = threading.Lock()


def process_page(doc, page_num, page_count):
    """Render one page and run OCR on it.

    Args:
        doc: The open PyMuPDF document shared by all workers.
        page_num: 1-based number of the page to process.
        page_count: Total number of pages. Not used here; kept so existing
            callers that pass it continue to work.

    Returns:
        A ``(page_num, content)`` tuple so results that complete out of order
        can be matched back to their page.
    """
    with _RENDER_LOCK:
        img_base64 = render_pdf_to_base64png(doc, page_num, target_longest_image_dim=1288)
    content = ocr_page_with_nanonets_s(img_base64)
    return page_num, content
# Script entry: OCR every page of the PDF named on the command line and save
# the combined result as a Markdown file with the same base name.
if len(sys.argv) < 2:
    print("Usage: python olmOCR.py <pdf_file_path>")
    sys.exit(1)

file_path = sys.argv[1]
# Swap only the final extension. A plain str.replace(".pdf", ".md") would also
# rewrite ".pdf" inside directory names, and for a path that does not contain
# a lowercase ".pdf" (e.g. "SCAN.PDF") it would return the input path
# unchanged, so the output would overwrite the source PDF.
output_path = Path(file_path).with_suffix(".md")

# Open the PDF once for all pages; the finally clause closes it even when a
# page fails.
doc = fitz.open(file_path)
try:
    page_count = len(doc)
    print(f"Total pages: {page_count}")
    print("Starting OCR processing...\n")

    completed_pages = 0
    page_contents = {}
    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = {
            executor.submit(process_page, doc, page_num, page_count): page_num
            for page_num in range(1, page_count + 1)
        }
        # Pages finish in arbitrary order; collect them keyed by page number.
        for future in as_completed(futures):
            # result() re-raises any exception raised while processing a page.
            page_num, content = future.result()
            page_contents[page_num] = content
            completed_pages += 1

            # Display progress
            progress = (completed_pages / page_count) * 100
            print(f"Progress: {completed_pages}/{page_count} pages ({progress:.1f}%)")
finally:
    doc.close()

# The output file is created only after every page succeeded, so a failed run
# does not leave an empty or partial Markdown file behind. Pages are written
# in page order, back-to-back with no separator between them.
with open(output_path, "w", encoding="utf-8") as f:
    f.writelines(page_contents[i] for i in range(1, page_count + 1))

print(f"\nDone! Output saved to: {output_path}")
- Downloads last month
- 14,360