RDF Validation Deployment
committed on
Commit
·
e344fcd
1
Parent(s):
2cc7244
Initial deployment of RDF validation app to new mcp4rdf space
Browse files- MonographDCTAP/Monograph_AdminMetadata.tsv +3 -0
- MonographDCTAP/Monograph_Instance_Print.tsv +23 -0
- MonographDCTAP/Monograph_Prefixes.tsv +5 -0
- MonographDCTAP/Monograph_Work_Text.tsv +16 -0
- app.py +703 -0
- electronic_MonographDCTAP/Monograph_Instance_Electronic.tsv +24 -0
- requirements.txt +24 -0
- validator.py +313 -0
MonographDCTAP/Monograph_AdminMetadata.tsv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
shapeID shapeLabel target propertyID propertyLabel valueShape mandatory severity valueNodeType repeatable note
|
2 |
+
big:AdminMetadata Admin Metadata bf:AdminMetadata bf:creationDate Date Cataloged or Updated/Changed true Violation literal false
|
3 |
+
big:AdminMetadata Admin Metadata bf:AdminMetadata bf:assigner Cataloging institution true Violation IRI; bnode false
|
MonographDCTAP/Monograph_Instance_Print.tsv
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
shapeID shapeLabel target propertyID propertyLabel valueShape mandatory severity valueNodeType repeatable note
|
2 |
+
big:Monograph:Instance:Print Instance (Monograph) Print bf:Print bf:instanceOf Instance of big:Monograph:Work true Violation IRI; bnode true
|
3 |
+
big:Monograph:Instance:Print Instance (Monograph) Print bf:Print bf:title Instance Title big:Title true Violation IRI; bnode true
|
4 |
+
big:Monograph:Instance:Print Instance (Monograph) Print bf:Print bf:editionStatement Edition Statement true Warning literal true
|
5 |
+
big:Monograph:Instance:Print Instance (Monograph) Print bf:Print bf:provisionActivity Provision Activity--Publication Information big:ProvisionActivity true Violation IRI; bnode true
|
6 |
+
big:Monograph:Instance:Print Instance (Monograph) Print bf:Print bf:seriesStatement Series Statement true Warning literal true
|
7 |
+
big:Monograph:Instance:Print Instance (Monograph) Print bf:Print bf:identifiedBy Identifiers true Warning IRI; bnode true e.g., ISBN
|
8 |
+
big:Monograph:Instance:Print Instance (Monograph) Print bf:Print bf:issuance Mode of Issuance true Violation IRI; bnode false
|
9 |
+
big:Monograph:Instance:Print Instance (Monograph) Print bf:Print bf:media Media type true Violation IRI; bnode true
|
10 |
+
big:Monograph:Instance:Print Instance (Monograph) Print bf:Print bf:carrier Carrier type true Violation IRI; bnode true
|
11 |
+
big:Monograph:Instance:Print Instance (Monograph) Print bf:Print bf:adminMetadata Administrative metadata true Violation IRI; bnode true *adminMetadata should be at the Work and Instance levels but the requirements are the same for both
|
12 |
+
big:Title Instance Title bf:Title bf:mainTitle Main Title true Violation literal false Move to Title Sheet and add other title types
|
13 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity bf:agent Agent big:Agent true Warning IRI; bnode true prefer use of bf:agent in Provision Activity but if no bf:agent exists, then use bf:simpleAgent (below)
|
14 |
+
big:Agent bf:Agent ; bf:Person ; bf:Family ; bf:Organization ; bf:Jurisdiction ; bf:Meeting rdfs:label Agent Label true Warning literal true
|
15 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity bflc:simpleAgent Agent Simple Label true Warning literal true prefer use of bf:agent (above) in Provision Activity but if no bf:agent exists, then use bf:simpleAgent
|
16 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity; bf:Distribution; bf:Manufacture; bf:Production; bf:Publication; bf:Modification bf:date Date true Warning literal true Pull both types of dates if they exist as the formatting may differ
|
17 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity; bf:Distribution; bf:Manufacture; bf:Production; bf:Publication; bf:Modification bflc:simpleDate Simple Date true Warning literal true Pull both types of dates if they exist as the formatting may differ
|
18 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity; bf:Distribution; bf:Manufacture; bf:Production; bf:Publication; bf:Modification bf:place Place big:Place true Warning IRI; bnode true Pull both bf:place and bf:simplePlace LC uses bf:place to indicate the country of publication
|
19 |
+
big:Place bf:Place rdfs:label Place Label true Warning literal true
|
20 |
+
ProvisionActivityShape Provision Activity bf:ProvisionActivity; bf:Distribution; bf:Manufacture; bf:Production; bf:Publication; bf:Modification bflc:simplePlace Place Simple Label true Warning literal true
|
21 |
+
|
22 |
+
|
23 |
+
|
MonographDCTAP/Monograph_Prefixes.tsv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Vocabulary Prefix Namespace
|
2 |
+
BIBFRAME bf: http://id.loc.gov/ontologies/bibframe/
|
3 |
+
BIBFRAME LC Extension Ontology bflc: http://id.loc.gov/ontologies/bflc/
|
4 |
+
Resource Description Framework Schema rdfs: http://www.w3.org/2000/01/rdf-schema#
|
5 |
+
BIBFRAME Interoperability Group Shapes	big:	https://example.org/
|
MonographDCTAP/Monograph_Work_Text.tsv
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
shapeID shapeLabel target propertyID propertyLabel valueShape mandatory severity valueNodeType repeatable note
|
2 |
+
big:Monograph:Work Work (Monograph) Text bf:Text ; bf:Monograph bf:title Work Title big:Title true Violation IRI ; bnode true Change to 'SeeTitle Sheet' per AdminMetadata
|
3 |
+
big:Monograph:Work Work (Monograph) Text bf:Text ; bf:Monograph bf:contribution Contribution big:Contribution true Warning IRI ; bnode true required if applicable. Should be included if it is there
|
4 |
+
big:Monograph:Work Work (Monograph) Text bf:Text ; bf:Monograph bf:genreForm Form/Genre of Work true Warning IRI ; bnode true From discussion - consider Work subclasses as sufficient
|
5 |
+
big:Monograph:Work Work (Monograph) Text bf:Text ; bf:Monograph bf:originDate Date of Work true Warning literal true
|
6 |
+
big:Monograph:Work Work (Monograph) Text bf:Text ; bf:Monograph bf:originPlace Place of Origin of the Work true Warning IRI ; bnode false
|
7 |
+
big:Monograph:Work Work (Monograph) Text bf:Text ; bf:Monograph bf:language Language true Violation IRI true
|
8 |
+
big:Monograph:Work Work (Monograph) Text bf:Text ; bf:Monograph bf:subject Subject of the Work true Warning IRI ; bnode true
|
9 |
+
big:Monograph:Work Work (Monograph) Text bf:Text ; bf:Monograph bf:classification Classification numbers true Warning IRI ; bnode true
|
10 |
+
big:Monograph:Work Work (Monograph) Text bf:Text ; bf:Monograph bf:content Content Type true Violation IRI ; bnode true *Thought to make this false, leveraging the subclass information for the same - will this fulfill this need? If not, how to address missing data here? uncontrolled labels . Essential for differentiation/identification.
|
11 |
+
big:Monograph:Work Work (Monograph) Text bf:Text ; bf:Monograph bf:adminMetadata Administrative metadata true Violation IRI ; bnode true See AdminMetadata Sheet
|
12 |
+
big:Title Monograph Title bf:Title bf:mainTitle Main Title true Violation literal false Move to Title Sheet and add other title types
|
13 |
+
big:Contribution Contribution bf:Contribution; bf:PrimaryContribution bf:agent Agent big:Agent true Warning IRI ; bnode true
|
14 |
+
big:Contribution Contribution bf:Contribution; bf:PrimaryContribution bf:role Role big:Role true Warning IRI ; bnode true
|
15 |
+
big:Agent Agent bf:Agent ; bf:Person ; bf:Family ; bf:Organization ; bf:Jurisdiction ; bf:Meeting rdfs:label Agent Label true Warning literal true
|
16 |
+
big:Role Role bf:Role rdfs:label Role Label true Warning literal true
|
app.py
ADDED
@@ -0,0 +1,703 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
"""
|
3 |
+
Hugging Face Gradio App for RDF Validation with MCP Server and Anthropic AI
|
4 |
+
|
5 |
+
This app serves both as a web interface and can expose MCP server functionality.
|
6 |
+
Deploy this on Hugging Face Spaces with your Anthropic API key.
|
7 |
+
"""
|
8 |
+
|
9 |
+
import gradio as gr
|
10 |
+
import os
|
11 |
+
import json
|
12 |
+
import sys
|
13 |
+
import asyncio
|
14 |
+
import logging
|
15 |
+
import requests
|
16 |
+
from typing import Any, Dict, List, Optional
|
17 |
+
import threading
|
18 |
+
import time
|
19 |
+
|
20 |
+
# CRITICAL: FORCE OVERRIDE ALL ENVIRONMENT VARIABLES THAT COULD INTERFERE
|
21 |
+
print("🔧 FORCING ENVIRONMENT VARIABLE OVERRIDES...")
|
22 |
+
|
23 |
+
# Remove any HF environment variables that could cause URL concatenation
|
24 |
+
problematic_env_vars = [
|
25 |
+
'HF_API_URL',
|
26 |
+
'HF_INFERENCE_URL',
|
27 |
+
'HF_ENDPOINT_URL',
|
28 |
+
'HF_MODEL',
|
29 |
+
'HUGGINGFACE_API_URL',
|
30 |
+
'HUGGINGFACE_INFERENCE_URL'
|
31 |
+
]
|
32 |
+
|
33 |
+
for var in problematic_env_vars:
|
34 |
+
if var in os.environ:
|
35 |
+
old_value = os.environ[var]
|
36 |
+
del os.environ[var]
|
37 |
+
print(f"🗑️ Removed environment variable: {var} = {old_value}")
|
38 |
+
|
39 |
+
print("✅ Environment variables cleaned")
|
40 |
+
|
41 |
+
# Add current directory to path
|
42 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
43 |
+
|
44 |
+
# Import our validation logic
|
45 |
+
try:
|
46 |
+
from validator import validate_rdf
|
47 |
+
VALIDATOR_AVAILABLE = True
|
48 |
+
except ImportError:
|
49 |
+
VALIDATOR_AVAILABLE = False
|
50 |
+
print("⚠️ Warning: validator.py not found. Some features may be limited.")
|
51 |
+
|
52 |
+
# Optional: Check if OpenAI and requests are available
|
53 |
+
try:
|
54 |
+
from openai import OpenAI
|
55 |
+
OPENAI_AVAILABLE = True
|
56 |
+
except ImportError:
|
57 |
+
OPENAI_AVAILABLE = False
|
58 |
+
print("💡 Install 'openai' package for AI-powered corrections: pip install openai")
|
59 |
+
|
60 |
+
try:
|
61 |
+
import requests
|
62 |
+
HF_INFERENCE_AVAILABLE = True
|
63 |
+
except ImportError:
|
64 |
+
HF_INFERENCE_AVAILABLE = False
|
65 |
+
print("💡 Install 'requests' package for AI-powered corrections: pip install requests")
|
66 |
+
|
67 |
+
# Set up logging
|
68 |
+
logging.basicConfig(level=logging.INFO)
|
69 |
+
logger = logging.getLogger(__name__)
|
70 |
+
|
71 |
+
# Configuration - ABSOLUTELY HARDCODED VALUES (NO ENV VARS ALLOWED)
|
72 |
+
HF_API_KEY = os.getenv('HF_API_KEY', '') # Only this one env var is allowed
|
73 |
+
# FORCE HARDCODED VALUES - IGNORE ALL OTHER ENVIRONMENT VARIABLES
|
74 |
+
HF_ENDPOINT_URL = "https://evxgv66ksxjlfrts.us-east-1.aws.endpoints.huggingface.cloud/v1/"
|
75 |
+
HF_MODEL = "lmstudio-community/Llama-3.3-70B-Instruct-GGUF" # Correct model name for your endpoint
|
76 |
+
|
77 |
+
print(f"🔐 FORCED hardcoded endpoint: {HF_ENDPOINT_URL}")
|
78 |
+
print(f"🔐 FORCED hardcoded model: {HF_MODEL}")
|
79 |
+
print(f"🔑 HF_API_KEY configured: {'Yes' if HF_API_KEY else 'No'}")
|
80 |
+
|
81 |
+
# EXTRA PROTECTION: Override any modules that might have cached env vars
|
82 |
+
import sys
|
83 |
+
if 'requests' in sys.modules:
|
84 |
+
print("🔄 Requests module detected - ensuring no cached env vars")
|
85 |
+
if 'httpx' in sys.modules:
|
86 |
+
print("🔄 HTTPX module detected - ensuring no cached env vars")
|
87 |
+
|
88 |
+
# OpenAI client configuration for the endpoint
|
89 |
+
def get_openai_client():
    """Return an OpenAI client configured for the HF Inference Endpoint.

    Reads HF_API_KEY from the environment at call time rather than using the
    value captured at module import. Callers (get_ai_suggestions,
    get_ai_correction) already re-read the env var at runtime, so previously a
    Secret added after startup passed the caller's check but this function
    still saw the stale import-time value and returned None.

    Returns:
        OpenAI | None: A configured client, or None when no API key is set.
    """
    # Re-read at call time so a Secret configured after startup takes effect.
    api_key = os.getenv('HF_API_KEY', '')
    if not api_key:
        print("❌ No HF_API_KEY available for OpenAI client")
        return None

    print("🔗 Creating OpenAI client with:")
    print(f"   base_url: {HF_ENDPOINT_URL}")
    # Never print the full key — show only the last 4 chars for debugging.
    print(f"   api_key: {'***' + api_key[-4:] if len(api_key) > 4 else 'HIDDEN'}")

    return OpenAI(
        base_url=HF_ENDPOINT_URL,
        api_key=api_key,
        timeout=120.0  # generous timeout to survive endpoint cold starts
    )
|
104 |
+
|
105 |
+
# Sample RDF data for examples
|
106 |
+
SAMPLE_VALID_RDF = '''<?xml version="1.0" encoding="UTF-8"?>
|
107 |
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
108 |
+
xmlns:bf="http://id.loc.gov/ontologies/bibframe/"
|
109 |
+
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
|
110 |
+
|
111 |
+
<bf:Work rdf:about="http://example.org/work/1">
|
112 |
+
<rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Text"/>
|
113 |
+
<bf:title>
|
114 |
+
<bf:Title>
|
115 |
+
<bf:mainTitle>Sample Monograph Title</bf:mainTitle>
|
116 |
+
</bf:Title>
|
117 |
+
</bf:title>
|
118 |
+
<bf:creator>
|
119 |
+
<bf:Agent>
|
120 |
+
<rdfs:label>Sample Author</rdfs:label>
|
121 |
+
</bf:Agent>
|
122 |
+
</bf:creator>
|
123 |
+
</bf:Work>
|
124 |
+
|
125 |
+
</rdf:RDF>'''
|
126 |
+
|
127 |
+
SAMPLE_INVALID_RDF = '''<?xml version="1.0" encoding="UTF-8"?>
|
128 |
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
129 |
+
<!-- Missing namespace declarations -->
|
130 |
+
<!-- Missing required properties -->
|
131 |
+
<bf:Work rdf:about="http://example.org/work/1">
|
132 |
+
<bf:title>Incomplete Title</bf:title>
|
133 |
+
<!-- Missing rdf:type -->
|
134 |
+
<!-- Missing proper title structure -->
|
135 |
+
</bf:Work>
|
136 |
+
</rdf:RDF>'''
|
137 |
+
|
138 |
+
# MCP Server Tools (can be used independently)
|
139 |
+
def validate_rdf_tool(rdf_content: str, template: str = "monograph") -> dict:
    """
    Validate RDF/XML content against SHACL templates.

    This tool validates RDF/XML data against predefined SHACL shapes to ensure
    compliance with metadata standards like BIBFRAME. Returns detailed validation
    results with conformance status and specific violation information.

    Args:
        rdf_content (str): The RDF/XML content to validate
        template (str): Validation template to use ('monograph' or 'custom')

    Returns:
        dict: Validation results with conformance status and detailed feedback
    """
    # Treat whitespace-only input as empty too; previously "   \n" slipped
    # past the guard and was handed to the validator. This mirrors the
    # .strip() check already done in validate_rdf_interface().
    if not rdf_content or not rdf_content.strip():
        return {"error": "No RDF/XML content provided", "conforms": False}

    if not VALIDATOR_AVAILABLE:
        return {
            "error": "Validator not available - ensure validator.py is present",
            "conforms": False
        }

    try:
        # validator.validate_rdf expects raw bytes, not str.
        conforms, results_text = validate_rdf(rdf_content.encode('utf-8'), template)

        return {
            "conforms": conforms,
            "results": results_text,
            "template": template,
            "status": "✅ Valid RDF" if conforms else "❌ Invalid RDF"
        }

    except Exception as e:
        # Surface the failure as a result dict instead of raising into the UI.
        logger.error(f"Validation error: {str(e)}")
        return {
            "error": f"Validation failed: {str(e)}",
            "conforms": False
        }
|
179 |
+
|
180 |
+
def get_ai_suggestions(validation_results: str, rdf_content: str) -> str:
    """Produce fix suggestions for RDF/XML that failed validation.

    Prefers the AI endpoint (an OpenAI-compatible HF Inference Endpoint) and
    falls back to the rule-based generate_manual_suggestions() whenever the
    openai package, the API key, the client, or the remote call is unavailable.

    Args:
        validation_results (str): The validation error messages
        rdf_content (str): The RDF/XML that failed validation (only the first
            1000 characters are sent to the model for context)

    Returns:
        str: Markdown-formatted suggestion text
    """

    if not OPENAI_AVAILABLE:
        return generate_manual_suggestions(validation_results)

    # Re-read the key at call time so Secrets added after startup are seen.
    runtime_key = os.getenv('HF_API_KEY', '')
    if not runtime_key:
        return f"""
🔑 **AI suggestions disabled**: Please set your Hugging Face API key as a Secret in your Space settings.

{generate_manual_suggestions(validation_results)}
"""

    try:
        print("🔍 Attempting to get OpenAI client for suggestions...")
        llm = get_openai_client()
        if not llm:
            print("❌ OpenAI client is None for suggestions.")
            return f"""
🔑 **AI suggestions disabled**: HF_API_KEY not configured or client creation failed.

{generate_manual_suggestions(validation_results)}
"""
        print(f"✅ OpenAI client obtained for suggestions. Client timeout: {llm.timeout}")

        prompt = f"""You are an expert in RDF/XML and SHACL validation. Analyze the following validation results and provide clear, actionable suggestions for fixing the RDF issues.

Validation Results:
{validation_results}

Original RDF (first 1000 chars):
{rdf_content[:1000]}...

Please provide:
1. A clear summary of what's wrong
2. Specific step-by-step instructions to fix each issue
3. Example corrections where applicable
4. Best practices to prevent similar issues

Format your response in a helpful, structured way using markdown."""

        print(f"🔄 Making SUGGESTION API call to: {HF_ENDPOINT_URL} with model: {HF_MODEL}")
        print(f"🔄 Client base_url: {llm.base_url}")
        print("⏳ Attempting client.chat.completions.create() for suggestions...")

        completion = llm.chat.completions.create(
            model=HF_MODEL,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=1500,
            temperature=0.7,
            top_p=0.9,
        )

        print(f"✅ client.chat.completions.create() returned for suggestions. Type: {type(completion)}")
        answer = completion.choices[0].message.content
        print("✅ Suggestion API call successful, content extracted.")
        return f"🤖 **AI-Powered Suggestions:**\n\n{answer}"

    except Exception as exc:
        # exc_info=True captures the full traceback in the Space logs.
        logger.error(f"OpenAI/HF Inference Endpoint error (suggestions): {str(exc)}", exc_info=True)
        return f"""
❌ **AI suggestions error**: {str(exc)}

{generate_manual_suggestions(validation_results)}
"""
|
266 |
+
|
267 |
+
def get_ai_correction(validation_results: str, rdf_content: str) -> str:
    """Produce a corrected version of invalid RDF/XML.

    Sends the full original RDF plus the validation errors to the AI endpoint
    and returns the model's corrected XML. Falls back to
    generate_manual_correction_hints() whenever the openai package, the API
    key, the client, or the remote call is unavailable.

    Args:
        validation_results (str): The validation error messages
        rdf_content (str): The original invalid RDF/XML content

    Returns:
        str: Corrected RDF/XML (or the original wrapped in hint comments)
    """

    if not OPENAI_AVAILABLE:
        return generate_manual_correction_hints(validation_results, rdf_content)

    # Re-read the key at call time so Secrets added after startup are seen.
    runtime_key = os.getenv('HF_API_KEY', '')
    if not runtime_key:
        return f"""<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->

{generate_manual_correction_hints(validation_results, rdf_content)}"""

    try:
        print("🔍 Attempting to get OpenAI client for correction...")
        llm = get_openai_client()
        if not llm:
            print("❌ OpenAI client is None for correction.")
            return f"""<!-- AI correction disabled: HF_API_KEY not configured or client creation failed. -->

{generate_manual_correction_hints(validation_results, rdf_content)}"""
        print(f"✅ OpenAI client obtained for correction. Client timeout: {llm.timeout}")

        prompt = f"""You are an expert in RDF/XML. Fix the following RDF/XML based on the validation errors provided.

Validation Errors:
{validation_results}

Original RDF/XML:
{rdf_content}

Please provide the corrected RDF/XML that addresses all validation issues.
- Return only the corrected XML without additional explanation
- Maintain the original structure as much as possible while fixing errors
- Ensure all namespace declarations are present
- Add any missing required properties
- Fix any syntax or structural issues"""

        print(f"🔄 Making CORRECTION API call to: {HF_ENDPOINT_URL} with model: {HF_MODEL}")
        print(f"🔄 Client base_url: {llm.base_url}")
        print("⏳ Attempting client.chat.completions.create() for correction...")

        # Lower temperature than the suggestions call: we want faithful XML,
        # not creative prose.
        completion = llm.chat.completions.create(
            model=HF_MODEL,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=2000,
            temperature=0.3,
            top_p=0.9,
        )

        print(f"✅ client.chat.completions.create() returned for correction. Type: {type(completion)}")
        fixed_xml = completion.choices[0].message.content
        print("✅ Correction API call successful, content extracted.")
        return fixed_xml

    except Exception as exc:
        # exc_info=True captures the full traceback in the Space logs.
        logger.error(f"OpenAI/HF Inference Endpoint error (correction): {str(exc)}", exc_info=True)
        return f"""<!-- AI correction error: {str(exc)} -->

{generate_manual_correction_hints(validation_results, rdf_content)}"""
|
346 |
+
|
347 |
+
def generate_manual_suggestions(validation_results: str) -> str:
    """Build rule-based fix suggestions from validation output (no AI needed).

    Scans the validation text for known error signatures and emits one remedy
    bullet per matched signature, followed by general tips. Used as the
    fallback whenever the AI endpoint cannot be reached.

    Args:
        validation_results (str): The validation error messages

    Returns:
        str: Markdown-formatted suggestion text
    """
    lowered = validation_results.lower()

    # (signature matched, remedy bullet) pairs, checked in order.
    checks = [
        ("Constraint Violation" in validation_results,
         "• Fix SHACL constraint violations by ensuring required properties are present"),
        ("Missing property" in validation_results or "missing" in lowered,
         "• Add missing required properties (check template requirements)"),
        ("datatype" in lowered,
         "• Correct data type mismatches (ensure proper literal types)"),
        ("namespace" in lowered or "prefix" in lowered,
         "• Add missing namespace declarations at the top of your RDF"),
        ("XML" in validation_results or "syntax" in lowered,
         "• Fix XML syntax errors (check for unclosed tags, invalid characters)"),
    ]
    suggestions = [remedy for matched, remedy in checks if matched]

    # Nothing recognized: fall back to generic advice.
    if not suggestions:
        suggestions = [
            "• Review detailed validation results for specific issues",
            "• Ensure your RDF follows the selected template requirements",
        ]

    suggestions_text = "\n".join(suggestions)

    return f"""
📋 **Manual Analysis:**

{suggestions_text}

💡 **General Tips:**
• Check namespace declarations at the top of your RDF
• Ensure all required properties are present
• Verify data types match expected formats
• Make sure XML structure is well-formed

🔧 **Common Fixes:**
• Add missing namespace prefixes
• Include required properties like rdf:type
• Fix malformed URIs or literals
• Ensure proper XML syntax
"""
|
389 |
+
|
390 |
+
def generate_manual_correction_hints(validation_results: str, rdf_content: str) -> str:
    """Return the original RDF wrapped in XML comments describing manual fixes.

    Fallback for get_ai_correction() when no AI endpoint is available: the RDF
    content itself is left untouched; generic correction steps and the first
    500 characters of the validation output are embedded as XML comments
    before and after it.

    Args:
        validation_results (str): The validation error messages
        rdf_content (str): The original invalid RDF/XML content

    Returns:
        str: The original RDF bracketed by hint comments
    """
    issues_excerpt = validation_results[:500]
    header = (
        "<!-- Manual correction hints based on validation results -->\n"
        "<!-- Set HF_API_KEY as a Secret in your Space settings for AI-powered corrections -->"
    )
    footer = f"""<!--
VALIDATION ISSUES FOUND:
{issues_excerpt}...

MANUAL CORRECTION STEPS:
1. Add missing namespace declarations
2. Include required properties (rdf:type, etc.)
3. Fix XML syntax errors
4. Ensure proper URI formats
5. Validate data types
-->"""
    return f"{header}\n\n{rdf_content}\n\n{footer}"
|
408 |
+
|
409 |
+
def validate_rdf_interface(rdf_content: str, template: str, use_ai: bool = True):
    """Gradio callback: validate RDF and assemble the four output panes.

    Args:
        rdf_content (str): RDF/XML pasted into the input textbox
        template (str): SHACL template name ('monograph' or 'custom')
        use_ai (bool): When True, route suggestions/corrections through the
            AI endpoint; otherwise use the rule-based fallbacks directly

    Returns:
        tuple[str, str, str, str]: (status, detailed results, suggestions,
        corrected RDF) matching the four output widgets.
    """
    if not rdf_content.strip():
        return "❌ Error", "No RDF/XML data provided", "", ""

    outcome = validate_rdf_tool(rdf_content, template)
    if "error" in outcome:
        return f"❌ Error: {outcome['error']}", "", "", ""

    status = outcome["status"]
    details = outcome["results"]

    if outcome["conforms"]:
        advice = "✅ No issues found! Your RDF/XML is valid according to the selected template."
        fixed = "<!-- Already valid - no corrections needed -->\n" + rdf_content
    elif use_ai:
        advice = get_ai_suggestions(details, rdf_content)
        fixed = get_ai_correction(details, rdf_content)
    else:
        advice = generate_manual_suggestions(details)
        fixed = generate_manual_correction_hints(details, rdf_content)

    return status, details, advice, fixed
|
435 |
+
|
436 |
+
def get_rdf_examples(example_type: str = "valid") -> str:
    """
    Retrieve example RDF/XML snippets for testing and learning.

    This tool provides sample RDF/XML content that can be used to test
    the validation system or learn proper RDF structure.

    Args:
        example_type (str): Type of example ('valid', 'invalid', or
            'bibframe'); unknown values fall back to the valid example

    Returns:
        str: RDF/XML example content
    """
    bibframe_sample = '''<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:bf="http://id.loc.gov/ontologies/bibframe/"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">

  <bf:Instance rdf:about="http://example.org/instance/1">
    <rdf:type rdf:resource="http://id.loc.gov/ontologies/bibframe/Print"/>
    <bf:instanceOf rdf:resource="http://example.org/work/1"/>
    <bf:title>
      <bf:Title>
        <bf:mainTitle>Example Book Title</bf:mainTitle>
      </bf:Title>
    </bf:title>
    <bf:provisionActivity>
      <bf:Publication>
        <bf:date>2024</bf:date>
        <bf:place>
          <bf:Place>
            <rdfs:label>New York</rdfs:label>
          </bf:Place>
        </bf:place>
      </bf:Publication>
    </bf:provisionActivity>
  </bf:Instance>

</rdf:RDF>'''

    if example_type == "invalid":
        return SAMPLE_INVALID_RDF
    if example_type == "bibframe":
        return bibframe_sample
    # 'valid' and any unrecognized value resolve to the valid sample,
    # matching the original dict.get(..., examples["valid"]) fallback.
    return SAMPLE_VALID_RDF
|
481 |
+
|
482 |
+
# Create Gradio Interface
def create_interface():
    """Create the main Gradio interface.

    Builds the Blocks app: an RDF/XML input column, validation results,
    AI-generated corrections, example buttons, and a documentation footer.
    Wires all event handlers (validate button, auto-validate on change,
    example loaders, clear). Returns the un-launched `gr.Blocks` object.
    """

    # Check API key status dynamically (read at build time, not import time,
    # so a Space secret added after import is still picked up on restart).
    current_api_key = os.getenv('HF_API_KEY', '')
    api_status = "🔑 AI features enabled" if (OPENAI_AVAILABLE and current_api_key) else "⚠️ AI features disabled (set HF_API_KEY)"

    with gr.Blocks(
        title="RDF Validation Server with AI",
        theme=gr.themes.Soft(),
        # Custom CSS classes referenced below via elem_classes.
        css="""
        .status-box {
            font-weight: bold;
            padding: 10px;
            border-radius: 5px;
        }
        .header-text {
            text-align: center;
            padding: 20px;
        }
        """
    ) as demo:

        # Header
        # NOTE(review): this debug block is rendered into the public page and
        # exposes the endpoint URL and the API key's length — consider
        # removing it for production deployments.
        debug_info = f"""
        Debug Info:
        - OPENAI_AVAILABLE: {OPENAI_AVAILABLE}
        - HF_INFERENCE_AVAILABLE: {HF_INFERENCE_AVAILABLE}
        - HF_API_KEY set: {'Yes' if current_api_key else 'No'}
        - HF_API_KEY length: {len(current_api_key) if current_api_key else 0}
        - HF_ENDPOINT_URL: {HF_ENDPOINT_URL}
        - HF_MODEL: {HF_MODEL}
        """

        gr.HTML(f"""
        <div class="header-text">
            <h1>🔍 RDF Validation Server with AI</h1>
            <p>Validate RDF/XML against SHACL schemas with AI-powered suggestions and corrections</p>
            <p><strong>Status:</strong> {api_status}</p>
            <details><summary>Debug Info</summary><pre>{debug_info}</pre></details>
        </div>
        """)

        # Main interface: input column (RDF text, template picker, AI toggle).
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📝 Input")

                rdf_input = gr.Textbox(
                    label="RDF/XML Content",
                    placeholder="Paste your RDF/XML content here...",
                    lines=15,
                    show_copy_button=True
                )

                with gr.Row():
                    template_dropdown = gr.Dropdown(
                        label="Validation Template",
                        choices=["monograph", "custom"],
                        value="monograph",
                        info="Select the SHACL template to validate against"
                    )

                    use_ai_checkbox = gr.Checkbox(
                        label="Use AI Features",
                        value=True,
                        info="Enable AI-powered suggestions and corrections"
                    )

                validate_btn = gr.Button("🔍 Validate RDF", variant="primary", size="lg")

        # Results section: status line, detailed report, AI suggestions.
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📊 Results")

                status_output = gr.Textbox(
                    label="Validation Status",
                    interactive=False,
                    lines=1,
                    elem_classes=["status-box"]
                )

                results_output = gr.Textbox(
                    label="Detailed Validation Results",
                    interactive=False,
                    lines=8,
                    show_copy_button=True
                )

                suggestions_output = gr.Textbox(
                    label="💡 Fix Suggestions",
                    interactive=False,
                    lines=8,
                    show_copy_button=True
                )

        # Corrected RDF section (populated only when AI features are active).
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 🛠️ AI-Generated Corrections")

                corrected_output = gr.Textbox(
                    label="Corrected RDF/XML",
                    interactive=False,
                    lines=15,
                    show_copy_button=True,
                    placeholder="Corrected RDF will appear here after validation..."
                )

        # Examples and controls
        with gr.Row():
            gr.Markdown("### 📚 Examples & Tools")

        with gr.Row():
            example1_btn = gr.Button("✅ Valid RDF Example", variant="secondary")
            example2_btn = gr.Button("❌ Invalid RDF Example", variant="secondary")
            example3_btn = gr.Button("📖 BibFrame Example", variant="secondary")
            clear_btn = gr.Button("🗑️ Clear All", variant="stop")

        # Event handlers
        validate_btn.click(
            fn=validate_rdf_interface,
            inputs=[rdf_input, template_dropdown, use_ai_checkbox],
            outputs=[status_output, results_output, suggestions_output, corrected_output]
        )

        # Auto-validate on input change.
        # NOTE(review): despite the original "(debounced)" comment, no debounce
        # is applied here — this fires a full validation on every edit, which
        # can be expensive for large RDF documents. Confirm whether a
        # `gr.Textbox.change` throttle/debounce is wanted.
        rdf_input.change(
            fn=validate_rdf_interface,
            inputs=[rdf_input, template_dropdown, use_ai_checkbox],
            outputs=[status_output, results_output, suggestions_output, corrected_output]
        )

        # Example buttons: each loads a canned snippet into the input box.
        example1_btn.click(
            lambda: get_rdf_examples("valid"),
            outputs=[rdf_input]
        )

        example2_btn.click(
            lambda: get_rdf_examples("invalid"),
            outputs=[rdf_input]
        )

        example3_btn.click(
            lambda: get_rdf_examples("bibframe"),
            outputs=[rdf_input]
        )

        # Clear resets all five text components (input + four outputs).
        clear_btn.click(
            lambda: ("", "", "", "", ""),
            outputs=[rdf_input, status_output, results_output, suggestions_output, corrected_output]
        )

        # Footer with instructions
        gr.Markdown("""
        ---
        ### 🚀 **Deployment Instructions for Hugging Face Spaces:**

        1. **Create a new Space** on [Hugging Face](https://huggingface.co/spaces)
        2. **Set up your Hugging Face Inference Endpoint** and get the endpoint URL
        3. **Set your tokens** in Space settings (use Secrets for security):
           - Go to Settings → Repository secrets
           - Add: `HF_API_KEY` = `your_huggingface_api_key_here`
           - Endpoint is now hardcoded to your specific Inference Endpoint
        4. **Upload these files** to your Space repository
        5. **Install requirements**: The Space will auto-install from `requirements.txt`

        ### 🔧 **MCP Server Mode:**
        This app functions as both a web interface AND an MCP server for Claude Desktop and other MCP clients.

        **Available MCP Tools (via SSE):**
        - `validate_rdf_tool`: Validate RDF/XML against SHACL shapes
        - `get_ai_suggestions`: Get AI-powered fix suggestions
        - `get_ai_correction`: Generate corrected RDF/XML
        - `get_rdf_examples`: Retrieve example RDF snippets

        **MCP Connection:**
        1. When deployed on Hugging Face Spaces, the MCP server is available at:
           `https://your-space-id.hf.space/gradio_api/mcp/sse`
        2. Use this URL in Claude Desktop's MCP configuration
        3. The app automatically exposes functions with proper docstrings as MCP tools

        ### 💡 **Features:**
        - ✅ Real-time RDF/XML validation against SHACL schemas
        - 🤖 AI-powered error suggestions and corrections (with HF Inference Endpoint)
        - 📚 Built-in examples and templates
        - 🔄 Auto-validation as you type
        - 📋 Copy results with one click

        **Note:** AI features require a valid Hugging Face API key (HF_API_KEY) set as a Secret. Manual suggestions are provided as fallback.
        """)

    return demo
|
678 |
+
|
679 |
+
# Launch configuration
if __name__ == "__main__":
    # Force verify environment is clean: some upstream config variables break
    # the app if they leak into this process, so remove any that remain.
    print("🔍 FINAL CHECK: Verifying problematic environment variables are removed...")
    for var in problematic_env_vars:
        if var in os.environ:
            # SECURITY FIX: do not echo the variable's VALUE to stdout/logs —
            # these may hold API keys or other secrets. Report only the name.
            print(f"⚠️ WARNING: {var} still exists! Removing it.")
            del os.environ[var]
            print(f"🗑️ FORCE REMOVED: {var}")
        else:
            print(f"✅ {var} confirmed not in environment")

    demo = create_interface()

    # Configuration for different environments:
    # Hugging Face Spaces injects the listening port via the PORT env variable.
    port = int(os.getenv('PORT', 7860))

    demo.launch(
        server_name="0.0.0.0",   # Bind all interfaces — required for external hosting
        server_port=port,        # Use environment PORT or default to 7860
        share=False,             # Don't create gradio.live links in production
        show_error=True,         # Surface errors in the interface
        show_api=True,           # Enable API endpoints
        allowed_paths=["."]      # Allow serving files from current directory
    )
|
electronic_MonographDCTAP/Monograph_Instance_Electronic.tsv
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
shapeID shapeLabel target propertyID propertyLabel valueShape mandatory severity valueNodeType repeatable note
|
2 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:instanceOf Instance of big:Monograph:Work true Violation IRI; bnode true
|
3 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:title Instance Title big:Title true Violation IRI; bnode true
|
4 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:editionStatement Edition Statement true Warning literal true
|
5 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:provisionActivity Provision Activity--Publication Information big:ProvisionActivity true Violation IRI; bnode true
|
6 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:seriesStatement Series Statement true Warning literal true
|
7 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:identifiedBy Identifiers true Warning IRI; bnode true e.g., ISBN
|
8 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:issuance Mode of Issuance true Violation IRI; bnode false
|
9 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:media Media type true Violation IRI; bnode true
|
10 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:carrier Carrier type true Violation IRI; bnode true
|
11 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:electronicLocator Uniform Resource Locator for resource true Warning IRI; bnode true
|
12 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:digitalCharacteristic Digital Characteristic big:DigitalCharacteristic true Warning IRI; bnode false
|
13 |
+
big:Monograph:Instance:Electronic Instance (Monograph) Electronic bf:Electronic bf:adminMetadata Administrative metadata true Violation IRI; bnode true *adminMetadata should be at the Work and Instance levels but the requirements are the same for both
|
14 |
+
big:Title Instance Title bf:Title bf:mainTitle Main Title true Violation literal false Move to Title Sheet and add other title types
|
15 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity ; bf:Distribution ; bf:Manufacture ; bf:Production ; bf:Publication ; bf:Modification bf:agent Agent big:AgentShape true Warning IRI; bnode true prefer use of bf:agent in Provision Activity but if no bf:agent exists, then use bf:simpleAgent (below)
|
16 |
+
big:Agent bf:Agent ; bf:Person ; bf:Family ; bf:Organization ; bf:Jurisdiction ; bf:Meeting rdfs:label Agent Label true Warning literal true
|
17 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity ; bf:Distribution ; bf:Manufacture ; bf:Production ; bf:Publication ; bf:Modification bflc:simpleAgent Agent Simple Label big:Agent true Warning literal true prefer use of bf:agent (above) in Provision Activity but if no bf:agent exists, then use bf:simpleAgent
|
18 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity ; bf:Distribution ; bf:Manufacture ; bf:Production ; bf:Publication ; bf:Modification bf:date Date true Warning literal true Pull both types of dates if they exist as the formatting may differ
|
19 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity ; bf:Distribution ; bf:Manufacture ; bf:Production ; bf:Publication ; bf:Modification bflc:simpleDate Simple Date true Warning literal true Pull both types of dates if they exist as the formatting may differ
|
20 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity ; bf:Distribution ; bf:Manufacture ; bf:Production ; bf:Publication ; bf:Modification bf:place Place big:Place true Warning IRI; bnode true Pull both bf:place and bf:simplePlace LC uses bf:place to indicate the country of publication
|
21 |
+
big:Place bf:Place rdfs:label Place Label true Warning literal true
|
22 |
+
big:ProvisionActivity Provision Activity bf:ProvisionActivity ; bf:Distribution ; bf:Manufacture ; bf:Production ; bf:Publication ; bf:Modification bflc:simplePlace Place Simple Label true Warning literal true
|
23 |
+
big:DigitalCharacteristic Digital Characteristic bf:FileType bf:digitalCharacteristic File Type true Warning literal true
|
24 |
+
big:DigitalCharacteristic Digital Characteristic bf:EncodingFormat bf:digitalCharacteristic Encoding Format true Warning literal true
|
requirements.txt
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Requirements for Hugging Face Gradio App with MCP Server
|
2 |
+
# Core dependencies
|
3 |
+
gradio>=4.0.0
|
4 |
+
rdflib>=7.0.0
|
5 |
+
pySHACL>=0.25.0
|
6 |
+
pandas>=2.0.0
|
7 |
+
|
8 |
+
# AI integrations
|
9 |
+
huggingface_hub>=0.20.0
|
10 |
+
openai>=1.0.0
|
11 |
+
|
12 |
+
# MCP support (optional)
|
13 |
+
mcp>=0.9.0
|
14 |
+
|
15 |
+
# Web and utilities
|
16 |
+
flask>=2.3.0
|
17 |
+
flask-cors>=4.0.0
|
18 |
+
requests>=2.31.0
|
19 |
+
waitress>=2.1.0
|
20 |
+
|
21 |
+
# Additional utilities
|
22 |
+
python-dotenv>=1.0.0
|
23 |
+
aiofiles>=23.0.0
|
24 |
+
asyncio-mqtt>=0.13.0
|
validator.py
ADDED
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
import pandas as pd
|
4 |
+
import rdflib
|
5 |
+
from rdflib import Namespace, Literal, BNode, RDF, RDFS
|
6 |
+
from pyshacl import validate
|
7 |
+
|
8 |
+
# Set up basic logging (use DEBUG level to see detailed output)
|
9 |
+
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s: %(message)s')
|
10 |
+
|
11 |
+
BASE_DIR = os.path.join(os.path.dirname(__file__), "MonographDCTAP")
|
12 |
+
|
13 |
+
TSV_FILES = [
|
14 |
+
"MonographDCTAP/Monograph_Work_Text.tsv",
|
15 |
+
"MonographDCTAP/Monograph_AdminMetadata.tsv",
|
16 |
+
"MonographDCTAP/Monograph_Instance_Print.tsv",
|
17 |
+
"electronic_MonographDCTAP/Monograph_Instance_Electronic.tsv",
|
18 |
+
]
|
19 |
+
PREFIX_FILE = "./MonographDCTAP/Monograph_Prefixes.tsv"
|
20 |
+
|
21 |
+
# Add a global constant for fixed prefixes.
|
22 |
+
FIXED_PREFIXES = {
|
23 |
+
"bf": "http://id.loc.gov/ontologies/bibframe/",
|
24 |
+
"bflc": "http://id.loc.gov/ontologies/bflc/",
|
25 |
+
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
|
26 |
+
"big": "https://example.org/"
|
27 |
+
}
|
28 |
+
|
29 |
+
# Replace load_prefixes() with a simplified function:
|
30 |
+
def load_prefixes(prefixes_file):
    """Return the hardcoded prefix map, ignoring *prefixes_file*.

    The TSV prefix file is no longer consulted; ``FIXED_PREFIXES`` is
    authoritative. Each prefix/namespace pair is logged for visibility.
    """
    logging.info("Using hardcoded prefixes:")
    for prefix, namespace in FIXED_PREFIXES.items():
        logging.info(f"{prefix} -> {namespace}")
    return FIXED_PREFIXES
|
35 |
+
|
36 |
+
# Optionally simplify register_prefixes and _bind_namespaces:
|
37 |
+
def register_prefixes(graph, prefixes):
    """Bind every prefix/namespace pair in *prefixes* onto *graph*.

    Existing bindings with the same prefix are overridden.
    """
    for prefix_label in prefixes:
        graph.bind(prefix_label, Namespace(prefixes[prefix_label]), override=True)
|
40 |
+
|
41 |
+
def _bind_namespaces(graph: rdflib.Graph) -> None:
    """Bind the four fixed namespaces (bf, bflc, rdfs, big) onto *graph*.

    Uses the namespace manager directly, mirroring the hardcoded
    ``FIXED_PREFIXES`` table.
    """
    for prefix in ("bf", "bflc", "rdfs", "big"):
        graph.namespace_manager.bind(prefix, Namespace(FIXED_PREFIXES[prefix]))
|
47 |
+
|
48 |
+
def _prop_id_to_uri(property_id, prefixes):
    """Expand a CURIE such as ``bf:title`` into an ``rdflib.URIRef``.

    Resolution order:
      1. If *property_id* has a ``prefix:local`` form and the prefix is known
         in *prefixes*, return the expanded URIRef.
      2. Otherwise, if it already looks like an absolute URI (starts with
         ``http``), wrap it as a URIRef unchanged.
      3. Otherwise fall back to a plain Literal.
    """
    prefix, colon, local_name = property_id.partition(":")
    if colon:
        namespace = prefixes.get(prefix.strip())
        if namespace:
            return rdflib.URIRef(namespace + local_name.strip())
    if property_id.startswith("http"):
        return rdflib.URIRef(property_id)
    return Literal(property_id)
|
57 |
+
|
58 |
+
def add_shape_from_row(graph, row, prefixes):
    """Translate one DCTAP TSV row into SHACL triples on *graph*.

    For the row's shapeID this ensures a ``sh:NodeShape`` exists (with label
    and one ``sh:targetClass`` per ``;``-separated target), then attaches a
    ``sh:PropertyShape`` for the row's propertyID carrying minCount/maxCount,
    severity, and an optional ``sh:node`` link to a value shape. Mandatory
    properties additionally get a SPARQLTarget so that nodes *missing* the
    property are still selected as focus nodes.

    Returns the (mutated) *graph* for chaining.
    """
    SH = "http://www.w3.org/ns/shacl#"
    # BUGFIX: expand the shapeID CURIE (e.g. "big:Title") to a full URI via
    # _prop_id_to_uri so it matches the URIs produced for sh:node links below.
    # Previously the raw CURIE string was used verbatim as the shape URI, so
    # valueShape references could never resolve to their NodeShape.
    shape_uri = _prop_id_to_uri(row['shapeID'], prefixes)
    logging.info(f"Processing shape: {shape_uri}")
    if (shape_uri, RDF.type, rdflib.URIRef(SH + "NodeShape")) not in graph:
        graph.add((shape_uri, RDF.type, rdflib.URIRef(SH + "NodeShape")))
        graph.add((shape_uri, RDFS.label, Literal(row['shapeLabel'])))
        logging.info(f"Added NodeShape: {shape_uri} with label {row['shapeLabel']}")
    # A row's target cell may list several classes separated by ";".
    targets = [t.strip() for t in str(row['target']).split(";")]
    for target in targets:
        target_uri = _prop_id_to_uri(target, prefixes)
        graph.add((shape_uri, rdflib.URIRef(SH + "targetClass"), target_uri))
        logging.info(f"Added target '{target_uri}' to shape {shape_uri}")

    # If the property is mandatory, add a SPARQLTarget to force evaluation of
    # nodes missing the property (targetClass alone only selects typed nodes
    # that pyshacl then checks; the FILTER NOT EXISTS query makes the absence
    # itself a selectable condition).
    if str(row['mandatory']).strip().lower() == "true":
        property_uri = _prop_id_to_uri(row['propertyID'], prefixes)
        target_uris = [_prop_id_to_uri(t, prefixes) for t in targets]
        union_clause = " UNION ".join([f"{{ ?this a <{uri}> }}" for uri in target_uris])
        query = f"SELECT ?this WHERE {{ {union_clause} FILTER NOT EXISTS {{ ?this <{property_uri}> ?o }} }}"
        bnode = BNode()
        # Mark the bnode as a SPARQLTarget and hang it off the shape.
        graph.add((bnode, RDF.type, rdflib.URIRef(SH + "SPARQLTarget")))
        graph.add((bnode, rdflib.URIRef(SH + "select"), Literal(query)))
        graph.add((shape_uri, rdflib.URIRef(SH + "target"), bnode))
        logging.info(f"Added SPARQLTarget with query: {query} to shape {shape_uri}")

    # Property shape for this row's propertyID.
    property_bnode = BNode()
    graph.add((shape_uri, rdflib.URIRef(SH + "property"), property_bnode))
    graph.add((property_bnode, RDF.type, rdflib.URIRef(SH + "PropertyShape")))
    graph.add((property_bnode, RDFS.label, Literal(row['propertyLabel'])))
    path_uri = _prop_id_to_uri(row['propertyID'], prefixes)
    graph.add((property_bnode, rdflib.URIRef(SH + "path"), path_uri))
    logging.info(f"Added property shape for property {row['propertyID']} with label {row['propertyLabel']}")
    # mandatory -> sh:minCount 1; non-repeatable -> sh:maxCount 1.
    if str(row['mandatory']).strip().lower() == "true":
        graph.add((property_bnode, rdflib.URIRef(SH + "minCount"), Literal(1)))
        logging.info(f"Set minCount 1 for property {row['propertyID']}")
    if str(row['repeatable']).strip().lower() == "false":
        graph.add((property_bnode, rdflib.URIRef(SH + "maxCount"), Literal(1)))
        logging.info(f"Set maxCount 1 for property {row['propertyID']}")
    # Map the TSV severity column onto sh:Violation / sh:Warning / sh:Info.
    severity = str(row.get("severity", "")).strip()
    if severity:
        if severity == "Violation":
            graph.add((property_bnode, rdflib.URIRef(SH + "severity"),
                       rdflib.URIRef(SH + "Violation")))
        elif severity == "Warning":
            graph.add((property_bnode, rdflib.URIRef(SH + "severity"),
                       rdflib.URIRef(SH + "Warning")))
        else:
            graph.add((property_bnode, rdflib.URIRef(SH + "severity"),
                       rdflib.URIRef(SH + "Info")))
        logging.info(f"Set severity {severity} for property {row['propertyID']}")
    # Optional sh:node link to a nested value shape (e.g. big:Title).
    if pd.notna(row.get("valueShape")) and row["valueShape"].strip():
        value_shape_uri = _prop_id_to_uri(row["valueShape"], prefixes)
        graph.add((property_bnode, rdflib.URIRef(SH + "node"), value_shape_uri))
        logging.info(f"Linked valueShape {value_shape_uri} for property {row['propertyID']}")
    return graph
|
116 |
+
|
117 |
+
def build_shacl_graphs():
    """Build one SHACL shapes graph per TSV module.

    Returns:
        dict: maps each TSV path in ``TSV_FILES`` to an ``rdflib.Graph``
        containing the NodeShapes/PropertyShapes built from that file's rows.
        Missing files are logged and skipped rather than aborting the build.
    """
    logging.info("Building individual SHACL graphs from TSV files")
    module_graphs = {}
    prefixes = load_prefixes(PREFIX_FILE)
    for tsv in TSV_FILES:
        # Paths in TSV_FILES are relative to the process working directory.
        tsv_path = tsv
        if not os.path.exists(tsv_path):
            logging.error(f"TSV file not found: {tsv_path}")
            # BUGFIX: previously execution fell through to pd.read_csv and
            # raised FileNotFoundError, aborting the whole build.
            continue
        logging.info(f"Processing TSV file: {tsv_path}")
        graph = rdflib.Graph()
        register_prefixes(graph, prefixes)
        _bind_namespaces(graph)  # Bind fixed namespaces for the SHACL graph
        # comment='/' makes pandas skip lines starting with '/' (commented rows).
        df = pd.read_csv(tsv_path, sep='\t', comment='/')
        for _, row in df.iterrows():
            # Rows without a shapeID are blank/continuation rows; skip them.
            if pd.isna(row.get("shapeID")):
                continue
            add_shape_from_row(graph, row, prefixes)
        module_graphs[tsv] = graph
    logging.info("Completed building individual SHACL graphs")
    return module_graphs
|
137 |
+
|
138 |
+
def parse_results_text(results_text: str) -> str:
    """Reformat raw validation output for nicer display.

    Module headers (``===`` lines) and ``Validation Result`` headers are
    separated by a preceding blank line; every other line is indented with a
    tab so it reads as detail under the nearest header.
    """
    reformatted = []
    for raw_line in results_text.strip().splitlines():
        stripped = raw_line.strip()
        if stripped.startswith(("===", "Validation Result")):
            # Header lines get a leading blank line to visually separate sections.
            reformatted.append("\n" + stripped)
        else:
            reformatted.append("\t" + stripped)
    return "\n".join(reformatted)
|
156 |
+
|
157 |
+
def validate_rdf(rdf_data, template):
    """Validate an RDF/XML document against SHACL shapes.

    Args:
        rdf_data: RDF/XML source text to validate.
        template: ``'monograph'`` (case-insensitive) runs every TSV-derived
            SHACL module and aggregates results; anything else runs a minimal
            built-in default shape.

    Returns:
        tuple[bool, str]: (conforms, human-readable combined results text).

    Raises:
        Exception: re-raises any rdflib parse error for the input document.
    """
    logging.info("Starting validation")
    data_graph = rdflib.Graph()
    logging.info("Parsing RDF data")
    try:
        data_graph.parse(data=rdf_data, format='xml')
    except Exception as e:
        logging.error(f"Error parsing RDF data: {e}")
        raise e
    logging.info(f"Data graph has {len(data_graph)} triples.")

    # Bind known namespaces explicitly so serialized/logged output uses
    # readable prefixes regardless of what the input document declared.
    namespaces = {
        "bf": "http://id.loc.gov/ontologies/bibframe/",
        "bflc": "http://id.loc.gov/ontologies/bflc/",
        "bfsimple": "http://id.loc.gov/ontologies/bfsimple/",
        "cc": "http://creativecommons.org/ns#",
        "datatypes": "http://id.loc.gov/datatypes/",
        "dcterms": "http://purl.org/dc/terms/",
        "foaf": "http://xmlns.com/foaf/0.1/",
        "lcc": "http://id.loc.gov/ontologies/lcc#",
        "lclocal": "http://id.loc.gov/ontologies/lclocal/",
        "madsrdf": "http://www.loc.gov/mads/rdf/v1#",
        "mnotetype": "http://id.loc.gov/vocabulary/mnotetype/",
        "mstatus": "https://id.loc.gov/vocabulary/mstatus/",
        "owl": "http://www.w3.org/2002/07/owl#",
        "pmo": "http://performedmusicontology.org/ontology/",
        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
        "skos": "http://www.w3.org/2004/02/skos/core#",
        "vartitletype": "http://id.loc.gov/vocabulary/vartitletype/",
        "void": "http://rdfs.org/ns/void#",
        "xsd": "http://www.w3.org/2001/XMLSchema#"
    }
    for prefix, uri in namespaces.items():
        data_graph.bind(prefix, uri)

    logging.info(f"Data graph has {len(data_graph)} triples.")
    # Sample a few triples at DEBUG level for quick sanity checking.
    for s, p, o in list(data_graph)[:10]:
        logging.debug(f"Parsed triple: {s} {p} {o}")
    # Log the full RDF graph in Turtle form (decode handles older rdflib
    # versions that returned bytes from serialize()).
    serialized_graph = data_graph.serialize(format='turtle')
    logging.info("Full RDF graph:\n" + (serialized_graph.decode('utf-8') if isinstance(serialized_graph, bytes) else serialized_graph))

    # Extra debugging: collect all rdf:type values present in the data graph.
    classes = set()
    for s, o in data_graph.subject_objects(RDF.type):
        classes.add(o)
    logging.debug(f"Data graph contains these types: {list(classes)}")

    # Debug: show how a couple of expected target CURIEs expand.
    prefixes = load_prefixes(PREFIX_FILE)
    # BUGFIX: "https:Agent" was a typo that expanded to the bogus URI
    # <https:Agent>; the intended CURIE is "big:Agent".
    expected_targets = ["big:Agent", "big:Contribution"]
    expanded_targets = [_prop_id_to_uri(t, prefixes) for t in expected_targets]
    logging.debug(f"Expected target classes per TSV: {expanded_targets}")

    if template.lower() == 'monograph':
        logging.info("Using Monograph template; processing individual TSV modules")
        module_graphs = build_shacl_graphs()

        # Debug: inspect declared target classes in each module and query
        # the data graph for matching focus nodes.
        for tsv, module in module_graphs.items():
            logging.debug(f"Module {tsv} declared targets:")
            for shape in module.subjects(RDF.type, rdflib.URIRef("http://www.w3.org/ns/shacl#NodeShape")):
                for target in module.objects(shape, rdflib.URIRef("http://www.w3.org/ns/shacl#targetClass")):
                    logging.debug(f"Shape {shape} declares target: {target}")
                    q = f"SELECT ?x WHERE {{ ?x a <{target}> . }}"
                    matches = list(data_graph.query(q))
                    logging.debug(f"Found {len(matches)} focus node(s) for target {target}")
                    for match in matches:
                        logging.debug(f"Focus node: {match.x}")

        all_results = []
        overall_conforms = True
        for tsv, graph in module_graphs.items():
            shacl_text = graph.serialize(format='turtle')
            logging.info(f"Module {tsv} SHACL shapes:")
            logging.info(shacl_text.decode('utf-8') if isinstance(shacl_text, bytes) else shacl_text)
            conforms, results_graph, results_text = validate(data_graph, shacl_graph=graph, inference='rdfs', debug=True)
            # Override conform status: only sh:Violation results make the
            # module non-conforming; Warnings/Info are reported but tolerated.
            violation_query = """
            PREFIX sh: <http://www.w3.org/ns/shacl#>
            SELECT ?severity WHERE {
                ?vr a sh:ValidationResult ;
                    sh:resultSeverity ?severity .
            }
            """
            severities = [str(row.severity) for row in results_graph.query(violation_query)]
            module_conforms = False if any("http://www.w3.org/ns/shacl#Violation" in s for s in severities) else True
            logging.info(f"Module {tsv} - Overridden Conforms: {module_conforms}")

            # Build a nicely formatted summary of the results.
            query_formatted = """
            PREFIX sh: <http://www.w3.org/ns/shacl#>
            SELECT ?component ?severity ?sourceShape ?focus ?resultPath ?message
            WHERE {
                ?vr a sh:ValidationResult ;
                    sh:sourceConstraintComponent ?component ;
                    sh:resultSeverity ?severity ;
                    sh:sourceShape ?sourceShape ;
                    sh:focusNode ?focus ;
                    sh:resultPath ?resultPath ;
                    sh:resultMessage ?message .
            }
            ORDER BY ?component
            """
            formatted_results = ""
            count = 0
            for row in results_graph.query(query_formatted):
                count += 1
                formatted_results += f"Validation Result in {row.component}:\n"
                formatted_results += f"\tSeverity: {row.severity}\n"
                formatted_results += f"\tSource Shape: {row.sourceShape}\n"
                formatted_results += f"\tFocus Node: {row.focus}\n"
                formatted_results += f"\tResult Path: {row.resultPath}\n"
                formatted_results += f"\tMessage: {row.message}\n"
            formatted_results = f"Results ({count}):\n" + formatted_results

            # Assemble module output.
            module_output = (
                f"\n=== Module: {tsv} ===\n"
                f"Overridden Conforms: {module_conforms}\n"
                f"{formatted_results}\n"
                "------------------------\n"
            )
            all_results.append(module_output)
            if not module_conforms:
                overall_conforms = False
        combined_results = "\n".join(all_results)
        # Reformat the combined results for easier display.
        combined_results = parse_results_text(combined_results)
        return overall_conforms, combined_results
    else:
        logging.info("Using default SHACL template")
        # BUGFIX: the original Turtle snippet used xsd:string without an
        # @prefix declaration for xsd, so rdflib raised a parse error before
        # validation could even run.
        shacl_text = """
        @prefix sh: <http://www.w3.org/ns/shacl#> .
        @prefix ex: <http://example.org/> .
        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
        ex:DefaultShape a sh:NodeShape ;
            sh:targetNode ex:SomeNode ;
            sh:property [
                sh:path ex:someProperty ;
                sh:datatype xsd:string ;
            ] .
        """
        shacl_graph = rdflib.Graph()
        shacl_graph.parse(data=shacl_text, format='turtle')
        conforms, results_graph, results_text = validate(data_graph, shacl_graph=shacl_graph, inference='rdfs', debug=True)
        logging.info(f"Validation completed; Conforms: {conforms}")
        logging.info("Results text:")
        logging.info(results_text)
        serialized_results = results_graph.serialize(format='turtle')
        logging.info("Detailed results graph:")
        logging.info(serialized_results.decode('utf-8') if isinstance(serialized_results, bytes) else serialized_results)
        combined_results = (f"{results_text.strip()}\nDetailed Results:\n"
                            f"{serialized_results.decode('utf-8') if isinstance(serialized_results, bytes) else serialized_results}")
        return conforms, combined_results
|