#!/usr/bin/env python3
"""
BIBFRAME Ontology Documentation MCP Server
Provides BIBFRAME ontology documentation via MCP tools.
Dynamically loads from the official BIBFRAME ontology at id.loc.gov.
This server focuses on ontology documentation - properties, classes, domains, ranges, and usage examples.
For SHACL validation, use the separate mcp4rdf validator.
Deploy as a Hugging Face Space for your RDF applications to query.
"""
import os
import gradio as gr
import json
import requests
from typing import Dict, List, Optional, Any
from rdflib import Graph, URIRef, Literal, Namespace, RDFS, RDF, OWL
from functools import lru_cache
import re
# Enable MCP server via environment variable. This runs at import time, so the
# variable is already set when the Gradio app is built and launched below.
os.environ["GRADIO_MCP_SERVER"] = "True"
# Namespaces
# BF is the namespace used to recognise and abbreviate ontology terms ("bf:...").
BF = Namespace("http://id.loc.gov/ontologies/bibframe/")
# NOTE(review): BFLC and MADSRDF are not referenced in the visible part of this
# file - confirm whether other code depends on them before removing.
BFLC = Namespace("http://id.loc.gov/ontologies/bflc/")
MADSRDF = Namespace("http://www.loc.gov/mads/rdf/v1#")
class BIBFRAMEKnowledgeBase:
    """Loads the BIBFRAME ontology and indexes its properties and classes.

    After ``load_ontology()`` has run, ``properties`` and ``classes`` map
    prefixed names such as ``"bf:title"`` / ``"bf:Work"`` to plain dicts:

    * property record: ``uri``, ``label``, ``definition``, ``domain``,
      ``range``, ``subPropertyOf``
    * class record: ``uri``, ``label``, ``definition``, ``subClassOf``

    ``domain``, ``range``, ``subPropertyOf`` and ``subClassOf`` are lists of
    full URI strings. If the ontology cannot be loaded (or yields nothing),
    a small built-in data set is used instead.
    """

    # RDF/XML serialization of the ontology that is downloaded and parsed.
    ONTOLOGY_URL = "http://id.loc.gov/ontologies/bibframe.rdf"

    def __init__(self):
        self.ontology_graph = Graph()
        self.properties = {}
        self.classes = {}
        self._loaded = False

    def load_ontology(self):
        """Load the BIBFRAME ontology from the official source.

        Does nothing if a previous call already completed. Any exception
        raised while downloading, parsing or indexing is reported on stdout
        and answered by loading the minimal fallback data, so this method
        does not raise for those failures.
        """
        if self._loaded:
            return
        try:
            print("📚 Loading BIBFRAME ontology from id.loc.gov...")
            self._fetch_ontology()
            self._print_graph_summary()
            self._index_typed_properties()
            self._index_untyped_properties()
            self._index_classes()
            self._loaded = True
            total_props = len(self.properties)
            total_classes = len(self.classes)
            print(f"✅ Loaded {total_props} properties and {total_classes} classes from ontology")
            # If we didn't find any properties, something went wrong
            if total_props == 0 and total_classes == 0:
                print("⚠️ No properties or classes found - this seems wrong!")
                print("📦 Adding minimal fallback data...")
                self._load_minimal_fallback()
            else:
                print("🎉 Successfully loaded BIBFRAME ontology data!")
        except Exception as e:
            # Deliberate best-effort boundary: the server should still start
            # (with fallback data) when the ontology is unreachable or invalid.
            print(f"⚠️ Error loading ontology: {e}")
            print(f"⚠️ Error type: {type(e).__name__}")
            print("📦 Using minimal fallback data")
            self._load_minimal_fallback()

    def _fetch_ontology(self):
        """Download the ontology and parse it into ``ontology_graph``."""
        # Try with requests first for better error handling
        headers = {
            'User-Agent': 'BIBFRAME-MCP-Server/1.0 (https://github.com/your-repo)'
        }
        try:
            response = requests.get(self.ONTOLOGY_URL, headers=headers, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as req_e:
            print(f"⚠️ HTTP request failed: {req_e}")
            # Fallback to direct parsing (rdflib fetches the URL itself)
            self.ontology_graph.parse(self.ONTOLOGY_URL, format="xml")
            return
        # Hand the raw bytes to the XML parser so it applies the encoding
        # declared in the document instead of requests' guessed charset.
        self.ontology_graph.parse(data=response.content, format="xml")
        print("📚 Successfully downloaded and parsed BIBFRAME ontology")
        print(f"🔍 Graph contains {len(self.ontology_graph)} triples")

    def _print_graph_summary(self):
        """Print diagnostic information about the parsed graph."""
        graph = self.ontology_graph
        print("🔍 Analyzing ontology structure...")
        # Check what namespaces are used
        namespaces = list(graph.namespaces())
        print(f"🔍 Found namespaces: {[f'{prefix}: {ns}' for prefix, ns in namespaces[:5]]}")
        # Check what types are actually used
        all_types = {str(o) for _s, _p, o in graph.triples((None, RDF.type, None))}
        print(f"🔍 Found types: {list(all_types)[:5]}...")
        print("🔍 Looking for all BIBFRAME properties...")
        bf_prefix = str(BF)
        bf_subjects = {subj for subj in graph.subjects() if str(subj).startswith(bf_prefix)}
        print(f"🔍 Found {len(bf_subjects)} BIBFRAME subjects total")

    def _index_typed_properties(self):
        """Index BIBFRAME resources explicitly typed as a property."""
        # Extract properties using OWL property types (BIBFRAME uses OWL, not RDFS)
        print("🔍 Extracting OWL properties...")
        owl_property_types = [
            (OWL.ObjectProperty, "ObjectProperty"),
            (OWL.DatatypeProperty, "DatatypeProperty"),
            (OWL.AnnotationProperty, "AnnotationProperty"),
            (RDF.Property, "RDF.Property"),
        ]
        for prop_type, type_name in owl_property_types:
            prop_count = 0  # entries newly indexed for this type
            for prop in self.ontology_graph.subjects(RDF.type, prop_type):
                if self._add_property(prop):
                    prop_count += 1
            print(f"🔍 Found {prop_count} properties of type {type_name}")

    def _index_untyped_properties(self):
        """Index BIBFRAME resources that have a domain or range but may lack an explicit type."""
        print("🔍 Looking for properties with domain/range...")
        bf_prefix = str(BF)
        domain_props = set()
        for predicate in (RDFS.domain, RDFS.range):
            for prop in self.ontology_graph.subjects(predicate, None):
                if str(prop).startswith(bf_prefix):
                    domain_props.add(prop)
        print(f"🔍 Found {len(domain_props)} properties with domain/range")
        for prop in domain_props:
            self._add_property(prop)

    def _index_classes(self):
        """Index BIBFRAME classes (both OWL.Class and RDFS.Class)."""
        print("🔍 Looking for classes...")
        class_types = [(OWL.Class, "OWL.Class"), (RDFS.Class, "RDFS.Class")]
        for class_type, type_name in class_types:
            class_count = 0  # entries newly indexed for this type
            for cls in self.ontology_graph.subjects(RDF.type, class_type):
                key = self._bf_key(cls)
                if key is None or key in self.classes:
                    continue
                self.classes[key] = {
                    "uri": str(cls),
                    "label": self._get_label(cls),
                    "definition": self._get_comment(cls),
                    "subClassOf": self._get_super_classes(cls),
                }
                class_count += 1
            print(f"🔍 Found {class_count} classes of type {type_name}")

    def _add_property(self, prop):
        """Index *prop* if it is a not-yet-known BIBFRAME term; return True if it was added."""
        key = self._bf_key(prop)
        if key is None or key in self.properties:
            return False
        self.properties[key] = {
            "uri": str(prop),
            "label": self._get_label(prop),
            "definition": self._get_comment(prop),
            "domain": self._get_domains(prop),
            "range": self._get_ranges(prop),
            "subPropertyOf": self._get_super_properties(prop),
        }
        return True

    @staticmethod
    def _local_name(uri, prefix):
        """Return *uri* without its leading *prefix*, or None if it does not start with it.

        Only the leading occurrence is removed, so a prefix that happens to
        reappear later in the URI is left untouched.
        """
        if not uri.startswith(prefix):
            return None
        return uri[len(prefix):]

    def _bf_key(self, resource):
        """Return the ``bf:``-prefixed key for a BIBFRAME resource, or None for other namespaces."""
        local_name = self._local_name(str(resource), str(BF))
        return None if local_name is None else f"bf:{local_name}"

    def _load_minimal_fallback(self):
        """Minimal fallback data if ontology loading fails"""
        # Don't overwrite existing data
        if not self.properties:
            self.properties = {
                "bf:assigner": {
                    "uri": "http://id.loc.gov/ontologies/bibframe/assigner",
                    "label": "Assigner",
                    "definition": "Entity who assigned the identifier or other metadata value",
                    "domain": ["http://id.loc.gov/ontologies/bibframe/AdminMetadata"],
                    "range": ["http://id.loc.gov/ontologies/bibframe/Agent"],
                    "subPropertyOf": []
                },
                "bf:title": {
                    "uri": "http://id.loc.gov/ontologies/bibframe/title",
                    "label": "Title",
                    "definition": "Title information relating to a resource",
                    "domain": ["http://id.loc.gov/ontologies/bibframe/Work", "http://id.loc.gov/ontologies/bibframe/Instance"],
                    "range": ["http://id.loc.gov/ontologies/bibframe/Title"],
                    "subPropertyOf": []
                }
            }
        if not self.classes:
            self.classes = {
                "bf:Work": {
                    "uri": "http://id.loc.gov/ontologies/bibframe/Work",
                    "label": "Work",
                    "definition": "Most abstract level of description for a creative concept",
                    "subClassOf": ["http://id.loc.gov/ontologies/bibframe/Resource"]
                },
                "bf:Instance": {
                    "uri": "http://id.loc.gov/ontologies/bibframe/Instance",
                    "label": "Instance",
                    "definition": "Individual exemplar of a Work",
                    "subClassOf": ["http://id.loc.gov/ontologies/bibframe/Resource"]
                }
            }
        self._loaded = True
        print(f"📦 Loaded fallback data: {len(self.properties)} properties, {len(self.classes)} classes")

    def _get_label(self, resource):
        """Return an rdfs:label of *resource* as a string ("" if none)."""
        return str(self.ontology_graph.value(resource, RDFS.label) or "")

    def _get_comment(self, resource):
        """Return an rdfs:comment of *resource* as a string ("" if none)."""
        return str(self.ontology_graph.value(resource, RDFS.comment) or "")

    def _get_domains(self, prop):
        """Return the rdfs:domain values of *prop* as a list of strings."""
        return [str(d) for d in self.ontology_graph.objects(prop, RDFS.domain)]

    def _get_ranges(self, prop):
        """Return the rdfs:range values of *prop* as a list of strings."""
        return [str(r) for r in self.ontology_graph.objects(prop, RDFS.range)]

    def _get_super_properties(self, prop):
        """Return the rdfs:subPropertyOf values of *prop* as a list of strings."""
        return [str(sp) for sp in self.ontology_graph.objects(prop, RDFS.subPropertyOf)]

    def _get_super_classes(self, cls):
        """Return the rdfs:subClassOf values of *cls* as a list of strings."""
        return [str(sc) for sc in self.ontology_graph.objects(cls, RDFS.subClassOf)]
# Global instance, read by the tool functions defined below.
kb = BIBFRAMEKnowledgeBase()
# Load eagerly at import time so the index is populated before any tool is
# called. load_ontology() reports failures on stdout and switches to its
# built-in fallback data rather than raising.
kb.load_ontology()
# Helper functions
def _simplify_uri(uri: str) -> str:
"""Convert full URI to simplified form (e.g., bf:Work)"""
if not uri:
return ""
if "bibframe/" in uri:
return "bf:" + uri.split("bibframe/")[-1]
elif "rdf-schema#" in uri:
return "rdfs:" + uri.split("#")[-1]
elif "XMLSchema#" in uri:
return "xsd:" + uri.split("#")[-1]
return uri
def _generate_property_examples(prop_name: str, prop_info: Dict) -> List[str]:
"""Generate RDF/XML usage examples based on property range"""
examples = []
for range_type in prop_info.get("range", []):
range_simplified = _simplify_uri(range_type)
if "Agent" in range_simplified or "Organization" in range_simplified or "Person" in range_simplified:
examples.append(f'<{prop_name} rdf:resource="http://id.loc.gov/rwo/agents/n123"/>')
examples.append(f'''<{prop_name}>
Organization Name
{prop_name}>''')
elif "date" in range_simplified.lower():
examples.append(f'<{prop_name}>2024{prop_name}>')
examples.append(f'<{prop_name} rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-01-15{prop_name}>')
elif "Place" in range_simplified:
examples.append(f'<{prop_name} rdf:resource="http://id.loc.gov/vocabulary/countries/nyu"/>')
examples.append(f'''<{prop_name}>
New York
{prop_name}>''')
elif "Title" in range_simplified:
examples.append(f'''<{prop_name}>
Main Title Here
{prop_name}>''')
elif "string" in range_simplified.lower() or "Literal" in range_simplified:
examples.append(f'<{prop_name}>Text value here{prop_name}>')
else:
# Generic object property
examples.append(f'<{prop_name} rdf:resource="http://example.org/resource"/>')
if not examples:
# Fallback if no range specified
examples.append(f'<{prop_name}>Value{prop_name}>')
return examples
def _generate_usage_notes(prop_name: str, prop_info: Dict) -> str:
"""Generate helpful usage notes based on property metadata"""
notes = []
# Note about domains
if prop_info.get("domain"):
domains = [_simplify_uri(d) for d in prop_info["domain"]]
notes.append(f"Used with: {', '.join(domains)}")
# Note about ranges
if prop_info.get("range"):
ranges = [_simplify_uri(r) for r in prop_info["range"]]
notes.append(f"Expected values: {', '.join(ranges)}")
# Special notes for common properties
if prop_name == "bf:assigner":
notes.append("Commonly required for AdminMetadata. Often same as bf:agent or bf:descriptionModifier")
elif prop_name == "bf:adminMetadata":
notes.append("Required for Work and Instance. Contains cataloging metadata")
elif prop_name == "bf:title":
notes.append("Required for Work and Instance. Use bf:Title with bf:mainTitle")
return "; ".join(notes) if notes else "No special usage notes"
def get_property_info(property_uri: str) -> str:
    """
    Get detailed information about a BIBFRAME property from the official ontology.

    Args:
        property_uri (str): Full URI, prefixed name or bare local name
            (e.g. "http://id.loc.gov/ontologies/bibframe/assigner",
            "bf:assigner" or "assigner")

    Returns:
        str: JSON string with property information, examples, and usage guidance.
            If the property is unknown, the JSON instead contains "error",
            up to 10 "suggestions" and "total_properties".
    """
    # Normalize to the "bf:<local name>" form used as the index key. Full
    # URIs are abbreviated rather than prefixed, so the documented full-URI
    # input actually resolves.
    property_uri = (property_uri or "").strip()
    if property_uri.startswith("http"):
        property_uri = _simplify_uri(property_uri)
    elif not property_uri.startswith("bf:"):
        property_uri = f"bf:{property_uri}"
    prop_info = kb.properties.get(property_uri)
    if not prop_info:
        # Find partial matches on the local name, so "title" also suggests
        # names such as "bf:mainTitle" that merely contain it.
        needle = property_uri[3:] if property_uri.startswith("bf:") else property_uri
        needle = needle.lower()
        matches = [k for k in kb.properties if needle in k.lower()]
        return json.dumps({
            "error": f"Property '{property_uri}' not found in BIBFRAME ontology",
            "suggestions": matches[:10] if matches else list(kb.properties)[:10],
            "total_properties": len(kb.properties)
        }, indent=2)
    local_name = property_uri[3:] if property_uri.startswith("bf:") else property_uri
    result = {
        "property": property_uri,
        "uri": prop_info["uri"],
        "label": prop_info["label"],
        "definition": prop_info["definition"],
        "domain": [_simplify_uri(d) for d in prop_info["domain"]],
        "range": [_simplify_uri(r) for r in prop_info["range"]],
        # Generated from the range / property metadata respectively
        "examples": _generate_property_examples(property_uri, prop_info),
        "usage_notes": _generate_usage_notes(property_uri, prop_info),
        "documentation_url": f"https://id.loc.gov/ontologies/bibframe.html#{local_name}",
        # Additive key: super-properties recorded in the index
        "subPropertyOf": [_simplify_uri(sp) for sp in prop_info.get("subPropertyOf", [])]
    }
    return json.dumps(result, indent=2)
def get_class_info(class_name: str) -> str:
    """
    Get detailed information about a BIBFRAME class from the official ontology.

    Args:
        class_name (str): Class name (e.g., "Work", "bf:Work", or full URI
            such as "http://id.loc.gov/ontologies/bibframe/Work")

    Returns:
        str: JSON string with class information and typical properties.
            "applicable_properties" holds at most 20 properties whose domain
            lists this class directly; "total_properties" is the untruncated
            count. If the class is unknown, the JSON instead contains
            "error", up to 10 "suggestions" and "total_classes".
    """
    # Normalize to the "bf:<local name>" form used as the index key. Full
    # URIs are abbreviated rather than prefixed, so the documented full-URI
    # input actually resolves.
    class_name = (class_name or "").strip()
    if class_name.startswith("http"):
        class_name = _simplify_uri(class_name)
    elif not class_name.startswith("bf:"):
        class_name = f"bf:{class_name}"
    class_info = kb.classes.get(class_name)
    if not class_info:
        # Find partial matches on the local name
        needle = class_name[3:] if class_name.startswith("bf:") else class_name
        needle = needle.lower()
        matches = [k for k in kb.classes if needle in k.lower()]
        return json.dumps({
            "error": f"Class '{class_name}' not found in BIBFRAME ontology",
            "suggestions": matches[:10] if matches else list(kb.classes)[:10],
            "total_classes": len(kb.classes)
        }, indent=2)
    # Find properties that have this class in their domain
    class_uri = class_info["uri"]
    applicable_properties = [
        {
            "property": prop_name,
            "label": prop_data.get("label", ""),
            "definition": prop_data.get("definition", "")
        }
        for prop_name, prop_data in kb.properties.items()
        if class_uri in prop_data.get("domain", [])
    ]
    local_name = class_name[3:] if class_name.startswith("bf:") else class_name
    result = {
        "class": class_name,
        "uri": class_uri,
        "label": class_info["label"],
        "definition": class_info["definition"],
        "superClasses": [_simplify_uri(sc) for sc in class_info.get("subClassOf", [])],
        "applicable_properties": applicable_properties[:20],  # Limit to first 20
        "total_properties": len(applicable_properties),
        "documentation_url": f"https://id.loc.gov/ontologies/bibframe.html#{local_name}"
    }
    return json.dumps(result, indent=2)
def search_ontology(search_term: str, search_type: str = "all") -> str:
    """
    Search the BIBFRAME ontology for properties or classes matching a term.

    Matching is a case-insensitive substring test against the prefixed name,
    the label and the definition.

    Args:
        search_term (str): Term to search for
        search_type (str): Type of search - "properties", "classes", or "all"
            (case and surrounding whitespace are ignored)

    Returns:
        str: JSON string with matching properties and/or classes. Each list
            holds at most 20 entries with definitions shortened to 100
            characters; "total_found" is the number of matches before that
            limit is applied.
    """
    max_results = 20
    max_definition_chars = 100
    needle = (search_term or "").lower()
    search_type = (search_type or "all").strip().lower()

    def _is_match(name, data):
        # True when the term occurs in the name, label or definition.
        haystacks = (name, data.get("label", ""), data.get("definition", ""))
        return any(needle in text.lower() for text in haystacks)

    def _preview(text):
        # Shorten long definitions so result lists stay compact.
        if len(text) > max_definition_chars:
            return text[:max_definition_chars] + "..."
        return text

    def _collect(index, name_key):
        return [
            {
                name_key: name,
                "label": data.get("label", ""),
                "definition": _preview(data.get("definition", ""))
            }
            for name, data in index.items()
            if _is_match(name, data)
        ]

    property_hits = _collect(kb.properties, "property") if search_type in ("properties", "all") else []
    class_hits = _collect(kb.classes, "class") if search_type in ("classes", "all") else []
    results = {
        "search_term": search_term,
        # Limit results
        "properties": property_hits[:max_results],
        "classes": class_hits[:max_results],
        # Count every match, not just the ones that survived the limit
        "total_found": len(property_hits) + len(class_hits)
    }
    return json.dumps(results, indent=2)
def get_property_usage(property_name: str, class_name: str = "") -> str:
    """
    Get usage information for a property, optionally in the context of a specific class.

    Args:
        property_name (str): Property name (e.g., "assigner", "bf:assigner"
            or a full BIBFRAME URI)
        class_name (str): Optional class context (e.g., "AdminMetadata",
            "bf:AdminMetadata" or a full BIBFRAME URI)

    Returns:
        str: JSON string with usage information and examples. When a class is
            given, "context_note" (and, if the class is known,
            "applies_to_class") say whether the class appears directly in the
            property's domain. If the property is unknown, the JSON contains
            only "error".
    """
    # Normalize property name. Full URIs are abbreviated instead of being
    # prefixed with "bf:", which would produce a key that can never match.
    property_name = (property_name or "").strip()
    if property_name.startswith("http"):
        property_name = _simplify_uri(property_name)
    elif not property_name.startswith("bf:"):
        property_name = f"bf:{property_name}"
    prop_info = kb.properties.get(property_name)
    if not prop_info:
        return json.dumps({"error": f"Property '{property_name}' not found in BIBFRAME ontology"}, indent=2)
    usage = {
        "property": property_name,
        "label": prop_info.get("label", ""),
        "definition": prop_info.get("definition", ""),
        "domain": [_simplify_uri(d) for d in prop_info.get("domain", [])],
        "range": [_simplify_uri(r) for r in prop_info.get("range", [])],
        "examples": _generate_property_examples(property_name, prop_info)
    }
    # Add class-specific context if provided
    class_name = (class_name or "").strip()
    if class_name:
        if class_name.startswith("http"):
            class_name = _simplify_uri(class_name)
        elif not class_name.startswith("bf:"):
            class_name = f"bf:{class_name}"
        class_info = kb.classes.get(class_name)
        if not class_info:
            usage["context_note"] = f"Class '{class_name}' not found in ontology"
        elif class_info["uri"] in prop_info.get("domain", []):
            usage["applies_to_class"] = True
            usage["context_note"] = f"{property_name} is applicable to {class_name}"
        else:
            usage["applies_to_class"] = False
            usage["context_note"] = f"{property_name} is not typically used with {class_name}"
    return json.dumps(usage, indent=2)
# Create Gradio interface - documentation page plus UI-less tool endpoints
def create_interface():
    """Build the Gradio Blocks app.

    The page itself is a single Markdown document describing the server. The
    four tool functions are additionally registered with ``gr.api`` so that
    they exist as API endpoints without any interactive components; without
    this registration the app would contain no endpoints to expose as tools.

    Returns:
        The ``gr.Blocks`` instance, ready for ``launch()``.
    """
    with gr.Blocks(title="BIBFRAME MCP Server") as demo:
        # NOTE(review): the endpoint URL shown in the page text is hard-coded;
        # confirm it matches the MCP path served by the installed Gradio version.
        gr.Markdown(f"""
# 📚 BIBFRAME Ontology MCP Server
**Status:** ✅ Loaded {len(kb.properties)} properties and {len(kb.classes)} classes
This is a **Model Context Protocol (MCP)** server providing BIBFRAME ontology documentation tools.
The web interface is documentation only - use the MCP endpoint to query the ontology.
---
## 🔌 MCP Endpoint
**SSE Endpoint:** `https://jimfhahn-mcp4bibframe-docs.hf.space/api/mcp`
---
## 🛠️ Available MCP Tools
### 1. `get_property_info`
Get detailed information about a BIBFRAME property.
**Input:** `property_uri` (string) - e.g., "assigner", "bf:assigner", or "title"
**Returns:** JSON with property URI, label, definition, domain, range, examples, and usage notes
**Example:**
```json
{{
"property": "bf:assigner",
"label": "Assigner",
"definition": "Entity who assigned the identifier or other metadata value",
"domain": ["bf:AdminMetadata"],
"range": ["bf:Agent"],
"examples": [""]
}}
```
---
### 2. `get_class_info`
Get detailed information about a BIBFRAME class.
**Input:** `class_name` (string) - e.g., "Work", "bf:Work", or "Instance"
**Returns:** JSON with class URI, label, definition, superclasses, and applicable properties
**Example:**
```json
{{
"class": "bf:Work",
"label": "Work",
"definition": "Most abstract level of description for a creative concept",
"applicable_properties": [
{{"property": "bf:title", "label": "Title"}},
{{"property": "bf:contribution", "label": "Contribution"}}
]
}}
```
---
### 3. `search_ontology`
Search for properties or classes matching a term.
**Inputs:**
- `search_term` (string) - e.g., "title", "agent", "date"
- `search_type` (string) - "all", "properties", or "classes"
**Returns:** JSON with matching properties and classes (max 20 each)
---
### 4. `get_property_usage`
Get usage information for a property in context of a specific class.
**Inputs:**
- `property_name` (string) - e.g., "assigner"
- `class_name` (string, optional) - e.g., "AdminMetadata"
**Returns:** JSON with usage information, examples, and context notes
---
## 🔧 Configuration
### For Claude Desktop
Add to `claude_desktop_config.json`:
```json
{{
"mcpServers": {{
"bibframe-docs": {{
"url": "https://jimfhahn-mcp4bibframe-docs.hf.space/api/mcp"
}}
}}
}}
```
### For Other MCP Clients
Use the SSE endpoint URL above with your MCP client library.
---
## 📖 Data Source
- **BIBFRAME Ontology:** [http://id.loc.gov/ontologies/bibframe.rdf](http://id.loc.gov/ontologies/bibframe.rdf)
- **Official Documentation:** [https://www.loc.gov/bibframe/](https://www.loc.gov/bibframe/)
---
## 🔗 Related Tools
- **SHACL Validator:** [mcp4rdf](https://huggingface.co/spaces/jimfhahn/mcp4rdf)
- **BIBFRAME Profiles:** [LC BFE Profiles](https://github.com/lcnetdev/bfe-profiles)
---
*This server automatically loads the latest BIBFRAME ontology on startup.*
""")
        # Register the tool functions as UI-less endpoints. Looked up with
        # getattr so an older Gradio without gr.api still serves the page
        # instead of failing at startup.
        register_api = getattr(gr, "api", None)
        if register_api is None:
            print("⚠️ This Gradio version has no gr.api(); MCP tools were not registered")
        else:
            for tool in (get_property_info, get_class_info, search_ontology, get_property_usage):
                register_api(tool)
    return demo
if __name__ == "__main__":
    print("🚀 Starting BIBFRAME MCP Server...")
    # Force ontology loading to show progress (returns immediately when the
    # import-time load has already completed).
    kb.load_ontology()
    print(f"✅ Loaded {len(kb.properties)} properties and {len(kb.classes)} classes")
    demo = create_interface()
    print("🔧 Launching with MCP server enabled...")
    # Listen on all interfaces, port 7860, with the API page shown.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": True,
    }
    demo.launch(**launch_options)