#!/usr/bin/env python3
"""
BIBFRAME Ontology Documentation MCP Server

Provides BIBFRAME ontology documentation via MCP tools.
Dynamically loads from the official BIBFRAME ontology at id.loc.gov.

This server focuses on ontology documentation - properties, classes,
domains, ranges, and usage examples. For SHACL validation, use the
separate mcp4rdf validator.

Deploy as a Hugging Face Space for your RDF applications to query.
"""

import json
import os
import re
import textwrap
from functools import lru_cache
from typing import Dict, List, Optional, Any

import gradio as gr
import requests
from rdflib import Graph, URIRef, Literal, Namespace, RDFS, RDF, OWL

# Enable MCP server via environment variable
os.environ["GRADIO_MCP_SERVER"] = "True"

# Namespaces
BF = Namespace("http://id.loc.gov/ontologies/bibframe/")
BFLC = Namespace("http://id.loc.gov/ontologies/bflc/")
MADSRDF = Namespace("http://www.loc.gov/mads/rdf/v1#")

# Full-URI spellings of the BIBFRAME namespace that the tools accept as input.
# The https form is not the canonical namespace, but users commonly paste it.
_BF_URI_PREFIXES = (str(BF), str(BF).replace("http://", "https://", 1))


class BIBFRAMEKnowledgeBase:
    """Loads and caches BIBFRAME ontology data.

    After ``load_ontology()`` has run, ``properties`` and ``classes`` map
    ``bf:``-prefixed names (e.g. ``"bf:title"``) to plain dicts of strings and
    lists of URI strings.
    """

    ONTOLOGY_URL = "http://id.loc.gov/ontologies/bibframe.rdf"

    # rdf:type values that mark a subject as a property, with the label used
    # in the progress output.
    _PROPERTY_TYPES = (
        (OWL.ObjectProperty, "ObjectProperty"),
        (OWL.DatatypeProperty, "DatatypeProperty"),
        (OWL.AnnotationProperty, "AnnotationProperty"),
        (RDF.Property, "RDF.Property"),
    )
    _CLASS_TYPES = ((OWL.Class, "OWL.Class"), (RDFS.Class, "RDFS.Class"))

    def __init__(self):
        self.ontology_graph = Graph()
        self.properties = {}
        self.classes = {}
        self._loaded = False

    def load_ontology(self):
        """Load the BIBFRAME ontology from the official source.

        Idempotent: returns immediately once a load (or fallback) has
        completed. Any failure while downloading, parsing or extracting is
        reported on stdout and replaced by the minimal built-in data, so this
        method does not raise.
        """
        if self._loaded:
            return

        try:
            print("📚 Loading BIBFRAME ontology from id.loc.gov...")
            self._fetch_ontology()
            self._report_graph_overview()
            self._extract_properties()
            self._extract_classes()

            self._loaded = True
            total_props = len(self.properties)
            total_classes = len(self.classes)
            print(f"✅ Loaded {total_props} properties and {total_classes} classes from ontology")

            # If we didn't find anything at all, something went wrong
            if total_props == 0 and total_classes == 0:
                print("⚠️ No properties or classes found - this seems wrong!")
                print("📦 Adding minimal fallback data...")
                self._load_minimal_fallback()
            else:
                print("🎉 Successfully loaded BIBFRAME ontology data!")
        except Exception as e:
            # Startup boundary: the server must come up even when id.loc.gov
            # is unreachable or returns something unparsable.
            print(f"⚠️ Error loading ontology: {e}")
            print(f"⚠️ Error type: {type(e).__name__}")
            print("📦 Using minimal fallback data")
            self._load_minimal_fallback()

    def _fetch_ontology(self):
        """Download the RDF/XML ontology and parse it into ``ontology_graph``."""
        # Try with requests first for better error handling
        headers = {
            'User-Agent': 'BIBFRAME-MCP-Server/1.0 (https://github.com/your-repo)'
        }
        try:
            response = requests.get(self.ONTOLOGY_URL, headers=headers, timeout=30)
            response.raise_for_status()
            # Parse from string
            self.ontology_graph.parse(data=response.text, format="xml")
            print("📚 Successfully downloaded and parsed BIBFRAME ontology")
            print(f"🔍 Graph contains {len(self.ontology_graph)} triples")
        except requests.exceptions.RequestException as req_e:
            print(f"⚠️ HTTP request failed: {req_e}")
            # Fallback: let rdflib fetch the URL itself
            self.ontology_graph.parse(self.ONTOLOGY_URL, format="xml")

    def _report_graph_overview(self):
        """Print diagnostic information about the parsed graph."""
        print("🔍 Analyzing ontology structure...")

        namespaces = list(self.ontology_graph.namespaces())
        print(f"🔍 Found namespaces: {[f'{prefix}: {ns}' for prefix, ns in namespaces[:5]]}")

        all_types = {
            str(o) for _, _, o in self.ontology_graph.triples((None, RDF.type, None))
        }
        print(f"🔍 Found types: {list(all_types)[:5]}...")

        print("🔍 Looking for all BIBFRAME properties...")
        bf_subjects = {
            subj for subj in self.ontology_graph.subjects() if self._in_bf(subj)
        }
        print(f"🔍 Found {len(bf_subjects)} BIBFRAME subjects total")

    def _extract_properties(self):
        """Populate ``properties`` from typed and domain/range-bearing subjects."""
        # BIBFRAME declares its properties with OWL types, not RDFS
        print("🔍 Extracting OWL properties...")
        for prop_type, type_name in self._PROPERTY_TYPES:
            prop_count = 0
            for prop in self.ontology_graph.subjects(RDF.type, prop_type):
                if self._in_bf(prop) and self._add_property(prop):
                    prop_count += 1
            print(f"🔍 Found {prop_count} properties of type {type_name}")

        # Also pick up properties that have domain/range but no explicit type
        print("🔍 Looking for properties with domain/range...")
        domain_props = set()
        for predicate in (RDFS.domain, RDFS.range):
            for prop in self.ontology_graph.subjects(predicate, None):
                if self._in_bf(prop):
                    domain_props.add(prop)
        print(f"🔍 Found {len(domain_props)} properties with domain/range")

        for prop in domain_props:
            self._add_property(prop)

    def _extract_classes(self):
        """Populate ``classes`` from owl:Class and rdfs:Class subjects."""
        print("🔍 Looking for classes...")
        for class_type, type_name in self._CLASS_TYPES:
            class_count = 0
            for cls in self.ontology_graph.subjects(RDF.type, class_type):
                if not self._in_bf(cls):
                    continue
                key = self._curie(cls)
                if key not in self.classes:
                    self.classes[key] = {
                        "uri": str(cls),
                        "label": self._get_label(cls),
                        "definition": self._get_comment(cls),
                        "subClassOf": self._get_super_classes(cls)
                    }
                    class_count += 1
            print(f"🔍 Found {class_count} classes of type {type_name}")

    def _add_property(self, prop):
        """Record *prop* under its ``bf:`` name; return True if newly added."""
        key = self._curie(prop)
        if key in self.properties:
            return False
        self.properties[key] = {
            "uri": str(prop),
            "label": self._get_label(prop),
            "definition": self._get_comment(prop),
            "domain": self._get_domains(prop),
            "range": self._get_ranges(prop),
            "subPropertyOf": self._get_super_properties(prop)
        }
        return True

    @staticmethod
    def _in_bf(resource):
        """Return True when *resource* lives in the BIBFRAME namespace."""
        return str(resource).startswith(str(BF))

    @staticmethod
    def _curie(resource):
        """Return the ``bf:LocalName`` form of a BIBFRAME-namespace resource."""
        return "bf:" + str(resource)[len(str(BF)):]

    def _load_minimal_fallback(self):
        """Minimal fallback data if ontology loading fails"""
        # Don't overwrite existing data
        if not self.properties:
            self.properties = {
                "bf:assigner": {
                    "uri": "http://id.loc.gov/ontologies/bibframe/assigner",
                    "label": "Assigner",
                    "definition": "Entity who assigned the identifier or other metadata value",
                    "domain": ["http://id.loc.gov/ontologies/bibframe/AdminMetadata"],
                    "range": ["http://id.loc.gov/ontologies/bibframe/Agent"],
                    "subPropertyOf": []
                },
                "bf:title": {
                    "uri": "http://id.loc.gov/ontologies/bibframe/title",
                    "label": "Title",
                    "definition": "Title information relating to a resource",
                    "domain": [
                        "http://id.loc.gov/ontologies/bibframe/Work",
                        "http://id.loc.gov/ontologies/bibframe/Instance"
                    ],
                    "range": ["http://id.loc.gov/ontologies/bibframe/Title"],
                    "subPropertyOf": []
                }
            }

        if not self.classes:
            self.classes = {
                "bf:Work": {
                    "uri": "http://id.loc.gov/ontologies/bibframe/Work",
                    "label": "Work",
                    "definition": "Most abstract level of description for a creative concept",
                    "subClassOf": ["http://id.loc.gov/ontologies/bibframe/Resource"]
                },
                "bf:Instance": {
                    "uri": "http://id.loc.gov/ontologies/bibframe/Instance",
                    "label": "Instance",
                    "definition": "Individual exemplar of a Work",
                    "subClassOf": ["http://id.loc.gov/ontologies/bibframe/Resource"]
                }
            }

        self._loaded = True
        print(f"📦 Loaded fallback data: {len(self.properties)} properties, {len(self.classes)} classes")

    def _get_label(self, resource):
        """Return one rdfs:label of *resource* as a string ("" if none)."""
        return str(self.ontology_graph.value(resource, RDFS.label) or "")

    def _get_comment(self, resource):
        """Return one rdfs:comment of *resource* as a string ("" if none)."""
        return str(self.ontology_graph.value(resource, RDFS.comment) or "")

    def _get_domains(self, prop):
        """Return all rdfs:domain values of *prop* as URI strings."""
        return [str(d) for d in self.ontology_graph.objects(prop, RDFS.domain)]

    def _get_ranges(self, prop):
        """Return all rdfs:range values of *prop* as URI strings."""
        return [str(r) for r in self.ontology_graph.objects(prop, RDFS.range)]

    def _get_super_properties(self, prop):
        """Return all rdfs:subPropertyOf values of *prop* as URI strings."""
        return [str(sp) for sp in self.ontology_graph.objects(prop, RDFS.subPropertyOf)]

    def _get_super_classes(self, cls):
        """Return all rdfs:subClassOf values of *cls* as URI strings."""
        return [str(sc) for sc in self.ontology_graph.objects(cls, RDFS.subClassOf)]


# Global instance, loaded at import time so the tools below can be served
# immediately (load_ontology never raises; it falls back to built-in data).
kb = BIBFRAMEKnowledgeBase()
kb.load_ontology()


# Helper functions
def _simplify_uri(uri: str) -> str:
    """Convert full URI to simplified form (e.g., bf:Work)"""
    if not uri:
        return ""
    if "bibframe/" in uri:
        return "bf:" + uri.split("bibframe/")[-1]
    if "rdf-schema#" in uri:
        return "rdfs:" + uri.split("#")[-1]
    if "XMLSchema#" in uri:
        return "xsd:" + uri.split("#")[-1]
    return uri


def _normalize_name(name: str) -> str:
    """Map user input to the ``bf:``-prefixed key form used by the knowledge base.

    Accepts a bare local name ("assigner"), a prefixed name ("bf:assigner") or
    a full BIBFRAME URI. Other ``http...`` strings are returned unchanged (and
    will simply not be found by the caller).
    """
    name = name.strip()
    for prefix in _BF_URI_PREFIXES:
        if name.startswith(prefix):
            return "bf:" + name[len(prefix):]
    if name.startswith(("bf:", "http")):
        return name
    return f"bf:{name}"


def _truncate(text: str, limit: int = 100) -> str:
    """Return *text* cut to *limit* characters plus "..." when it is longer."""
    return text[:limit] + "..." if len(text) > limit else text


def _nested_example(prop_name: str, class_tag: str, child_tag: str, text: str) -> str:
    """Build a well-formed example with a typed blank node inside *prop_name*."""
    return (
        f"<{prop_name}>\n"
        f"  <{class_tag}>\n"
        f"    <{child_tag}>{text}</{child_tag}>\n"
        f"  </{class_tag}>\n"
        f"</{prop_name}>"
    )


def _class_tag(range_simplified: str, default: str) -> str:
    """Use the range's own bf: name as element name, else *default*."""
    return range_simplified if range_simplified.startswith("bf:") else default


def _generate_property_examples(prop_name: str, prop_info: Dict) -> List[str]:
    """Generate RDF/XML usage examples based on property range.

    Every example is a well-formed XML fragment (each opened element is
    closed). One or two examples are produced per declared range; a generic
    literal example is returned when the property declares no range.
    """
    examples = []

    for range_type in prop_info.get("range", []):
        range_simplified = _simplify_uri(range_type)

        if any(kind in range_simplified for kind in ("Agent", "Organization", "Person")):
            examples.append(f'<{prop_name} rdf:resource="http://id.loc.gov/rwo/agents/n123"/>')
            examples.append(_nested_example(
                prop_name, _class_tag(range_simplified, "bf:Agent"),
                "rdfs:label", "Organization Name"))
        elif "date" in range_simplified.lower():
            examples.append(f'<{prop_name}>2024</{prop_name}>')
            examples.append(
                f'<{prop_name} rdf:datatype="http://www.w3.org/2001/XMLSchema#date">'
                f'2024-01-15</{prop_name}>')
        elif "Place" in range_simplified:
            examples.append(f'<{prop_name} rdf:resource="http://id.loc.gov/vocabulary/countries/nyu"/>')
            examples.append(_nested_example(
                prop_name, _class_tag(range_simplified, "bf:Place"),
                "rdfs:label", "New York"))
        elif "Title" in range_simplified:
            examples.append(_nested_example(
                prop_name, _class_tag(range_simplified, "bf:Title"),
                "bf:mainTitle", "Main Title Here"))
        elif "string" in range_simplified.lower() or "Literal" in range_simplified:
            examples.append(f'<{prop_name}>Text value here</{prop_name}>')
        else:
            # Generic object property
            examples.append(f'<{prop_name} rdf:resource="http://example.org/resource"/>')

    if not examples:
        # Fallback if no range specified
        examples.append(f'<{prop_name}>Value</{prop_name}>')

    return examples


# Hand-written guidance for a few frequently used properties.
_SPECIAL_USAGE_NOTES = {
    "bf:assigner": "Commonly required for AdminMetadata. Often same as bf:agent or bf:descriptionModifier",
    "bf:adminMetadata": "Required for Work and Instance. Contains cataloging metadata",
    "bf:title": "Required for Work and Instance. Use bf:Title with bf:mainTitle",
}


def _generate_usage_notes(prop_name: str, prop_info: Dict) -> str:
    """Generate helpful usage notes based on property metadata"""
    notes = []

    if prop_info.get("domain"):
        domains = [_simplify_uri(d) for d in prop_info["domain"]]
        notes.append(f"Used with: {', '.join(domains)}")

    if prop_info.get("range"):
        ranges = [_simplify_uri(r) for r in prop_info["range"]]
        notes.append(f"Expected values: {', '.join(ranges)}")

    special = _SPECIAL_USAGE_NOTES.get(prop_name)
    if special:
        notes.append(special)

    return "; ".join(notes) if notes else "No special usage notes"


def get_property_info(property_uri: str) -> str:
    """
    Get detailed information about a BIBFRAME property from the official ontology.

    Args:
        property_uri (str): Full URI or short name (e.g., "bf:assigner" or "assigner")

    Returns:
        str: JSON string with property information, examples, and usage guidance
    """
    # Normalize to the "bf:name" key form (also maps full BIBFRAME URIs)
    property_uri = _normalize_name(property_uri)

    prop_info = kb.properties.get(property_uri, {})

    if not prop_info:
        # Offer partial matches, or the first few known properties
        needle = property_uri.lower()
        matches = [k for k in kb.properties if needle in k.lower()]
        return json.dumps({
            "error": f"Property '{property_uri}' not found in BIBFRAME ontology",
            "suggestions": matches[:10] if matches else list(kb.properties)[:10],
            "total_properties": len(kb.properties)
        }, indent=2)

    result = {
        "property": property_uri,
        "uri": prop_info["uri"],
        "label": prop_info["label"],
        "definition": prop_info["definition"],
        "domain": [_simplify_uri(d) for d in prop_info["domain"]],
        "range": [_simplify_uri(r) for r in prop_info["range"]],
        "examples": _generate_property_examples(property_uri, prop_info),
        "usage_notes": _generate_usage_notes(property_uri, prop_info),
        "documentation_url": f"https://id.loc.gov/ontologies/bibframe.html#{property_uri.replace('bf:', '')}"
    }

    return json.dumps(result, indent=2)


def get_class_info(class_name: str) -> str:
    """
    Get detailed information about a BIBFRAME class from the official ontology.

    Args:
        class_name (str): Class name (e.g., "Work", "bf:Work", or full URI)

    Returns:
        str: JSON string with class information and typical properties
    """
    # Normalize to the "bf:Name" key form (also maps full BIBFRAME URIs)
    class_name = _normalize_name(class_name)

    class_info = kb.classes.get(class_name, {})

    if not class_info:
        # Offer partial matches, or the first few known classes
        needle = class_name.lower()
        matches = [k for k in kb.classes if needle in k.lower()]
        return json.dumps({
            "error": f"Class '{class_name}' not found in BIBFRAME ontology",
            "suggestions": matches[:10] if matches else list(kb.classes)[:10],
            "total_classes": len(kb.classes)
        }, indent=2)

    # Properties whose declared domain includes exactly this class
    class_uri = class_info["uri"]
    applicable_properties = [
        {
            "property": prop_name,
            "label": prop_data.get("label", ""),
            "definition": prop_data.get("definition", "")
        }
        for prop_name, prop_data in kb.properties.items()
        if class_uri in prop_data.get("domain", [])
    ]

    result = {
        "class": class_name,
        "uri": class_uri,
        "label": class_info["label"],
        "definition": class_info["definition"],
        "superClasses": [_simplify_uri(sc) for sc in class_info.get("subClassOf", [])],
        "applicable_properties": applicable_properties[:20],  # Limit to first 20
        "total_properties": len(applicable_properties),
        "documentation_url": f"https://id.loc.gov/ontologies/bibframe.html#{class_name.replace('bf:', '')}"
    }

    return json.dumps(result, indent=2)


def _search_entries(entries: Dict, needle: str, key_field: str) -> List[Dict]:
    """Return summaries of entries whose name, label or definition contains *needle*.

    *needle* must already be lower-cased; *key_field* is the JSON key under
    which the entry name is reported ("property" or "class").
    """
    hits = []
    for name, data in entries.items():
        label = data.get("label", "")
        definition = data.get("definition", "")
        if needle in name.lower() or needle in label.lower() or needle in definition.lower():
            hits.append({
                key_field: name,
                "label": label,
                "definition": _truncate(definition)
            })
    return hits


def search_ontology(search_term: str, search_type: str = "all") -> str:
    """
    Search the BIBFRAME ontology for properties or classes matching a term.

    Args:
        search_term (str): Term to search for
        search_type (str): Type of search - "properties", "classes", or "all"

    Returns:
        str: JSON string with matching properties and/or classes (at most 20
        of each are listed; "total_found" counts every match, listed or not)
    """
    results = {
        "search_term": search_term,
        "properties": [],
        "classes": []
    }
    needle = search_term.lower()
    total_found = 0

    if search_type in ("properties", "all"):
        hits = _search_entries(kb.properties, needle, "property")
        total_found += len(hits)
        results["properties"] = hits[:20]

    if search_type in ("classes", "all"):
        hits = _search_entries(kb.classes, needle, "class")
        total_found += len(hits)
        results["classes"] = hits[:20]

    # Count before truncation so callers can tell when results were cut off
    results["total_found"] = total_found

    return json.dumps(results, indent=2)


def get_property_usage(property_name: str, class_name: str = "") -> str:
    """
    Get usage information for a property, optionally in the context of a specific class.

    Args:
        property_name (str): Property name (e.g., "assigner" or "bf:assigner")
        class_name (str): Optional class context (e.g., "AdminMetadata")

    Returns:
        str: JSON string with usage information and examples
    """
    property_name = _normalize_name(property_name)

    prop_info = kb.properties.get(property_name, {})

    if not prop_info:
        return json.dumps({"error": f"Property '{property_name}' not found in BIBFRAME ontology"}, indent=2)

    usage = {
        "property": property_name,
        "label": prop_info.get("label", ""),
        "definition": prop_info.get("definition", ""),
        "domain": [_simplify_uri(d) for d in prop_info.get("domain", [])],
        "range": [_simplify_uri(r) for r in prop_info.get("range", [])],
        "examples": _generate_property_examples(property_name, prop_info)
    }

    # Add class-specific context if provided
    if class_name:
        class_name = _normalize_name(class_name)
        class_info = kb.classes.get(class_name, {})
        if not class_info:
            usage["context_note"] = f"Class '{class_name}' not found in ontology"
        elif class_info["uri"] in prop_info.get("domain", []):
            usage["applies_to_class"] = True
            usage["context_note"] = f"{property_name} is applicable to {class_name}"
        else:
            usage["applies_to_class"] = False
            usage["context_note"] = f"{property_name} is not typically used with {class_name}"

    return json.dumps(usage, indent=2)


# Functions exposed as API / MCP tools, in the order they are documented.
_MCP_TOOLS = (get_property_info, get_class_info, search_ontology, get_property_usage)


# Create Gradio interface - DOCUMENTATION ONLY (no interactive components)
def create_interface():
    """Build the Gradio Blocks app: a documentation page plus the tool endpoints.

    The page itself has no interactive components, so the four tool functions
    are registered explicitly with ``gr.api``; without an endpoint per function
    there would be nothing for the MCP server to expose.
    """
    with gr.Blocks(title="BIBFRAME MCP Server") as demo:
        gr.Markdown(textwrap.dedent(f"""
            # 📚 BIBFRAME Ontology MCP Server

            **Status:** ✅ Loaded {len(kb.properties)} properties and {len(kb.classes)} classes

            This is a **Model Context Protocol (MCP)** server providing BIBFRAME ontology documentation tools.
            The web interface is documentation only - use the MCP endpoint to query the ontology.

            ---

            ## 🔌 MCP Endpoint

            **SSE Endpoint:** `https://jimfhahn-mcp4bibframe-docs.hf.space/api/mcp`

            ---

            ## 🛠️ Available MCP Tools

            ### 1. `get_property_info`
            Get detailed information about a BIBFRAME property.

            **Input:** `property_uri` (string) - e.g., "assigner", "bf:assigner", or "title"

            **Returns:** JSON with property URI, label, definition, domain, range, examples, and usage notes

            **Example:**
            ```json
            {{
              "property": "bf:assigner",
              "label": "Assigner",
              "definition": "Entity who assigned the identifier or other metadata value",
              "domain": ["bf:AdminMetadata"],
              "range": ["bf:Agent"],
              "examples": ["<bf:assigner rdf:resource=\\"http://id.loc.gov/rwo/agents/n123\\"/>"]
            }}
            ```

            ---

            ### 2. `get_class_info`
            Get detailed information about a BIBFRAME class.

            **Input:** `class_name` (string) - e.g., "Work", "bf:Work", or "Instance"

            **Returns:** JSON with class URI, label, definition, superclasses, and applicable properties

            **Example:**
            ```json
            {{
              "class": "bf:Work",
              "label": "Work",
              "definition": "Most abstract level of description for a creative concept",
              "applicable_properties": [
                {{"property": "bf:title", "label": "Title"}},
                {{"property": "bf:contribution", "label": "Contribution"}}
              ]
            }}
            ```

            ---

            ### 3. `search_ontology`
            Search for properties or classes matching a term.

            **Inputs:**
            - `search_term` (string) - e.g., "title", "agent", "date"
            - `search_type` (string) - "all", "properties", or "classes"

            **Returns:** JSON with matching properties and classes (max 20 each)

            ---

            ### 4. `get_property_usage`
            Get usage information for a property in context of a specific class.

            **Inputs:**
            - `property_name` (string) - e.g., "assigner"
            - `class_name` (string, optional) - e.g., "AdminMetadata"

            **Returns:** JSON with usage information, examples, and context notes

            ---

            ## 🔧 Configuration

            ### For Claude Desktop

            Add to `claude_desktop_config.json`:

            ```json
            {{
              "mcpServers": {{
                "bibframe-docs": {{
                  "url": "https://jimfhahn-mcp4bibframe-docs.hf.space/api/mcp"
                }}
              }}
            }}
            ```

            ### For Other MCP Clients

            Use the SSE endpoint URL above with your MCP client library.

            ---

            ## 📖 Data Source

            - **BIBFRAME Ontology:** [http://id.loc.gov/ontologies/bibframe.rdf](http://id.loc.gov/ontologies/bibframe.rdf)
            - **Official Documentation:** [https://www.loc.gov/bibframe/](https://www.loc.gov/bibframe/)

            ---

            ## 🔗 Related Tools

            - **SHACL Validator:** [mcp4rdf](https://huggingface.co/spaces/jimfhahn/mcp4rdf)
            - **BIBFRAME Profiles:** [LC BFE Profiles](https://github.com/lcnetdev/bfe-profiles)

            ---

            *This server automatically loads the latest BIBFRAME ontology on startup.*
            """))

        # Register the tool functions as component-less endpoints.
        # NOTE(review): gr.api is looked up defensively in case the installed
        # Gradio predates it - confirm the pinned version and drop the guard.
        register_api = getattr(gr, "api", None)
        if register_api is None:
            print("⚠️ This Gradio version has no gr.api; MCP tools were not registered")
        else:
            for tool in _MCP_TOOLS:
                register_api(tool, api_name=tool.__name__)

    return demo


if __name__ == "__main__":
    print("🚀 Starting BIBFRAME MCP Server...")

    # No-op when the import-time load above already completed
    kb.load_ontology()
    print(f"✅ Loaded {len(kb.properties)} properties and {len(kb.classes)} classes")

    demo = create_interface()

    print("🔧 Launching with MCP server enabled...")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_api=True
    )