Knowledge Graph Memory Guide¶
Learn how to use AgentiCraft's knowledge graph memory for storing and querying structured information through entities and relationships.
Overview¶
Knowledge graph memory extracts entities and relationships from text, creating a queryable graph structure. This enables agents to understand connections between people, places, organizations, and concepts.
Architecture¶
┌──────────────┐     ┌─────────────────┐     ┌──────────────┐
│  Text Input  │────▶│ Entity Extractor│────▶│   Entities   │
└──────────────┘     └─────────────────┘     └──────────────┘
                              │                      │
                              ▼                      ▼
                     ┌─────────────────┐     ┌──────────────┐
                     │  Relationship   │────▶│    Graph     │
                     │    Detector     │     │   Storage    │
                     └─────────────────┘     └──────────────┘
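Conceptually, the graph reduces to typed entity nodes plus directed, labeled relationship edges. A minimal sketch of that data model, using hypothetical field names that mirror the attributes referenced throughout this guide (the library's actual internals may differ):

from dataclasses import dataclass, field

@dataclass
class Entity:
    name: str
    entity_type: str      # e.g. "PERSON", "ORGANIZATION"
    count: int = 0        # how many times the entity has been seen
    attributes: dict = field(default_factory=dict)

@dataclass
class Relationship:
    from_entity: str      # source entity name
    relation: str         # e.g. "works_for"
    to_entity: str        # target entity name
    attributes: dict = field(default_factory=dict)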
Basic Usage¶
Creating a Knowledge Graph¶
from agenticraft.memory.graph import KnowledgeGraphMemory
# Create knowledge graph
graph = KnowledgeGraphMemory(
    capacity=10000  # Maximum number of nodes
)
# Store information - entities are extracted automatically
await graph.store(
    key="meeting_001",
    value="John Smith from OpenAI met with Sarah Chen from Microsoft to discuss the GPT-4 integration project."
)
# View extracted entities
entities = await graph.get_entities()
print("Entities found:")
for entity in entities:
    print(f"- {entity['name']} ({entity['type']})")
Entity Types¶
The system recognizes these entity types by default:
Type | Description | Examples |
---|---|---|
PERSON | People names | John Smith, Dr. Chen |
ORGANIZATION | Companies, institutions | OpenAI, Microsoft, MIT |
LOCATION | Places, cities, countries | San Francisco, USA |
DATE | Temporal references | June 2025, yesterday |
PRODUCT | Products, technologies | GPT-4, Windows |
EVENT | Events, occurrences | conference, meeting |
CONCEPT | Abstract concepts | AI safety, machine learning |
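The extractor is pattern-based (see Custom Entity Patterns below). As a rough illustration of how such recognition can work, here is a tiny regex-driven extractor; the patterns are illustrative, not the library's actual defaults:

import re

# Illustrative patterns only -- not the library's built-in defaults
default_patterns = {
    "PERSON": r"(?:Dr\.|Prof\.)?\s?[A-Z][a-z]+ [A-Z][a-z]+",
    "DATE": r"(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{4}",
}

def extract(text: str):
    """Return (name, type) pairs for every pattern match in the text."""
    found = []
    for entity_type, pattern in default_patterns.items():
        for match in re.finditer(pattern, text):
            found.append((match.group().strip(), entity_type))
    return found

print(extract("Dr. Emily Watson published the paper in June 2025."))
# [('Dr. Emily Watson', 'PERSON'), ('June 2025', 'DATE')]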
Entity Extraction¶
Automatic Extraction¶
# Automatic extraction with store
text = """
Dr. Emily Watson from Stanford University published groundbreaking research
on quantum computing in Nature journal. She collaborated with teams from
IBM Research in Zurich and Google's quantum AI lab in Santa Barbara.
"""
await graph.store("research_news", text)
# Check extracted entities
people = await graph.get_entities(entity_type="PERSON")
# Returns: [{"name": "Dr. Emily Watson", "type": "PERSON", "count": 1}]
orgs = await graph.get_entities(entity_type="ORGANIZATION")
# Returns: Stanford University, IBM Research, Google, Nature
Manual Entity Addition¶
# Add entities manually
graph.add_entity(
    name="AGI Summit 2025",
    entity_type="EVENT",
    attributes={
        "date": "2025-09-15",
        "location": "San Francisco",
        "attendees": 5000
    }
)
# Add with relationships
graph.add_entity("Claude", "PRODUCT")
graph.add_entity("Anthropic", "ORGANIZATION")
graph.add_relationship("Anthropic", "develops", "Claude")
Custom Entity Patterns¶
# Define custom entity patterns
class CustomKnowledgeGraph(KnowledgeGraphMemory):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Add custom patterns
        self.entity_patterns.update({
            "PROJECT": r"[A-Z][A-Za-z\s]+ (?:Project|Initiative|Program)",
            "VERSION": r"v?\d+\.\d+(?:\.\d+)?",
            "SKILL": r"(?:Python|JavaScript|machine learning|NLP|computer vision)"
        })

    def extract_entities(self, text: str):
        entities = super().extract_entities(text)
        # Custom extraction logic
        if "AI" in text or "ML" in text:
            entities.append({
                "name": "Artificial Intelligence",
                "type": "CONCEPT",
                "confidence": 0.9
            })
        return entities
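Usage of the subclass is unchanged; a quick check that the custom patterns fire:

custom_graph = CustomKnowledgeGraph(capacity=10000)
await custom_graph.store(
    "status_update",
    "Atlas Migration Project shipped v2.3.1, built with Python and NLP."
)
projects = await custom_graph.get_entities(entity_type="PROJECT")
# Expect: Atlas Migration Project (plus VERSION and SKILL matches)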
Relationships¶
Relationship Detection¶
# Automatic relationship detection
text = "Tim Cook announced that Apple is investing $1 billion in AI research led by John Giannandrea."
await graph.store("news_001", text)
# Query relationships
relationships = await graph.get_relationships("Tim Cook")
# Returns:
# [
# {"from": "Tim Cook", "relation": "announced", "to": "investment"},
# {"from": "Tim Cook", "relation": "associated_with", "to": "Apple"}
# ]
Relationship Types¶
Common relationship types detected (a naive detector is sketched after this list):
- Organizational: works_for, leads, founded, acquired
- Personal: knows, met_with, collaborated_with
- Location: located_in, headquartered_in, from
- Temporal: happened_on, started_at, ended_at
- Causal: caused, resulted_in, led_to
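To make the idea concrete, here is a naive, self-contained verb-pattern detector. This is a sketch only, not the library's actual detection logic (which may use more robust NLP such as dependency parsing):

import re

# Map verb-phrase patterns to relation types (illustrative subset)
RELATION_VERBS = {
    r"\bworks? (?:for|at)\b": "works_for",
    r"\bmet with\b": "met_with",
    r"\bfounded\b": "founded",
    r"\bheadquartered in\b": "headquartered_in",
}

def detect_relations(text, entities):
    """Find '<entity> <short verb phrase> <entity>' spans in the text."""
    found = []
    for e1 in entities:
        for e2 in entities:
            if e1 == e2:
                continue
            # Capture up to 15 characters between the two entity mentions
            between = re.escape(e1) + r"\s+(.{0,15}?)\s+" + re.escape(e2)
            m = re.search(between, text)
            if not m:
                continue
            for verb_pattern, relation in RELATION_VERBS.items():
                if re.search(verb_pattern, m.group(1)):
                    found.append((e1, relation, e2))
    return found

print(detect_relations(
    "Sarah Chen works for Microsoft and met with John Smith.",
    ["Sarah Chen", "Microsoft", "John Smith"],
))
# [('Sarah Chen', 'works_for', 'Microsoft'), ('Microsoft', 'met_with', 'John Smith')]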
Custom Relationships¶
# Add custom relationships
graph.add_relationship(
    from_entity="GPT-4",
    relation="successor_of",
    to_entity="GPT-3.5",
    attributes={
        "improvement": "10x",
        "release_date": "2023-03-14"
    }
)
# Bidirectional relationships
graph.add_relationship("Alice", "collaborates_with", "Bob")
graph.add_relationship("Bob", "collaborates_with", "Alice")
# Weighted relationships
graph.add_relationship(
    "Product A",
    "competes_with",
    "Product B",
    attributes={"intensity": 0.8}
)
Graph Queries¶
Basic Queries¶
# Get all entities
all_entities = await graph.get_entities()
# Filter by type
people = await graph.get_entities(entity_type="PERSON")
companies = await graph.get_entities(entity_type="ORGANIZATION")
# Get specific entity details
entity_info = graph.get_entity("John Smith")
print(f"Occurrences: {entity_info['count']}")
print(f"First seen: {entity_info['first_seen']}")
print(f"Attributes: {entity_info['attributes']}")
Relationship Queries¶
# Get all relationships for an entity
rels = await graph.get_relationships("OpenAI")
# Get specific relationship types
work_rels = await graph.get_relationships(
    entity_name="Sarah Chen",
    relation_type="works_for"
)
# Get entities connected by relationship
graph.add_relationship("Python", "used_for", "Data Science")
graph.add_relationship("Python", "used_for", "Web Development")
uses = graph.get_entities_by_relationship(
    relation="used_for",
    from_entity="Python"
)
# Returns: ["Data Science", "Web Development"]
Path Finding¶
# Find paths between entities
paths = graph.find_paths(
    start_entity="John Smith",
    end_entity="Microsoft",
    max_depth=3
)
# Example result:
# [
# ["John Smith", "works_for", "OpenAI", "partners_with", "Microsoft"],
# ["John Smith", "collaborates_with", "Sarah Chen", "works_for", "Microsoft"]
# ]
# Find shortest path
shortest = graph.find_shortest_path("Entity A", "Entity B")
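For intuition, shortest-path search over the directed relationship edges is plain breadth-first search. A self-contained sketch, assuming relationship objects with from_entity/to_entity fields as in the NetworkX section later:

from collections import deque

def bfs_shortest_path(relationships, start, end):
    """Return the list of entity names on a shortest path, or None."""
    # Build an adjacency map from the relationship edges
    neighbors = {}
    for rel in relationships:
        neighbors.setdefault(rel.from_entity, []).append(rel.to_entity)

    queue = deque([[start]])
    visited = {start}
    while queue:
        path = queue.popleft()
        if path[-1] == end:
            return path
        for nxt in neighbors.get(path[-1], []):
            if nxt not in visited:
                visited.add(nxt)
                queue.append(path + [nxt])
    return None  # no path within the graph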
Subgraph Extraction¶
# Get subgraph around an entity
subgraph = graph.get_subgraph(
    center_entity="GPT-4",
    depth=2,  # Two hops from center
    include_types=["PRODUCT", "ORGANIZATION", "PERSON"]
)
# Returns nodes and edges within 2 hops of GPT-4
print(f"Nodes: {len(subgraph['nodes'])}")
print(f"Edges: {len(subgraph['edges'])}")
Visualization¶
Dictionary Format¶
# Get graph as dictionary
graph_dict = graph.visualize(format="dict")
print("Nodes:")
for node in graph_dict["nodes"]:
    print(f"- {node['id']} ({node['type']})")

print("\nEdges:")
for edge in graph_dict["edges"]:
    print(f"- {edge['source']} --{edge['relation']}--> {edge['target']}")
Cytoscape Format¶
import json

# Export for Cytoscape.js visualization
cytoscape_data = graph.visualize(format="cytoscape")

# Use in web application
html_template = """
<script src="https://cdnjs.cloudflare.com/ajax/libs/cytoscape/3.21.1/cytoscape.min.js"></script>
<div id="cy" style="width: 800px; height: 600px;"></div>
<script>
var cy = cytoscape({
    container: document.getElementById('cy'),
    elements: %s,
    style: [
        {
            selector: 'node',
            style: {
                'label': 'data(label)',
                'background-color': 'data(color)'
            }
        },
        {
            selector: 'edge',
            style: {
                'label': 'data(relation)',
                'curve-style': 'bezier',
                'target-arrow-shape': 'triangle'
            }
        }
    ]
});
</script>
""" % json.dumps(cytoscape_data)
GraphViz Export¶
# Export as GraphViz DOT format
dot_graph = graph.visualize(format="graphviz")
# Save to file
with open("knowledge_graph.dot", "w") as f:
    f.write(dot_graph)
# Render with GraphViz
# dot -Tpng knowledge_graph.dot -o knowledge_graph.png
NetworkX Integration¶
import networkx as nx
import matplotlib.pyplot as plt
# Convert to NetworkX graph
def to_networkx(knowledge_graph):
    G = nx.DiGraph()

    # Add nodes
    for entity in knowledge_graph.entities.values():
        G.add_node(
            entity.name,
            type=entity.entity_type,
            count=entity.count
        )

    # Add edges
    for rel in knowledge_graph.relationships:
        G.add_edge(
            rel.from_entity,
            rel.to_entity,
            relation=rel.relation
        )

    return G
# Visualize
G = to_networkx(graph)
pos = nx.spring_layout(G)
nx.draw(G, pos, with_labels=True, node_color='lightblue',
        node_size=1000, font_size=10, arrows=True)
plt.show()
Advanced Features¶
Entity Resolution¶
# Merge similar entities
class SmartKnowledgeGraph(KnowledgeGraphMemory):
    def resolve_entities(self, threshold=0.8):
        """Merge entities that likely refer to the same thing."""
        from difflib import SequenceMatcher

        entities = list(self.entities.values())
        merged = set()

        for i, e1 in enumerate(entities):
            if e1.name in merged:
                continue
            for e2 in entities[i+1:]:
                if e2.name in merged:
                    continue
                # Check similarity
                similarity = SequenceMatcher(
                    None, e1.name.lower(), e2.name.lower()
                ).ratio()
                if similarity >= threshold:
                    # Merge e2 into e1
                    self.merge_entities(e1.name, e2.name)
                    merged.add(e2.name)

        return len(merged)
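For example, with a threshold of 0.85, "Dr. Emily Watson" and "Emily Watson" score roughly 0.86 and would merge:

smart_graph = SmartKnowledgeGraph(capacity=10000)
smart_graph.add_entity("Dr. Emily Watson", "PERSON")
smart_graph.add_entity("Emily Watson", "PERSON")

merged_count = smart_graph.resolve_entities(threshold=0.85)
print(f"Merged {merged_count} duplicate entities")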
Temporal Queries¶
# Add temporal information
graph.add_entity(
    "Product Launch",
    "EVENT",
    attributes={
        "date": "2025-09-01",
        "products": ["ProductX", "ProductY"]
    }
)
# Query by time
async def get_events_in_range(graph, start_date, end_date):
    """Get events within a date range."""
    events = await graph.get_entities(entity_type="EVENT")
    in_range = []
    for event in events:
        event_date = event.get("attributes", {}).get("date")
        if event_date and start_date <= event_date <= end_date:
            in_range.append(event)
    return in_range
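Because the dates are stored as ISO-8601 strings, plain string comparison orders them correctly:

september_events = await get_events_in_range(graph, "2025-09-01", "2025-09-30")
for event in september_events:
    print(event["name"], event.get("attributes", {}).get("date"))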
Graph Analytics¶
# Analyze graph structure
def analyze_graph(graph):
    """Compute graph statistics."""
    stats = {
        "total_entities": len(graph.entities),
        "total_relationships": len(graph.relationships),
        "entities_by_type": {},
        "most_connected": [],
        "isolated_entities": []
    }

    # Count by type
    for entity in graph.entities.values():
        stats["entities_by_type"][entity.entity_type] = \
            stats["entities_by_type"].get(entity.entity_type, 0) + 1

    # Find most connected
    connection_counts = {}
    for rel in graph.relationships:
        connection_counts[rel.from_entity] = \
            connection_counts.get(rel.from_entity, 0) + 1
        connection_counts[rel.to_entity] = \
            connection_counts.get(rel.to_entity, 0) + 1

    # Sort by connections
    sorted_entities = sorted(
        connection_counts.items(),
        key=lambda x: x[1],
        reverse=True
    )
    stats["most_connected"] = sorted_entities[:10]

    # Find isolated entities
    connected = set(connection_counts.keys())
    all_entities = set(graph.entities.keys())
    stats["isolated_entities"] = list(all_entities - connected)

    return stats
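Typical usage:

stats = analyze_graph(graph)
print(f"{stats['total_entities']} entities, {stats['total_relationships']} relationships")
for name, degree in stats["most_connected"][:3]:
    print(f"{name}: {degree} connections")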
Knowledge Inference¶
# Infer new relationships
class InferenceGraph(KnowledgeGraphMemory):
    def infer_relationships(self):
        """Infer implicit relationships."""
        new_relationships = []

        # Transitive relationships
        for r1 in self.relationships:
            if r1.relation == "works_for":
                for r2 in self.relationships:
                    if (r2.from_entity == r1.to_entity and
                            r2.relation == "subsidiary_of"):
                        # Person works for a subsidiary of the parent company
                        new_rel = (
                            r1.from_entity,
                            "indirectly_works_for",
                            r2.to_entity
                        )
                        new_relationships.append(new_rel)

        # Add inferred relationships
        for from_e, rel, to_e in new_relationships:
            self.add_relationship(from_e, rel, to_e)

        return len(new_relationships)
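For example, two stored facts yield one inferred edge:

inference_graph = InferenceGraph(capacity=10000)
inference_graph.add_relationship("Sarah Chen", "works_for", "GitHub")
inference_graph.add_relationship("GitHub", "subsidiary_of", "Microsoft")

inferred = inference_graph.infer_relationships()
# Adds: Sarah Chen --indirectly_works_for--> Microsoft
print(f"Inferred {inferred} new relationships")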
Performance Optimization¶
Capacity Management¶
from datetime import datetime, timedelta

# Monitor and manage capacity
stats = graph.get_stats()
print(f"Entities: {stats['entity_count']}/{stats['capacity']}")
print(f"Usage: {stats['usage']:.1%}")

# Prune old entities when near capacity
if stats['usage'] > 0.9:
    # Remove least recently used
    graph.prune(keep_recent=5000)

# Or remove by criteria
old_date = datetime.now() - timedelta(days=90)
graph.prune_before(old_date)
Batch Operations¶
# Batch entity extraction
texts = [
    "Text 1 with entities...",
    "Text 2 with more entities...",
    # ... many more texts
]

# Process in batches
batch_size = 100
for i in range(0, len(texts), batch_size):
    batch = texts[i:i+batch_size]
    # Extract entities from batch
    for j, text in enumerate(batch):
        await graph.store(f"doc_{i+j}", text)
    # Consolidate after each batch
    graph.consolidate_entities()
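If the backing store is I/O-bound, each batch's stores can also run concurrently; a sketch using asyncio.gather, assuming store is safe to call concurrently:

import asyncio

for i in range(0, len(texts), batch_size):
    batch = texts[i:i+batch_size]
    # Fire all stores in the batch at once, then wait for them together
    await asyncio.gather(*(
        graph.store(f"doc_{i+j}", text)
        for j, text in enumerate(batch)
    ))
    graph.consolidate_entities()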
Query Optimization¶
# Cache frequent queries
class CachedKnowledgeGraph(KnowledgeGraphMemory):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._query_cache = {}
        self._cache_size = 1000

    async def get_relationships(self, entity_name, relation_type=None):
        # Create cache key
        cache_key = f"{entity_name}:{relation_type}"

        # Check cache
        if cache_key in self._query_cache:
            return self._query_cache[cache_key]

        # Perform query
        result = await super().get_relationships(entity_name, relation_type)

        # Update cache
        self._query_cache[cache_key] = result

        # Limit cache size: dicts keep insertion order, so the earliest
        # keys are the oldest entries (simple FIFO eviction)
        if len(self._query_cache) > self._cache_size:
            oldest = list(self._query_cache.keys())[:-self._cache_size]
            for key in oldest:
                del self._query_cache[key]

        return result
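Note the sketch above never invalidates: after new writes, cached results can go stale. A simple, if blunt, fix is to clear the cache whenever new text is stored:

cached_graph = CachedKnowledgeGraph(capacity=10000)

rels = await cached_graph.get_relationships("OpenAI")  # hits the graph
rels = await cached_graph.get_relationships("OpenAI")  # served from cache

# After a write, drop cached query results so reads see fresh data
await cached_graph.store("news_002", "OpenAI partnered with Microsoft.")
cached_graph._query_cache.clear()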
Integration Examples¶
With Agents¶
from agenticraft.agents import KnowledgeAgent
# Create agent with knowledge graph
agent = KnowledgeAgent(
    name="KnowledgeBot",
    knowledge_graph=KnowledgeGraphMemory(capacity=50000)
)
# Agent automatically extracts knowledge
response = await agent.arun(
    "Tell me about the meeting between the CEO of OpenAI and Google's AI team"
)
# Query agent's knowledge
knowledge = agent.get_knowledge_about("OpenAI")
With Vector Memory¶
# Hybrid memory system
class HybridMemory:
    def __init__(self):
        self.vector_memory = ChromaDBMemory()
        self.graph_memory = KnowledgeGraphMemory()

    async def store(self, key: str, text: str):
        # Store in both systems
        await self.vector_memory.store(key, text)
        await self.graph_memory.store(key, text)

    async def query(self, query: str):
        # Get semantic matches
        semantic_results = await self.vector_memory.search(query)

        # Extract entities from query
        query_entities = self.graph_memory.extract_entities(query)

        # Get graph context for entities
        graph_context = []
        for entity in query_entities:
            rels = await self.graph_memory.get_relationships(entity['name'])
            graph_context.extend(rels)

        return {
            "semantic_matches": semantic_results,
            "graph_context": graph_context
        }
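Putting it together:

memory = HybridMemory()
await memory.store("meeting_001", "John Smith from OpenAI met with Sarah Chen.")

results = await memory.query("Who did John Smith meet with?")
print(results["semantic_matches"])
print(results["graph_context"])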
Best Practices¶
1. Entity Naming Consistency¶
# Standardize entity names
def standardize_entity_name(name: str) -> str:
    """Standardize entity names for consistency."""
    # Remove extra whitespace
    name = ' '.join(name.split())

    # Consistent casing for known entities
    known_entities = {
        "openai": "OpenAI",
        "gpt-4": "GPT-4",
        "gpt4": "GPT-4",
        "microsoft": "Microsoft",
        "ms": "Microsoft"
    }

    lower_name = name.lower()
    return known_entities.get(lower_name, name)
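Apply it at every write path so name variants collapse onto a single node:

print(standardize_entity_name("  openai "))  # -> OpenAI
print(standardize_entity_name("gpt4"))       # -> GPT-4
print(standardize_entity_name("Jane Doe"))   # -> Jane Doe (unknown names pass through)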
2. Relationship Validation¶
# Validate relationships make sense
VALID_RELATIONS = {
    "PERSON": ["works_for", "knows", "founded", "leads"],
    "ORGANIZATION": ["owns", "acquired", "partners_with", "competes_with"],
    "PRODUCT": ["developed_by", "used_by", "version_of", "integrates_with"]
}

def is_valid_relationship(from_type, relation, to_type):
    """Check if a relationship makes semantic sense for the source type.

    to_type is accepted so target-type rules can be added later.
    """
    valid_rels = VALID_RELATIONS.get(from_type, [])
    return relation in valid_rels
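A small wrapper (a hypothetical helper, not part of the library) enforces the check at write time:

def add_validated_relationship(graph, from_entity, from_type,
                               relation, to_entity, to_type):
    """Only add relationships that pass the semantic check."""
    if not is_valid_relationship(from_type, relation, to_type):
        raise ValueError(f"{from_type} --{relation}--> {to_type} not allowed")
    graph.add_relationship(from_entity, relation, to_entity)

add_validated_relationship(
    graph, "Sarah Chen", "PERSON", "works_for", "Microsoft", "ORGANIZATION"
)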
3. Regular Maintenance¶
import logging

logger = logging.getLogger(__name__)

# Maintenance routine
async def maintain_knowledge_graph(graph):
    """Regular maintenance tasks."""
    # Remove duplicate entities
    graph.deduplicate_entities()

    # Merge similar entities
    graph.resolve_entities(threshold=0.85)

    # Remove orphaned entities (no relationships)
    graph.remove_orphans()

    # Consolidate weak relationships
    graph.consolidate_relationships(min_weight=0.1)

    # Update statistics
    stats = graph.get_stats()
    logger.info(f"Graph maintenance complete: {stats}")
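To run this on a schedule, a minimal asyncio loop works:

import asyncio

async def maintenance_loop(graph, interval_hours=24):
    """Run graph maintenance on a fixed schedule."""
    while True:
        await maintain_knowledge_graph(graph)
        await asyncio.sleep(interval_hours * 3600)

# Launch alongside the rest of the application:
# asyncio.create_task(maintenance_loop(graph))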
Troubleshooting¶
Common Issues¶
Memory capacity reached:
# Increase capacity or prune
graph = KnowledgeGraphMemory(capacity=100000)
# Or
graph.prune(keep_recent=50000)
Entity extraction misses expected entities:
# Add custom patterns
graph.entity_patterns["CUSTOM_TYPE"] = r"your_pattern_here"
# Or override extraction
class CustomExtractor(KnowledgeGraphMemory):
    def extract_entities(self, text):
        entities = super().extract_entities(text)
        # Add your logic
        return entities
Slow queries:
# Add indexing for large graphs
graph.build_index()
# Use query limits
results = await graph.get_entities(limit=100)
Next Steps¶
- Memory Patterns - Common usage patterns
- API Reference - Complete API documentation
- Performance Guide - Optimization techniques
- Examples - Working examples