Files
innovatieplatform/wiki/tools/query.py
znetsixe 926872a082 Add document converter, seeder data structure, and project wiki
- ai-service/convert.py: converts Office/PDF files to markdown with frontmatter
- database/seeders/data/: folder structure for themas, projects, documents, etc.
- database/seeders/data/raw/: drop zone for Office/PDF files to convert
- wiki/: project architecture, concepts, and knowledge graph documentation
- Remove unused Laravel example tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 08:33:42 +02:00

250 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""Wiki Knowledge Graph query tool.
Queryable interface over knowledge-graph.yaml + wiki pages.
Usable by both humans (CLI) and LLM agents (imported).
Usage:
python wiki/tools/query.py health # project health
python wiki/tools/query.py entity "search term" # everything about an entity
python wiki/tools/query.py metric "search term" # find metrics
python wiki/tools/query.py status "proven" # all pages with status
python wiki/tools/query.py test "test name" # test results
python wiki/tools/query.py search "keyword" # full-text search
python wiki/tools/query.py related "page-name" # pages linking to/from
python wiki/tools/query.py timeline # commit timeline
"""
import yaml
import os
import sys
import re
from pathlib import Path
# Wiki root: two levels up from this file (the parent of the directory
# that contains this script).
WIKI_DIR = Path(__file__).parent.parent
# Knowledge-graph YAML file stored directly under the wiki root.
GRAPH_PATH = WIKI_DIR.joinpath('knowledge-graph.yaml')
def load_graph():
    """Load the knowledge graph from ``GRAPH_PATH``.

    Returns:
        The parsed YAML document, or an empty dict when the file does not
        exist or parses to an empty/falsy document.
    """
    if not GRAPH_PATH.exists():
        return {}
    # Decode explicitly as UTF-8: without ``encoding`` the locale default is
    # used, which garbles or rejects non-ASCII text on some platforms.
    with open(GRAPH_PATH, encoding='utf-8') as f:
        return yaml.safe_load(f) or {}
def load_all_pages():
    """Read every Markdown page under ``WIKI_DIR`` into a dict.

    Files located inside a ``tools`` directory of the wiki are skipped.
    A leading ``---`` delimited block is parsed as YAML front matter and
    removed from the page content.

    Returns:
        dict: Maps the page path relative to ``WIKI_DIR`` (as a string) to a
        dict with the keys ``path``, ``meta``, ``content``, ``links``,
        ``title``, ``status`` and ``tags``.
    """
    pages = {}
    for md_path in WIKI_DIR.rglob('*.md'):
        rel = md_path.relative_to(WIKI_DIR)
        # Test directory components of the *relative* path only. A substring
        # test on the full path would also drop pages such as "devtools.md"
        # and would drop everything when the checkout itself lives under a
        # directory whose name contains "tools".
        if 'tools' in rel.parts[:-1]:
            continue
        # Explicit encoding: the locale default is not UTF-8 everywhere.
        content = md_path.read_text(encoding='utf-8')
        meta = {}
        if content.startswith('---'):
            parts = content.split('---', 2)
            if len(parts) >= 3:
                try:
                    loaded = yaml.safe_load(parts[1])
                except yaml.YAMLError:
                    # Best effort: unparsable front matter is treated as
                    # absent, the page body is still indexed.
                    loaded = None
                # Front matter that is valid YAML but not a mapping (e.g. a
                # bare string) cannot provide title/status/tags.
                if isinstance(loaded, dict):
                    meta = loaded
                content = parts[2]
        links = re.findall(r'\[\[([^\]]+)\]\]', content)
        # Callers lower-case the title, so it must always be a non-empty
        # string; fall back to the relative path otherwise.
        raw_title = meta.get('title')
        if raw_title is None or raw_title == '':
            title = str(rel)
        else:
            title = str(raw_title)
        pages[str(rel)] = {
            'path': str(rel), 'meta': meta, 'content': content,
            'links': links, 'title': title,
            'status': meta.get('status', 'unknown'),
            'tags': meta.get('tags', []),
        }
    return pages
def flatten_graph(graph, prefix=''):
    """Flatten nested dicts/lists into a list of ``(path, value)`` pairs.

    Dict keys are joined with ``.`` and list positions are written as
    ``[i]``, e.g. ``tests.unit.results[0].test``. Leaf values are converted
    with ``str``. Empty containers contribute no entries, and a ``graph``
    that is neither a dict nor a list yields an empty list.

    Args:
        graph: The (sub)structure to flatten.
        prefix: Path of ``graph`` within the overall structure; empty for
            the root.

    Returns:
        list[tuple[str, str]]: One ``(path, value)`` pair per leaf, in
        traversal order.
    """
    if isinstance(graph, dict):
        # str(k): YAML keys may be ints, bools or dates. Without the
        # conversion a top-level key is returned as a non-string path, and a
        # falsy key such as 0 is lost as prefix for everything below it.
        children = [(f"{prefix}.{k}" if prefix else str(k), v)
                    for k, v in graph.items()]
    elif isinstance(graph, list):
        children = [(f"{prefix}[{i}]", v) for i, v in enumerate(graph)]
    else:
        return []

    items = []
    for path, value in children:
        if isinstance(value, (dict, list)):
            items.extend(flatten_graph(value, path))
        else:
            items.append((path, str(value)))
    return items
def cmd_health():
    """Print a health summary of the wiki and the knowledge graph.

    Reports the page count, pages per status, aggregated test results (only
    when a test count is present), the number of disproven claims, the
    number of timeline entries and the number of ``[[links]]`` that match
    no known page title or file name.
    """
    def _norm(text):
        # One normalisation for links, titles and file names so that
        # "my-page", "my_page" and "My Page" compare equal.
        return str(text).lower().replace('-', ' ').replace('_', ' ')

    graph = load_graph()
    pages = load_all_pages()

    statuses = {}
    for page in pages.values():
        statuses[page['status']] = statuses.get(page['status'], 0) + 1

    # A key that is present but empty in the YAML loads as None.
    tests = graph.get('tests')
    suites = [t for t in tests.values() if isinstance(t, dict)] \
        if isinstance(tests, dict) else []
    total_pass = sum(t.get('passing', 0) for t in suites)
    total_count = sum(t.get('count', t.get('total', 0)) for t in suites)
    disproven = len(graph.get('disproven') or {})
    timeline = len(graph.get('timeline') or [])

    # Count broken links. Both the link and the candidate names are
    # normalised; previously only the link was, so a link to "my-page"
    # never matched the file "my-page.md".
    known = set()
    for page in pages.values():
        known.add(_norm(page['title']))
        # Path(...).stem handles either path separator and strips only the
        # final suffix.
        known.add(_norm(Path(page['path']).stem))
    # An empty name is a substring of every link and would hide all
    # broken links.
    known.discard('')

    broken = 0
    for page in pages.values():
        for link in page['links']:
            target = _norm(link)
            # Deliberately fuzzy: a link counts as resolved when it contains,
            # or is contained in, any known name.
            if not any(target in name or name in target for name in known):
                broken += 1

    print("Wiki Health:\n")
    print(f" Pages: {len(pages)}")
    print(f" Statuses: {statuses}")
    if total_count:
        print(f" Tests: {total_pass}/{total_count} passing")
    print(f" Disproven: {disproven} claims tracked")
    print(f" Timeline: {timeline} commits")
    print(f" Broken links: {broken}")
def cmd_entity(query):
    """Print everything known about ``query``.

    Shows up to 20 knowledge-graph entries whose path or value contains the
    query (case-insensitive), followed by every wiki page whose content or
    title contains it, each with up to three matching lines truncated to
    100 characters.
    """
    graph = load_graph()
    pages = load_all_pages()
    needle = query.lower()
    print(f"Entity: '{query}'\n")

    graph_hits = []
    for key_path, text in flatten_graph(graph):
        if needle in key_path.lower() or needle in text.lower():
            graph_hits.append((key_path, text))
    if graph_hits:
        print(" -- Knowledge Graph --")
        for key_path, text in graph_hits[:20]:
            print(f" {key_path}: {text}")

    print("\n -- Wiki Pages --")
    for rel, page in sorted(pages.items()):
        body = page['content']
        if needle not in body.lower() and needle not in page['title'].lower():
            continue
        # Collect the non-blank lines that mention the query.
        matching = []
        for raw in body.split('\n'):
            stripped = raw.strip()
            if stripped and needle in raw.lower():
                matching.append(stripped)
        print(f" {rel} ({page['status']})")
        for excerpt in matching[:3]:
            print(f" {excerpt[:100]}")
def cmd_metric(query):
    """Print every knowledge-graph entry whose path or value contains ``query``.

    Matching is case-insensitive; a placeholder line is printed when nothing
    matches.
    """
    entries = flatten_graph(load_graph())
    needle = query.lower()
    print(f"Metrics matching '{query}':\n")

    matched = False
    for key_path, text in entries:
        if needle not in key_path.lower() and needle not in text.lower():
            continue
        print(f" {key_path}: {text}")
        matched = True

    if not matched:
        print(" (no matches)")
def cmd_status(status):
    """Print all wiki pages whose status equals ``status``.

    For the status ``'disproven'`` the claims recorded under the graph's
    ``disproven`` key are listed as well.

    Args:
        status: Status value to match exactly (case-sensitive).
    """
    pages = load_all_pages()
    graph = load_graph()
    print(f"Status: '{status}'\n")

    for rel, page in sorted(pages.items()):
        if page['status'] != status:
            continue
        print(f" {page['title']} ({rel})")
        if page['tags']:
            print(f" tags: {page['tags']}")

    claims = graph.get('disproven')
    # Only a mapping of claims can be listed; an empty or malformed section
    # is skipped instead of raising AttributeError.
    if status == 'disproven' and isinstance(claims, dict):
        print("\n -- Disproven Claims --")
        for name, claim in claims.items():
            print(f" {name}:")
            if isinstance(claim, dict):
                for k, v in claim.items():
                    print(f" {k}: {v}")
            else:
                # A claim written as a plain scalar has no fields to
                # iterate; show the value itself.
                print(f" {claim}")
def cmd_test(query):
    """Print the test suites from the knowledge graph that match ``query``.

    A suite matches when the query occurs (case-insensitive) in its name or
    anywhere in the string form of its data. For matching suites the summary
    fields, nested sub-suite totals and individual results are printed.
    """
    tests = load_graph().get('tests')
    # A present-but-empty or malformed "tests" section has nothing to show.
    if not isinstance(tests, dict):
        tests = {}
    q = query.lower()
    print(f"Test results for '{query}':\n")

    for name, suite in tests.items():
        # str(name): YAML keys are not guaranteed to be strings.
        if q not in str(name).lower() and q not in str(suite).lower():
            continue
        print(f" -- {name} --")
        if not isinstance(suite, dict):
            continue
        for k, v in suite.items():
            if isinstance(v, dict):
                # Accept "count" as an alias of "total", matching the
                # fallback cmd_health applies when it aggregates suites.
                total = v.get('total', v.get('count', '?'))
                print(f" {k}: {v.get('passing', '?')}/{total}")
            elif k in ('count', 'total', 'passing', 'accuracy', 'file', 'date'):
                print(f" {k}: {v}")
            elif k == 'results' and isinstance(v, list):
                for r in v:
                    if not isinstance(r, dict):
                        # Entry without result/test fields: show it as-is.
                        print(f" [????] {r}")
                        continue
                    # The marker was an empty string for both outcomes, so
                    # passes and failures could not be told apart.
                    mark = '[PASS]' if r.get('result') == 'pass' else '[FAIL]'
                    print(f" {mark} {r.get('test', '?')}")
def cmd_search(query):
    """Full-text search over knowledge-graph values and wiki page content.

    Prints the first 10 graph entries whose value contains ``query``
    (case-insensitive, value truncated to 80 characters), then every wiki
    page containing it, ordered by descending occurrence count.
    """
    entries = flatten_graph(load_graph())
    pages = load_all_pages()
    needle = query.lower()
    print(f"Search: '{query}'\n")

    value_hits = [pair for pair in entries if needle in pair[1].lower()]
    if value_hits:
        print(f" -- Knowledge Graph ({len(value_hits)} hits) --")
        for key_path, text in value_hits[:10]:
            print(f" {key_path}: {text[:80]}")

    # (occurrences, path, title) triples; sorting them in reverse puts the
    # most frequent matches first.
    ranked = []
    for rel, page in pages.items():
        lowered = page['content'].lower()
        if needle in lowered:
            ranked.append((lowered.count(needle), rel, page['title']))
    ranked.sort(reverse=True)

    if ranked:
        print(f"\n -- Wiki Pages ({len(ranked)} pages) --")
        for occurrences, rel, title in ranked:
            print(f" {occurrences:3d}x {title} ({rel})")
def cmd_related(page_name):
    """Print the pages linking to ``page_name`` and the links it contains.

    "Links TO" lists every page with at least one ``[[link]]`` containing
    the name. "Links FROM" lists the links of the first page (in sorted path
    order) whose title or relative path contains the name. Hyphens,
    underscores and letter case are ignored in all comparisons.
    """
    def _norm(text):
        # Same normalisation on every side of a comparison. Previously the
        # query mapped both '-' and '_' to spaces while links and titles
        # only mapped '-', so names with underscores could never match.
        return str(text).lower().replace('-', ' ').replace('_', ' ')

    pages = load_all_pages()
    q = _norm(page_name)
    print(f"Related to: '{page_name}'\n")

    print(" -- Links TO --")
    for rel, page in sorted(pages.items()):
        if any(q in _norm(link) for link in page['links']):
            print(f" <- {page['title']} ({rel})")

    print("\n -- Links FROM --")
    # Sorted so that the page picked by the ``break`` below does not depend
    # on filesystem enumeration order.
    for rel, page in sorted(pages.items()):
        # Also match on the path: a page whose front matter sets a title is
        # otherwise unreachable by its file name.
        if q in _norm(page['title']) or q in _norm(rel):
            for link in page['links']:
                print(f" -> [[{link}]]")
            break
def cmd_timeline():
    """Print one line per entry of the knowledge graph's ``timeline`` list.

    Each line shows the entry's ``date``, ``commit`` and ``desc`` fields;
    a missing commit or description is shown as ``?``.
    """
    timeline = load_graph().get('timeline', [])
    for entry in timeline:
        date = entry.get('date')
        commit = entry.get('commit', '?')
        desc = entry.get('desc', '?')
        print(f" [{date}] {commit}: {desc}")
# CLI sub-command name -> handler. Insertion order is the order in which
# the names appear in the usage message.
COMMANDS = {
    'health': cmd_health,
    'entity': cmd_entity,
    'metric': cmd_metric,
    'status': cmd_status,
    'test': cmd_test,
    'search': cmd_search,
    'related': cmd_related,
    'timeline': cmd_timeline,
}
if __name__ == '__main__':
    # CLI entry point: the first argument selects the command, the remaining
    # arguments are joined with spaces into a single query string.
    if len(sys.argv) < 2 or sys.argv[1] not in COMMANDS:
        print(f"Usage: query.py <{'|'.join(COMMANDS)}> [args]")
        sys.exit(1)
    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd in ('timeline', 'health'):
        # These commands take no query; extra arguments are ignored.
        COMMANDS[cmd]()
    elif args:
        COMMANDS[cmd](' '.join(args))
    else:
        print(f"Usage: query.py {cmd} <query>")
        # A missing query is a usage error like the one above; exit
        # non-zero so scripts and agents can detect the failure.
        sys.exit(1)