#!/usr/bin/env python3
"""Wiki Knowledge Graph query tool.

Queryable interface over knowledge-graph.yaml + wiki pages.
Usable by both humans (CLI) and LLM agents (imported).

Usage:
    python wiki/tools/query.py health                  # project health
    python wiki/tools/query.py entity "search term"    # everything about an entity
    python wiki/tools/query.py metric "search term"    # find metrics
    python wiki/tools/query.py status "proven"         # all pages with status
    python wiki/tools/query.py test "test name"        # test results
    python wiki/tools/query.py search "keyword"        # full-text search
    python wiki/tools/query.py related "page-name"     # pages linking to/from
    python wiki/tools/query.py timeline                # commit timeline
"""
import yaml
import os
import sys
import re
from pathlib import Path

WIKI_DIR = Path(__file__).parent.parent
GRAPH_PATH = WIKI_DIR / 'knowledge-graph.yaml'

# Matches [[wiki links]]; compiled once because it is applied to every page.
_LINK_RE = re.compile(r'\[\[([^\]]+)\]\]')


def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _normalize_name(name):
    """Lower-case *name* and treat '-' and '_' as spaces for loose matching."""
    return str(name).lower().replace('-', ' ').replace('_', ' ')


def load_graph():
    """Load knowledge-graph.yaml.

    Returns:
        The parsed mapping, or an empty dict when the file is missing, empty,
        or its top level is not a mapping.
    """
    if not GRAPH_PATH.exists():
        return {}
    with open(GRAPH_PATH, encoding='utf-8') as f:
        return _as_dict(yaml.safe_load(f))


def load_all_pages():
    """Read every Markdown page under WIKI_DIR, except those in a tools/ dir.

    Returns:
        A dict keyed by the page path relative to WIKI_DIR. Each value is a
        dict with the keys 'path', 'meta' (parsed front matter), 'content'
        (body without the front matter), 'links' (targets of [[...]] links),
        'title', 'status' and 'tags'.
    """
    pages = {}
    for md_path in WIKI_DIR.rglob('*.md'):
        rel = md_path.relative_to(WIKI_DIR)
        # Check only directory components relative to the wiki root, so an
        # unrelated "tools" in the absolute path (or a page such as
        # "devtools.md") does not exclude real pages.
        if 'tools' in rel.parts:
            continue

        content = md_path.read_text(encoding='utf-8', errors='replace')
        meta = {}
        if content.startswith('---'):
            parts = content.split('---', 2)
            if len(parts) >= 3:
                try:
                    # Front matter that parses to a scalar or list is ignored.
                    meta = _as_dict(yaml.safe_load(parts[1]))
                except yaml.YAMLError:
                    # Best effort: keep the page, just without metadata.
                    meta = {}
                content = parts[2]

        rel_str = str(rel)
        pages[rel_str] = {
            'path': rel_str,
            'meta': meta,
            'content': content,
            'links': _LINK_RE.findall(content),
            # Coerced to str because the query commands call .lower() on it;
            # an empty or null title falls back to the path.
            'title': str(meta.get('title') or rel_str),
            'status': meta.get('status', 'unknown'),
            'tags': meta.get('tags', []),
        }
    return pages


def flatten_graph(graph, prefix=''):
    """Flatten nested dicts/lists into a list of (dotted.path, str(value)).

    Dict keys are joined with '.', list items are addressed as 'prefix[i]'.
    A top-level value that is neither a dict nor a list yields an empty list.
    """
    if isinstance(graph, dict):
        children = [(f"{prefix}.{k}" if prefix else k, v)
                    for k, v in graph.items()]
    elif isinstance(graph, list):
        children = [(f"{prefix}[{i}]", v) for i, v in enumerate(graph)]
    else:
        return []

    items = []
    for path, value in children:
        if isinstance(value, (dict, list)):
            items.extend(flatten_graph(value, path))
        else:
            items.append((path, str(value)))
    return items


def cmd_health():
    """Print page counts per status, test totals and a broken-link count."""
    graph = load_graph()
    pages = load_all_pages()

    statuses = {}
    for page in pages.values():
        statuses[page['status']] = statuses.get(page['status'], 0) + 1

    suites = [t for t in _as_dict(graph.get('tests')).values()
              if isinstance(t, dict)]
    total_pass = sum(t.get('passing', 0) for t in suites)
    total_count = sum(t.get('count', t.get('total', 0)) for t in suites)
    disproven = len(graph.get('disproven') or {})
    timeline = len(graph.get('timeline') or [])

    # Count broken links. Titles, file stems and link targets all go through
    # the same normalisation, so [[my-page]] resolves to my-page.md.
    known = set()
    for page in pages.values():
        known.add(_normalize_name(page['title']))
        known.add(_normalize_name(Path(page['path']).stem))

    def resolves(link):
        target = _normalize_name(link)
        return any(target in name or name in target for name in known)

    broken = sum(1 for page in pages.values()
                 for link in page['links'] if not resolves(link))

    print("Wiki Health:\n")
    print(f" Pages: {len(pages)}")
    print(f" Statuses: {statuses}")
    if total_count:
        print(f" Tests: {total_pass}/{total_count} passing")
    print(f" Disproven: {disproven} claims tracked")
    print(f" Timeline: {timeline} commits")
    print(f" Broken links: {broken}")


def cmd_entity(query):
    """Print graph entries and page lines that mention *query*.

    Matching is a case-insensitive substring test. At most 20 graph entries
    and 3 lines per page are shown.
    """
    graph = load_graph()
    pages = load_all_pages()
    q = query.lower()
    print(f"Entity: '{query}'\n")

    hits = [(path, value) for path, value in flatten_graph(graph)
            if q in path.lower() or q in value.lower()]
    if hits:
        print(" -- Knowledge Graph --")
        for path, value in hits[:20]:
            print(f" {path}: {value}")

    print("\n -- Wiki Pages --")
    for rel, page in sorted(pages.items()):
        if q in page['content'].lower() or q in page['title'].lower():
            lines = [line.strip() for line in page['content'].split('\n')
                     if q in line.lower() and line.strip()]
            print(f" {rel} ({page['status']})")
            for line in lines[:3]:
                print(f" {line[:100]}")


def cmd_metric(query):
    """Print every graph entry whose path or value contains *query*."""
    q = query.lower()
    print(f"Metrics matching '{query}':\n")
    found = 0
    for path, value in flatten_graph(load_graph()):
        if q in path.lower() or q in value.lower():
            print(f" {path}: {value}")
            found += 1
    if not found:
        print(" (no matches)")


def cmd_status(status):
    """Print all pages whose front-matter status equals *status*.

    For status 'disproven' the graph's 'disproven' section is printed too.
    """
    pages = load_all_pages()
    graph = load_graph()
    print(f"Status: '{status}'\n")
    for rel, page in sorted(pages.items()):
        if page['status'] == status:
            print(f" {page['title']} ({rel})")
            if page['tags']:
                print(f" tags: {page['tags']}")

    if status == 'disproven' and 'disproven' in graph:
        print("\n -- Disproven Claims --")
        for name, claim in _as_dict(graph['disproven']).items():
            print(f" {name}:")
            if isinstance(claim, dict):
                for k, v in claim.items():
                    print(f" {k}: {v}")
            else:
                # A claim recorded as a bare value is shown as-is.
                print(f" {claim}")


def cmd_test(query):
    """Print test suites whose name or contents contain *query*."""
    tests = _as_dict(load_graph().get('tests'))
    q = query.lower()
    print(f"Test results for '{query}':\n")
    for name, suite in tests.items():
        if q not in str(name).lower() and q not in str(suite).lower():
            continue
        print(f" -- {name} --")
        if not isinstance(suite, dict):
            continue
        for k, v in suite.items():
            if isinstance(v, dict):
                print(f" {k}: {v.get('passing', '?')}/{v.get('total', '?')}")
            elif k in ('count', 'passing', 'accuracy', 'file', 'date'):
                print(f" {k}: {v}")
            elif k == 'results' and isinstance(v, list):
                for r in v:
                    if not isinstance(r, dict):
                        continue  # skip malformed result rows
                    mark = '✓' if r.get('result') == 'pass' else '✗'
                    print(f" {mark} {r.get('test', '?')}")


def cmd_search(query):
    """Full-text search over graph values and page bodies.

    Shows at most 10 graph hits; pages are listed by descending match count.
    """
    flat = flatten_graph(load_graph())
    pages = load_all_pages()
    q = query.lower()
    print(f"Search: '{query}'\n")

    graph_hits = [(path, value) for path, value in flat if q in value.lower()]
    if graph_hits:
        print(f" -- Knowledge Graph ({len(graph_hits)} hits) --")
        for path, value in graph_hits[:10]:
            print(f" {path}: {value[:80]}")

    page_hits = sorted(
        [(page['content'].lower().count(q), rel, page['title'])
         for rel, page in pages.items() if q in page['content'].lower()],
        reverse=True)
    if page_hits:
        print(f"\n -- Wiki Pages ({len(page_hits)} pages) --")
        for count, rel, title in page_hits:
            print(f" {count:3d}x {title} ({rel})")


def cmd_related(page_name):
    """Print pages that link to *page_name* and the links it contains.

    Names are compared case-insensitively with '-' and '_' treated as spaces.
    """
    pages = load_all_pages()
    q = _normalize_name(page_name)
    print(f"Related to: '{page_name}'\n")

    print(" -- Links TO --")
    for rel, page in sorted(pages.items()):
        # List each linking page once, however many matching links it has.
        if any(q in _normalize_name(link) for link in page['links']):
            print(f" <- {page['title']} ({rel})")

    print("\n -- Links FROM --")
    for rel, page in pages.items():
        # Match on the title or the file stem, since either may be used to
        # refer to a page.
        if (q in _normalize_name(page['title'])
                or q in _normalize_name(Path(rel).stem)):
            for link in page['links']:
                print(f" -> [[{link}]]")
            break  # only the first matching page is reported


def cmd_timeline():
    """Print the graph's 'timeline' entries, one line per commit."""
    for entry in load_graph().get('timeline') or []:
        if not isinstance(entry, dict):
            continue  # skip malformed entries
        print(f" [{entry.get('date')}] {entry.get('commit', '?')}: "
              f"{entry.get('desc', '?')}")


COMMANDS = {
    'health': cmd_health,
    'entity': cmd_entity,
    'metric': cmd_metric,
    'status': cmd_status,
    'test': cmd_test,
    'search': cmd_search,
    'related': cmd_related,
    'timeline': cmd_timeline,
}


def main(argv=None):
    """Run the CLI.

    Args:
        argv: Arguments without the program name; defaults to sys.argv[1:].

    Returns:
        0 on success, 1 when the command is unknown or its query is missing.
    """
    argv = sys.argv[1:] if argv is None else list(argv)
    if not argv or argv[0] not in COMMANDS:
        print(f"Usage: query.py <{'|'.join(COMMANDS)}> [args]")
        return 1

    cmd, args = argv[0], argv[1:]
    if cmd in ('timeline', 'health'):
        COMMANDS[cmd]()
    elif args:
        COMMANDS[cmd](' '.join(args))
    else:
        # A missing query is a usage error, so report it with a failing code.
        print(f"Usage: query.py {cmd} <query>")
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())