- ai-service/convert.py: converts Office/PDF files to markdown with frontmatter - database/seeders/data/: folder structure for themas, projects, documents, etc. - database/seeders/data/raw/: drop zone for Office/PDF files to convert - wiki/: project architecture, concepts, and knowledge graph documentation - Remove unused Laravel example tests Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
250 lines
8.3 KiB
Python
250 lines
8.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Wiki Knowledge Graph query tool.
|
|
|
|
Queryable interface over knowledge-graph.yaml + wiki pages.
|
|
Usable by both humans (CLI) and LLM agents (imported).
|
|
|
|
Usage:
|
|
python wiki/tools/query.py health # project health
|
|
python wiki/tools/query.py entity "search term" # everything about an entity
|
|
python wiki/tools/query.py metric "search term" # find metrics
|
|
python wiki/tools/query.py status "proven" # all pages with status
|
|
python wiki/tools/query.py test "test name" # test results
|
|
python wiki/tools/query.py search "keyword" # full-text search
|
|
python wiki/tools/query.py related "page-name" # pages linking to/from
|
|
python wiki/tools/query.py timeline # commit timeline
|
|
"""
|
|
|
|
import yaml
|
|
import os
|
|
import sys
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# Wiki root: two directory levels above this file. Resolve first so the result
# is absolute even when __file__ is a relative path (e.g. the script is started
# from its own directory); without that, `.parent.parent` of a bare filename
# collapses to "." and points at the wrong directory.
WIKI_DIR = Path(__file__).resolve().parent.parent
# YAML knowledge graph, stored directly under the wiki root.
GRAPH_PATH = WIKI_DIR / 'knowledge-graph.yaml'
|
|
|
|
|
|
def load_graph():
    """Load the knowledge graph from ``GRAPH_PATH``.

    Returns:
        The parsed YAML document, or an empty dict when the file does not
        exist or parses to an empty/falsy value.
    """
    if not GRAPH_PATH.exists():
        return {}
    # Decode explicitly as UTF-8: the default text encoding depends on the
    # platform/locale and can mis-decode or reject non-ASCII content.
    with open(GRAPH_PATH, encoding='utf-8') as f:
        return yaml.safe_load(f) or {}
|
|
|
|
|
|
def load_all_pages():
    """Read every markdown page under ``WIKI_DIR``.

    Each file may start with a ``---`` delimited YAML frontmatter block; when
    present it is parsed into ``meta`` and removed from ``content``.

    Returns:
        A dict keyed by the page path relative to ``WIKI_DIR`` (as a string).
        Each value is a dict with the keys ``path``, ``meta``, ``content``,
        ``links`` (targets of ``[[...]]`` references found in the content),
        ``title``, ``status`` and ``tags``.
    """
    pages = {}
    # Sorted so the resulting dict order does not depend on filesystem
    # enumeration order.
    for md_path in sorted(WIKI_DIR.rglob('*.md')):
        rel = md_path.relative_to(WIKI_DIR)
        # Skip files that live inside a "tools" directory of the wiki. Only the
        # relative directory components are tested: a substring test on the
        # full path would also drop pages such as "devtools.md", or every page
        # when some parent directory of the checkout contains "tools".
        if 'tools' in rel.parts[:-1]:
            continue
        # Explicit encoding keeps decoding platform-independent; undecodable
        # bytes are replaced rather than aborting the whole scan.
        content = md_path.read_text(encoding='utf-8', errors='replace')
        meta = {}
        if content.startswith('---'):
            parts = content.split('---', 2)
            if len(parts) >= 3:
                try:
                    loaded = yaml.safe_load(parts[1])
                except yaml.YAMLError:
                    # Malformed frontmatter: keep empty metadata, but still
                    # strip the block from the content below.
                    loaded = None
                # Frontmatter that parses to a scalar or list cannot be
                # queried with .get(); treat it as "no metadata".
                if isinstance(loaded, dict):
                    meta = loaded
                content = parts[2]
        links = re.findall(r'\[\[([^\]]+)\]\]', content)
        pages[str(rel)] = {
            'path': str(rel),
            'meta': meta,
            'content': content,
            'links': links,
            # Always a non-empty string so callers can safely call .lower();
            # a missing or empty title falls back to the relative path.
            'title': str(meta.get('title') or rel),
            'status': meta.get('status', 'unknown'),
            'tags': meta.get('tags') or [],
        }
    return pages
|
|
|
|
|
|
def flatten_graph(graph, prefix=''):
    """Flatten nested dicts/lists into a list of ``(path, value)`` pairs.

    Dict keys are joined to the prefix with ``.`` and list positions are
    appended as ``[i]``. Leaf values are converted with ``str()``. Empty
    containers contribute nothing, and a ``graph`` that is neither a dict nor
    a list yields an empty list.

    Args:
        graph: The (sub)structure to flatten.
        prefix: Path of ``graph`` within the enclosing structure.

    Returns:
        List of ``(path, str_value)`` tuples in traversal order.
    """
    if isinstance(graph, dict):
        children = [
            (f"{prefix}.{key}" if prefix else key, child)
            for key, child in graph.items()
        ]
    elif isinstance(graph, list):
        children = [
            (f"{prefix}[{index}]", child)
            for index, child in enumerate(graph)
        ]
    else:
        return []

    flat = []
    for child_path, child in children:
        if isinstance(child, (dict, list)):
            flat += flatten_graph(child, child_path)
        else:
            flat.append((child_path, str(child)))
    return flat
|
|
|
|
|
|
def cmd_health():
    """Print a wiki health summary.

    Reports the page count, a per-status page tally, aggregated test totals
    from the graph's ``tests`` section (only when a total is present), the
    number of ``disproven`` entries, the number of ``timeline`` entries, and
    the number of ``[[...]]`` links that match no known page.
    """
    def normalize(text):
        # Same normalization for links, titles and filenames; otherwise a link
        # to a hyphenated filename can never match it.
        return str(text).lower().replace('-', ' ').replace('_', ' ')

    graph = load_graph()
    pages = load_all_pages()

    statuses = {}
    for page in pages.values():
        status = page['status']
        statuses[status] = statuses.get(status, 0) + 1

    # A key that is present but null (e.g. `tests:`) comes back as None, so
    # fall back to an empty container before iterating or measuring.
    tests = graph.get('tests') or {}
    suites = ([t for t in tests.values() if isinstance(t, dict)]
              if isinstance(tests, dict) else [])
    total_pass = sum(t.get('passing', 0) for t in suites)
    total_count = sum(t.get('count', t.get('total', 0)) for t in suites)
    disproven = len(graph.get('disproven') or {})
    timeline = len(graph.get('timeline') or [])

    # A link is considered resolved when it loosely (substring either way)
    # matches a page title or a page filename without its extension.
    known = set()
    for page in pages.values():
        known.add(normalize(page['title']))
        known.add(normalize(Path(page['path']).stem))
    # An empty name is a substring of everything and would hide all breakage.
    known.discard('')

    broken = 0
    for page in pages.values():
        for link in page['links']:
            target = normalize(link)
            if not any(target in name or name in target for name in known):
                broken += 1

    print("Wiki Health:\n")
    print(f" Pages: {len(pages)}")
    print(f" Statuses: {statuses}")
    if total_count:
        print(f" Tests: {total_pass}/{total_count} passing")
    print(f" Disproven: {disproven} claims tracked")
    print(f" Timeline: {timeline} commits")
    print(f" Broken links: {broken}")
|
|
|
|
|
|
def cmd_entity(query):
    """Print everything known about ``query``.

    Shows up to 20 flattened knowledge-graph entries whose path or value
    contains the query (case-insensitive), followed by every wiki page whose
    content or title contains it, each with up to three matching lines
    truncated to 100 characters.
    """
    graph = load_graph()
    pages = load_all_pages()
    needle = query.lower()
    print(f"Entity: '{query}'\n")

    graph_hits = []
    for path, value in flatten_graph(graph):
        if needle in path.lower() or needle in value.lower():
            graph_hits.append((path, value))
    if graph_hits:
        print(" -- Knowledge Graph --")
        for path, value in graph_hits[:20]:
            print(f" {path}: {value}")

    print("\n -- Wiki Pages --")
    for rel in sorted(pages):
        page = pages[rel]
        body = page['content']
        if needle not in body.lower() and needle not in page['title'].lower():
            continue
        # Collect the non-blank lines that mention the query.
        snippets = []
        for raw in body.split('\n'):
            if needle in raw.lower() and raw.strip():
                snippets.append(raw.strip())
        print(f" {rel} ({page['status']})")
        for snippet in snippets[:3]:
            print(f" {snippet[:100]}")
|
|
|
|
|
|
def cmd_metric(query):
    """Print every flattened graph entry whose path or value contains ``query``.

    Matching is case-insensitive. Prints ``(no matches)`` when nothing is
    found.
    """
    entries = flatten_graph(load_graph())
    needle = query.lower()
    print(f"Metrics matching '{query}':\n")

    matched = False
    for path, value in entries:
        if needle not in path.lower() and needle not in value.lower():
            continue
        print(f" {path}: {value}")
        matched = True

    if not matched:
        print(" (no matches)")
|
|
|
|
|
|
def cmd_status(status):
    """List all pages whose frontmatter status equals ``status``.

    The comparison ignores case and surrounding whitespace, in line with the
    case-insensitive matching of the other commands. When the requested status
    is ``disproven``, the graph's ``disproven`` section is printed as well.

    Args:
        status: Status name to look for, e.g. ``"proven"``.
    """
    pages = load_all_pages()
    graph = load_graph()
    wanted = status.strip().lower()
    print(f"Status: '{status}'\n")

    for rel, page in sorted(pages.items()):
        # str() because a YAML status may parse to a non-string value.
        if str(page['status']).strip().lower() != wanted:
            continue
        print(f" {page['title']} ({rel})")
        if page['tags']:
            print(f" tags: {page['tags']}")

    claims = graph.get('disproven')
    if wanted == 'disproven' and isinstance(claims, dict):
        print("\n -- Disproven Claims --")
        for name, claim in claims.items():
            print(f" {name}:")
            if isinstance(claim, dict):
                for k, v in claim.items():
                    print(f" {k}: {v}")
            else:
                # A claim written as a plain value rather than a mapping:
                # show it as-is instead of failing on .items().
                print(f" {claim}")
|
|
|
|
|
|
def cmd_test(query):
    """Print test suites from the graph's ``tests`` section that match ``query``.

    A suite matches when the query occurs (case-insensitive) in its name or in
    the string form of its data. For matching dict suites, nested dict fields
    are shown as ``passing/total``, a fixed set of scalar fields is printed
    verbatim, and a ``results`` list is rendered as pass/fail marks.
    """
    suites = load_graph().get('tests', {})
    needle = query.lower()
    print(f"Test results for '{query}':\n")

    scalar_fields = ('count', 'passing', 'accuracy', 'file', 'date')
    for name, suite in suites.items():
        if needle not in name.lower() and needle not in str(suite).lower():
            continue
        print(f" -- {name} --")
        if not isinstance(suite, dict):
            continue
        for key, value in suite.items():
            if isinstance(value, dict):
                passing = value.get('passing', '?')
                total = value.get('total', '?')
                print(f" {key}: {passing}/{total}")
            elif key in scalar_fields:
                print(f" {key}: {value}")
            elif key == 'results' and isinstance(value, list):
                for outcome in value:
                    if outcome.get('result') == 'pass':
                        mark = '✓'
                    else:
                        mark = '✗'
                    print(f" {mark} {outcome.get('test', '?')}")
|
|
|
|
|
|
def cmd_search(query):
    """Full-text search over graph values and wiki page content.

    Prints up to 10 graph entries whose value contains the query
    (case-insensitive, values truncated to 80 characters), then every page
    whose content contains it, ordered by descending occurrence count.
    """
    flat = flatten_graph(load_graph())
    pages = load_all_pages()
    needle = query.lower()
    print(f"Search: '{query}'\n")

    graph_hits = []
    for path, value in flat:
        if needle in value.lower():
            graph_hits.append((path, value))
    if graph_hits:
        print(f" -- Knowledge Graph ({len(graph_hits)} hits) --")
        for path, value in graph_hits[:10]:
            print(f" {path}: {value[:80]}")

    page_hits = []
    for rel, page in pages.items():
        lowered = page['content'].lower()
        if needle in lowered:
            page_hits.append((lowered.count(needle), rel, page['title']))
    # Highest count first; ties fall back to the remaining tuple fields.
    page_hits.sort(reverse=True)
    if page_hits:
        print(f"\n -- Wiki Pages ({len(page_hits)} pages) --")
        for count, rel, title in page_hits:
            print(f" {count:3d}x {title} ({rel})")
|
|
|
|
|
|
def cmd_related(page_name):
    """Print pages that link to ``page_name`` and the links it contains.

    Matching is a case-insensitive substring test in which ``-`` and ``_`` are
    treated as spaces on both sides of the comparison.

    "Links TO" lists every page with at least one ``[[...]]`` link matching the
    name. "Links FROM" lists the links of the first page (in path order) whose
    title matches the name.

    Args:
        page_name: Page name or title fragment to look up.
    """
    def normalize(text):
        # Applied to the query, link targets and titles alike; normalizing
        # only the query made names containing "_" impossible to match.
        return str(text).lower().replace('-', ' ').replace('_', ' ')

    pages = load_all_pages()
    q = normalize(page_name)
    print(f"Related to: '{page_name}'\n")

    print(" -- Links TO --")
    for rel, page in sorted(pages.items()):
        # any() reports each linking page once, however many links match.
        if any(q in normalize(link) for link in page['links']):
            print(f" <- {page['title']} ({rel})")

    print("\n -- Links FROM --")
    # Sorted so that "the first matching page" is deterministic.
    for rel, page in sorted(pages.items()):
        if q in normalize(page['title']):
            for link in page['links']:
                print(f" -> [[{link}]]")
            break
|
|
|
|
|
|
def cmd_timeline():
    """Print one line per entry of the graph's ``timeline`` list.

    Each line shows the entry's ``date``, ``commit`` and ``desc`` fields;
    a missing ``commit`` or ``desc`` is shown as ``?``.
    """
    timeline = load_graph().get('timeline', [])
    for entry in timeline:
        date = entry.get('date')
        commit = entry.get('commit', '?')
        desc = entry.get('desc', '?')
        print(f" [{date}] {commit}: {desc}")
|
|
|
|
|
|
# Dispatch table: CLI sub-command name -> handler function. The insertion
# order is the order in which the names appear in the usage message.
COMMANDS = dict(
    health=cmd_health,
    entity=cmd_entity,
    metric=cmd_metric,
    status=cmd_status,
    test=cmd_test,
    search=cmd_search,
    related=cmd_related,
    timeline=cmd_timeline,
)
|
|
|
|
if __name__ == '__main__':
    # CLI entry point: query.py <command> [query words...]
    if len(sys.argv) < 2 or sys.argv[1] not in COMMANDS:
        print(f"Usage: query.py <{'|'.join(COMMANDS)}> [args]")
        sys.exit(1)
    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd in ('timeline', 'health'):
        # These commands take no query; any extra arguments are ignored.
        COMMANDS[cmd]()
    elif args:
        # Join the remaining words so multi-word queries work unquoted.
        COMMANDS[cmd](' '.join(args))
    else:
        print(f"Usage: query.py {cmd} <query>")
        # A missing query is a usage error too: exit non-zero, like the
        # unknown-command branch above, so callers can detect the failure.
        sys.exit(1)
|