Add document converter, seeder data structure, and project wiki
- ai-service/convert.py: converts Office/PDF files to markdown with frontmatter - database/seeders/data/: folder structure for themas, projects, documents, etc. - database/seeders/data/raw/: drop zone for Office/PDF files to convert - wiki/: project architecture, concepts, and knowledge graph documentation - Remove unused Laravel example tests Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
249
wiki/tools/query.py
Normal file
249
wiki/tools/query.py
Normal file
@@ -0,0 +1,249 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Wiki Knowledge Graph query tool.
|
||||
|
||||
Queryable interface over knowledge-graph.yaml + wiki pages.
|
||||
Usable by both humans (CLI) and LLM agents (imported).
|
||||
|
||||
Usage:
|
||||
python wiki/tools/query.py health # project health
|
||||
python wiki/tools/query.py entity "search term" # everything about an entity
|
||||
python wiki/tools/query.py metric "search term" # find metrics
|
||||
python wiki/tools/query.py status "proven" # all pages with status
|
||||
python wiki/tools/query.py test "test name" # test results
|
||||
python wiki/tools/query.py search "keyword" # full-text search
|
||||
python wiki/tools/query.py related "page-name" # pages linking to/from
|
||||
python wiki/tools/query.py timeline # commit timeline
|
||||
"""
|
||||
|
||||
import yaml
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# Directory that holds the wiki pages: two levels above this script.
# Resolve first so that a relative __file__ (e.g. the script started from
# inside its own directory on an interpreter that leaves __main__.__file__
# relative) does not collapse `.parent.parent` to '.'.
WIKI_DIR = Path(__file__).resolve().parent.parent
# Location of the knowledge graph that the query commands read.
GRAPH_PATH = WIKI_DIR / 'knowledge-graph.yaml'
|
||||
|
||||
|
||||
def load_graph():
    """Load the knowledge graph from GRAPH_PATH.

    Returns:
        The parsed YAML document, or an empty dict when the file does not
        exist or parses to an empty/falsy document.
    """
    if not GRAPH_PATH.exists():
        return {}
    # Decode explicitly as UTF-8: without it open() uses the platform's
    # locale encoding, which can mis-decode or reject non-ASCII content.
    with open(GRAPH_PATH, encoding='utf-8') as f:
        return yaml.safe_load(f) or {}
|
||||
|
||||
|
||||
def load_all_pages():
    """Read every Markdown page under WIKI_DIR.

    Files that live inside a directory named ``tools`` are skipped. A leading
    ``---``-delimited block is parsed as YAML frontmatter and removed from
    the page body; frontmatter that fails to parse, or that is not a mapping,
    is ignored.

    Returns:
        A dict keyed by each page's path relative to WIKI_DIR (as a string).
        Each value is a dict with the keys ``path``, ``meta`` (frontmatter),
        ``content`` (body without frontmatter), ``links`` (targets of
        ``[[...]]`` references in the body), ``title`` (frontmatter title,
        else the relative path), ``status`` (frontmatter status, else
        ``'unknown'``) and ``tags`` (frontmatter tags, else ``[]``).
    """
    pages = {}
    for md_path in WIKI_DIR.rglob('*.md'):
        rel = md_path.relative_to(WIKI_DIR)
        # Only look at directory components below the wiki root. A substring
        # test on the full path would also drop pages such as 'devtools.md',
        # or every page when the checkout sits under a path containing
        # 'tools'.
        if 'tools' in rel.parts[:-1]:
            continue

        content = md_path.read_text(encoding='utf-8')
        meta = {}
        if content.startswith('---'):
            parts = content.split('---', 2)
            if len(parts) >= 3:
                try:
                    loaded = yaml.safe_load(parts[1])
                except yaml.YAMLError:
                    # Best effort: a page with malformed frontmatter is
                    # still indexed, just without metadata.
                    loaded = None
                # Frontmatter that is a bare scalar or list has no keys to
                # read; treat it like missing metadata instead of crashing
                # on .get() below.
                if isinstance(loaded, dict):
                    meta = loaded
                content = parts[2]

        # Callers lowercase the title, so always hand back a string even if
        # the frontmatter value is empty or a number.
        title = meta.get('title')
        title = str(rel) if title is None else str(title)

        pages[str(rel)] = {
            'path': str(rel),
            'meta': meta,
            'content': content,
            'links': re.findall(r'\[\[([^\]]+)\]\]', content),
            'title': title,
            'status': meta.get('status', 'unknown'),
            'tags': meta.get('tags', []),
        }
    return pages
|
||||
|
||||
|
||||
def flatten_graph(graph, prefix=''):
    """Flatten nested dicts/lists into ``(path, value)`` string pairs.

    Dict keys are joined with ``.`` and list positions are written as
    ``[i]``, e.g. ``{'a': [{'b': 1}]}`` becomes ``[('a[0].b', '1')]``.
    Leaf values are converted with ``str()``. Empty containers contribute
    nothing, and a *graph* that is neither a dict nor a list yields ``[]``.

    Args:
        graph: The dict or list to flatten.
        prefix: Path of *graph* within the enclosing structure; empty for
            the root.

    Returns:
        A list of ``(path, value)`` tuples in traversal order; both elements
        are always ``str``.
    """
    if isinstance(graph, dict):
        # str(key) at the root: YAML keys may be ints, bools or dates, and
        # callers lowercase the path, so it must always be a string.
        children = [
            (f"{prefix}.{key}" if prefix else str(key), value)
            for key, value in graph.items()
        ]
    elif isinstance(graph, list):
        children = [
            (f"{prefix}[{index}]", value) for index, value in enumerate(graph)
        ]
    else:
        return []

    items = []
    for path, value in children:
        if isinstance(value, (dict, list)):
            items.extend(flatten_graph(value, path))
        else:
            items.append((path, str(value)))
    return items
|
||||
|
||||
|
||||
def cmd_health():
    """Print a summary of wiki and knowledge-graph health.

    Reports the page count, the number of pages per status, the summed
    passing/total test counts, the number of disproven claims, the number of
    timeline entries, and how many ``[[links]]`` match no page title or file
    name.
    """
    def normalize(name):
        # Case-insensitive, with '-' treated as a space. Applied to links AND
        # page names so that [[my-page]] matches the file 'my-page.md' as
        # well as the title "My Page".
        return str(name).lower().replace('-', ' ')

    graph = load_graph()
    pages = load_all_pages()

    statuses = {}
    for page in pages.values():
        statuses[page['status']] = statuses.get(page['status'], 0) + 1

    # A YAML key with no value parses as None; treat such sections as empty
    # instead of crashing on .values() / len().
    tests = graph.get('tests') or {}
    suites = [t for t in tests.values() if isinstance(t, dict)] \
        if isinstance(tests, dict) else []
    total_pass = sum(t.get('passing', 0) for t in suites)
    total_count = sum(t.get('count', t.get('total', 0)) for t in suites)
    disproven = len(graph.get('disproven') or {})
    timeline = len(graph.get('timeline') or [])

    # Count broken links: a link is fine when it and some known page name
    # contain one another (loose substring match in either direction).
    known_names = set()
    for page in pages.values():
        known_names.add(normalize(page['title']))
        known_names.add(normalize(Path(page['path']).stem))
    # An empty name is a substring of everything and would hide every
    # broken link.
    known_names.discard('')

    broken = 0
    for page in pages.values():
        for link in page['links']:
            target = normalize(link)
            if not any(target in name or name in target
                       for name in known_names):
                broken += 1

    print("Wiki Health:\n")
    print(f" Pages: {len(pages)}")
    print(f" Statuses: {statuses}")
    if total_count:
        print(f" Tests: {total_pass}/{total_count} passing")
    print(f" Disproven: {disproven} claims tracked")
    print(f" Timeline: {timeline} commits")
    print(f" Broken links: {broken}")
|
||||
|
||||
|
||||
def cmd_entity(query):
    """Print everything recorded about *query*.

    Lists up to 20 knowledge-graph entries whose flattened path or value
    contains the query (case-insensitive), then every wiki page whose body
    or title contains it, each followed by at most three matching lines
    truncated to 100 characters.
    """
    graph = load_graph()
    pages = load_all_pages()
    needle = query.lower()
    print(f"Entity: '{query}'\n")

    graph_hits = []
    for path, value in flatten_graph(graph):
        if needle in path.lower() or needle in value.lower():
            graph_hits.append((path, value))
    if graph_hits:
        print(" -- Knowledge Graph --")
        for path, value in graph_hits[:20]:
            print(f" {path}: {value}")

    print("\n -- Wiki Pages --")
    for rel in sorted(pages):
        page = pages[rel]
        body = page['content']
        if needle not in body.lower() and needle not in page['title'].lower():
            continue
        # Non-blank body lines that mention the query, stripped for display.
        matching = [
            raw.strip() for raw in body.split('\n')
            if needle in raw.lower() and raw.strip()
        ]
        print(f" {rel} ({page['status']})")
        for text in matching[:3]:
            print(f" {text[:100]}")
|
||||
|
||||
|
||||
def cmd_metric(query):
    """Print every knowledge-graph entry whose path or value contains *query*.

    Matching is case-insensitive; a placeholder line is printed when nothing
    matches.
    """
    entries = flatten_graph(load_graph())
    needle = query.lower()
    print(f"Metrics matching '{query}':\n")

    any_found = False
    for path, value in entries:
        if needle not in path.lower() and needle not in value.lower():
            continue
        print(f" {path}: {value}")
        any_found = True

    if not any_found:
        print(" (no matches)")
|
||||
|
||||
|
||||
def cmd_status(status):
    """Print every wiki page whose frontmatter status equals *status*.

    Pages are listed in path order, with their tags when present. For the
    status ``'disproven'`` the graph's ``disproven`` section is printed as
    well, one claim per entry.
    """
    pages = load_all_pages()
    graph = load_graph()
    print(f"Status: '{status}'\n")

    for rel, page in sorted(pages.items()):
        if page['status'] == status:
            print(f" {page['title']} ({rel})")
            if page['tags']:
                print(f" tags: {page['tags']}")

    if status == 'disproven' and 'disproven' in graph:
        print("\n -- Disproven Claims --")
        claims = graph['disproven']
        # `disproven:` with no value parses as None, and a claim may be a
        # plain scalar rather than a mapping; neither has .items().
        if isinstance(claims, dict):
            for name, claim in claims.items():
                if isinstance(claim, dict):
                    print(f" {name}:")
                    for k, v in claim.items():
                        print(f" {k}: {v}")
                else:
                    print(f" {name}: {claim}")
|
||||
|
||||
|
||||
def cmd_test(query):
    """Print the test suites from the knowledge graph that match *query*.

    A suite matches when the lowercased query occurs in its name or in the
    string form of its data. For dict suites, nested dict values are shown
    as ``passing/total``, a fixed set of summary keys is echoed as-is, and
    each item of a ``results`` list is printed with a pass/fail mark.
    """
    summary_keys = ('count', 'passing', 'accuracy', 'file', 'date')
    suites = load_graph().get('tests', {})
    needle = query.lower()
    print(f"Test results for '{query}':\n")

    for name, suite in suites.items():
        if needle not in name.lower() and needle not in str(suite).lower():
            continue
        print(f" -- {name} --")
        if not isinstance(suite, dict):
            continue
        for key, value in suite.items():
            if isinstance(value, dict):
                passing = value.get('passing', '?')
                total = value.get('total', '?')
                print(f" {key}: {passing}/{total}")
            elif key in summary_keys:
                print(f" {key}: {value}")
            elif key == 'results' and isinstance(value, list):
                for outcome in value:
                    passed = outcome.get('result') == 'pass'
                    print(f" {'✓' if passed else '✗'} {outcome.get('test', '?')}")
|
||||
|
||||
|
||||
def cmd_search(query):
    """Full-text search over knowledge-graph values and wiki page bodies.

    Prints the first 10 graph entries whose value contains *query*
    (case-insensitive, values truncated to 80 characters), then every page
    containing it, ordered by descending occurrence count.
    """
    entries = flatten_graph(load_graph())
    pages = load_all_pages()
    needle = query.lower()
    print(f"Search: '{query}'\n")

    graph_hits = []
    for path, value in entries:
        if needle in value.lower():
            graph_hits.append((path, value))
    if graph_hits:
        print(f" -- Knowledge Graph ({len(graph_hits)} hits) --")
        for path, value in graph_hits[:10]:
            print(f" {path}: {value[:80]}")

    page_hits = []
    for rel, page in pages.items():
        lowered = page['content'].lower()
        if needle in lowered:
            page_hits.append((lowered.count(needle), rel, page['title']))
    # Highest count first; ties fall back to the relative path, descending.
    page_hits.sort(reverse=True)

    if page_hits:
        print(f"\n -- Wiki Pages ({len(page_hits)} pages) --")
        for occurrences, rel, title in page_hits:
            print(f" {occurrences:3d}x {title} ({rel})")
|
||||
|
||||
|
||||
def cmd_related(page_name):
    """Print the pages that link to *page_name* and the links it contains.

    "Links TO" lists, in path order, every page with at least one
    ``[[link]]`` containing the name. "Links FROM" prints the links of the
    first page whose title contains the name. Comparison is case-insensitive
    and treats ``-`` and ``_`` as spaces.
    """
    def normalize(text):
        # The same normalisation must be applied to the query and to what it
        # is compared with; otherwise a query containing '_' can never match
        # a link or title that contains '_'.
        return str(text).lower().replace('-', ' ').replace('_', ' ')

    pages = load_all_pages()
    q = normalize(page_name)
    print(f"Related to: '{page_name}'\n")

    print(" -- Links TO --")
    for rel, page in sorted(pages.items()):
        # Report each linking page once, however many matching links it has.
        if any(q in normalize(link) for link in page['links']):
            print(f" <- {page['title']} ({rel})")

    print("\n -- Links FROM --")
    for rel, page in pages.items():
        if q in normalize(page['title']):
            for link in page['links']:
                print(f" -> [[{link}]]")
            # Only the first page whose title matches is expanded.
            break
|
||||
|
||||
|
||||
def cmd_timeline():
    """Print one line per entry of the knowledge graph's ``timeline`` list.

    Each line shows the entry's ``date``, ``commit`` and ``desc``; a missing
    commit or description is shown as ``?``.
    """
    timeline = load_graph().get('timeline', [])
    for entry in timeline:
        date = entry.get('date')
        commit = entry.get('commit', '?')
        desc = entry.get('desc', '?')
        print(f" [{date}] {commit}: {desc}")
|
||||
|
||||
|
||||
# Sub-command name -> handler function. Insertion order is kept because the
# names are joined in this order wherever the mapping is iterated.
COMMANDS = {
    'health': cmd_health,
    'entity': cmd_entity,
    'metric': cmd_metric,
    'status': cmd_status,
    'test': cmd_test,
    'search': cmd_search,
    'related': cmd_related,
    'timeline': cmd_timeline,
}
|
||||
|
||||
if __name__ == '__main__':
    # CLI entry point: the first argument selects the command, the rest form
    # its query.
    if len(sys.argv) < 2 or sys.argv[1] not in COMMANDS:
        print(f"Usage: query.py <{'|'.join(COMMANDS)}> [args]")
        sys.exit(1)

    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd in ('timeline', 'health'):
        # These two commands take no query argument.
        COMMANDS[cmd]()
    elif args:
        # Re-join the words so an unquoted multi-word query still reaches
        # the handler as a single string.
        COMMANDS[cmd](' '.join(args))
    else:
        print(f"Usage: query.py {cmd} <query>")
        # A missing query is a usage error too: report it through the exit
        # status, like the unknown-command case above, so calling scripts
        # can detect it.
        sys.exit(1)
|
||||
Reference in New Issue
Block a user