From 75458713be2ee1e7451b2023ba790a6b7f95003b Mon Sep 17 00:00:00 2001 From: znetsixe Date: Mon, 23 Mar 2026 11:23:24 +0100 Subject: [PATCH] Add architecture review and wiki draft --- ...ayering-resilience-and-config-authority.md | 36 ++ ...CISION-20260323-compose-secrets-via-env.md | 36 ++ .agents/improvements/IMPROVEMENTS_BACKLOG.md | 3 + architecture/stack-architecture-review.md | 554 ++++++++++++++++++ architecture/wiki-platform-overview.md | 150 +++++ temp/cloud.env.example | 24 + temp/cloud.yml | 117 ++++ 7 files changed, 920 insertions(+) create mode 100644 .agents/decisions/DECISION-20260323-architecture-layering-resilience-and-config-authority.md create mode 100644 .agents/decisions/DECISION-20260323-compose-secrets-via-env.md create mode 100644 architecture/stack-architecture-review.md create mode 100644 architecture/wiki-platform-overview.md create mode 100644 temp/cloud.env.example create mode 100644 temp/cloud.yml diff --git a/.agents/decisions/DECISION-20260323-architecture-layering-resilience-and-config-authority.md b/.agents/decisions/DECISION-20260323-architecture-layering-resilience-and-config-authority.md new file mode 100644 index 0000000..8e64b89 --- /dev/null +++ b/.agents/decisions/DECISION-20260323-architecture-layering-resilience-and-config-authority.md @@ -0,0 +1,36 @@ +# DECISION-20260323-architecture-layering-resilience-and-config-authority + +## Context +- Task/request: refine the EVOLV architecture baseline using the current stack drawings and owner guidance. +- Impacted files/contracts: architecture documentation, future wiki structure, telemetry/storage strategy, security boundaries, and configuration authority assumptions. +- Why a decision is required now: the architecture can no longer stay at a generic "Node-RED plus cloud" level; several operating principles were clarified by the owner and need to be treated as architectural defaults. + +## Options +1. 
Keep the architecture intentionally broad and tool-centric
+- Benefits: fewer early commitments.
+- Risks: blurred boundaries for resilience, data ownership, and security; easier to drift into contradictory implementations.
+- Rollout notes: wiki remains descriptive but not decision-shaping.
+
+2. Adopt explicit defaults for resilience, API boundary, telemetry layering, and configuration authority
+- Benefits: clearer target operating model; easier to design stack services and wiki pages consistently; aligns diagrams with intended operational behavior.
+- Risks: some assumptions may outpace current implementation and therefore create an architecture debt backlog.
+- Rollout notes: document gaps clearly and treat incomplete systems as planned workstreams rather than pretending they already exist.
+
+## Decision
+- Selected option: Option 2.
+- Decision owner: repository owner confirmed during architecture review.
+- Date: 2026-03-23.
+- Rationale: the owner clarified concrete architecture goals that materially affect security, resilience, and platform structure. The documentation should encode those as defaults instead of leaving them implicit.
+
+## Consequences
+- Compatibility impact: low immediate code impact, but future implementations should align with these defaults.
+- Safety/security impact: improved boundary clarity by making the central layer the single integration entry point and keeping the edge protected behind site/central mediation.
+- Data/operations impact: multi-level InfluxDB and smart-storage behavior become first-class design concerns; `tagcodering` becomes the intended configuration backbone.
+
+## Implementation Notes
+- Required code/doc updates: update the architecture review doc, add visual wiki-ready diagrams, and track follow-up work for incomplete `tagcodering` integration and telemetry policy design. 
+- Validation evidence required: architecture docs reflect the agreed principles and diagrams; no contradiction with current repo evidence for implemented components. + +## Rollback / Migration +- Rollback strategy: return to a generic descriptive architecture document without explicit defaults. +- Migration/deprecation plan: implement these principles incrementally, starting with configuration authority, telemetry policy, and site/central API boundaries. diff --git a/.agents/decisions/DECISION-20260323-compose-secrets-via-env.md b/.agents/decisions/DECISION-20260323-compose-secrets-via-env.md new file mode 100644 index 0000000..d3dcf13 --- /dev/null +++ b/.agents/decisions/DECISION-20260323-compose-secrets-via-env.md @@ -0,0 +1,36 @@ +# DECISION-20260323-compose-secrets-via-env + +## Context +- Task/request: harden the target-state stack example so credentials are not stored directly in `temp/cloud.yml`. +- Impacted files/contracts: `temp/cloud.yml`, deployment/operations practice for target-state infrastructure examples. +- Why a decision is required now: the repository contained inline credentials in a tracked compose file, which conflicts with the intended security posture and creates avoidable secret-leak risk. + +## Options +1. Keep credentials inline in the compose file +- Benefits: simplest to run as a standalone example. +- Risks: secrets leak into git history, reviews, copies, and local machines; encourages unsafe operational practice. +- Rollout notes: none, but the risk remains permanent once committed. + +2. Move credentials to server-side environment variables and keep only placeholders in compose +- Benefits: aligns the manifest with a safer deployment pattern; keeps tracked config portable across environments; supports secret rotation without editing the compose file. +- Risks: operators must manage `.env` or equivalent secret injection correctly. 
+- Rollout notes: provide an example env file and document that the real `.env` stays on the server and out of version control. + +## Decision +- Selected option: Option 2. +- Decision owner: repository owner confirmed during task discussion. +- Date: 2026-03-23. +- Rationale: the target architecture should model the right operational pattern. Inline secrets in repository-tracked compose files are not acceptable for EVOLV's intended OT/IT deployment posture. + +## Consequences +- Compatibility impact: low; operators now need to supply environment variables when deploying `temp/cloud.yml`. +- Safety/security impact: improved secret hygiene and lower credential exposure risk. +- Data/operations impact: deployment requires an accompanying `.env` on the server or explicit `--env-file` usage. + +## Implementation Notes +- Required code/doc updates: replace inline secrets in `temp/cloud.yml`; add `temp/cloud.env.example`; keep the real `.env` untracked on the server. +- Validation evidence required: inspect compose file for `${...}` placeholders and verify no real credentials remain in tracked files touched by this change. + +## Rollback / Migration +- Rollback strategy: reintroduce inline values, though this is not recommended. +- Migration/deprecation plan: create a server-local `.env` from `temp/cloud.env.example`, fill in real values, and run compose from that environment. diff --git a/.agents/improvements/IMPROVEMENTS_BACKLOG.md b/.agents/improvements/IMPROVEMENTS_BACKLOG.md index 85bf187..9cfd7f0 100644 --- a/.agents/improvements/IMPROVEMENTS_BACKLOG.md +++ b/.agents/improvements/IMPROVEMENTS_BACKLOG.md @@ -22,3 +22,6 @@ Lifecycle: | IMP-20260219-022 | 2026-02-19 | generalFunctions/outliers | `DynamicClusterDeviation.update()` emits verbose `console.log` traces on each call with no log-level guard, unsafe for production telemetry volume. 
| `nodes/generalFunctions/src/outliers/outlierDetection.js:7` | open | | IMP-20260224-006 | 2026-02-24 | rotatingMachine prediction fallback | When only one pressure side is available, predictor uses absolute pressure as surrogate differential, which can materially bias flow prediction under varying suction/discharge conditions. | `nodes/rotatingMachine/src/specificClass.js:573`, `nodes/rotatingMachine/src/specificClass.js:588` | open | | IMP-20260224-012 | 2026-02-24 | cross-node unit architecture | Canonical unit-anchor strategy is implemented in rotatingMachine plus phase-1 controllers (`machineGroupControl`, `pumpingStation`, `valve`, `valveGroupControl`); continue rollout to remaining nodes so all runtime paths use canonical storage + explicit ingress/egress units. | `nodes/machineGroupControl/src/specificClass.js:42`, `nodes/pumpingStation/src/specificClass.js:48`, `nodes/valve/src/specificClass.js:87`, `nodes/valveGroupControl/src/specificClass.js:78` | open | +| IMP-20260323-001 | 2026-03-23 | architecture/security | `temp/cloud.yml` stores environment credentials directly in a repository-tracked target-state stack example; replace with env placeholders/secret injection and split illustrative architecture from deployable manifests. | `temp/cloud.yml:1` | open | +| IMP-20260323-002 | 2026-03-23 | architecture/configuration | Intended database-backed configuration authority (`tagcodering`) is not yet visibly integrated as the primary runtime config backbone in this repository; define access pattern, schema ownership, and rollout path for edge/site/central consumers. | `architecture/stack-architecture-review.md:1` | open | +| IMP-20260323-003 | 2026-03-23 | architecture/telemetry | Multi-level smart-storage strategy is a stated architecture goal, but signal classes, reconstruction guarantees, and authoritative-layer rules are not yet formalized; define telemetry policy before broad deployment. 
| `architecture/stack-architecture-review.md:1` | open | diff --git a/architecture/stack-architecture-review.md b/architecture/stack-architecture-review.md new file mode 100644 index 0000000..61de927 --- /dev/null +++ b/architecture/stack-architecture-review.md @@ -0,0 +1,554 @@ +# EVOLV Architecture Review + +## Purpose + +This document captures: + +- the architecture implemented in this repository today +- the broader edge/site/central architecture shown in the drawings under `temp/` +- the key strengths and weaknesses of that direction +- the currently preferred target stack based on owner decisions from this review + +It is the local staging document for a later wiki update. + +## Evidence Used + +Implemented stack evidence: + +- `docker-compose.yml` +- `docker/settings.js` +- `docker/grafana/provisioning/datasources/influxdb.yaml` +- `package.json` +- `nodes/*` + +Target-state evidence: + +- `temp/fullStack.pdf` +- `temp/edge.pdf` +- `temp/CoreSync.drawio.pdf` +- `temp/cloud.yml` + +Owner decisions from this review: + +- local InfluxDB is required for operational resilience +- central acts as the advisory/intelligence and API-entry layer, not as a direct field caller +- intended configuration authority is the database-backed `tagcodering` model +- architecture wiki pages should be visual, not text-only + +## 1. What Exists Today + +### 1.1 Product/runtime layer + +The codebase is currently a modular Node-RED package for wastewater/process automation: + +- EVOLV ships custom Node-RED nodes for plant assets and process logic +- nodes emit both process/control messages and telemetry-oriented outputs +- shared helper logic lives in `nodes/generalFunctions/` +- Grafana-facing integration exists through `dashboardAPI` and Influx-oriented outputs + +### 1.2 Implemented development stack + +The concrete development stack in this repository is: + +- Node-RED +- InfluxDB 2.x +- Grafana + +That gives a clear local flow: + +1. EVOLV logic runs in Node-RED. +2. 
Telemetry is emitted in a time-series-oriented shape.
+3. InfluxDB stores the telemetry.
+4. Grafana renders operational dashboards.
+
+### 1.3 Existing runtime pattern in the nodes
+
+A recurring EVOLV pattern is:
+
+- output 0: process/control message
+- output 1: Influx/telemetry message
+- output 2: registration/control plumbing where relevant
+
+So even in its current implemented form, EVOLV is not only a Node-RED project. It is already a control-plus-observability platform, with Node-RED as orchestration/runtime and InfluxDB/Grafana as telemetry and visualization services.
+
+## 2. What The Drawings Describe
+
+Across `temp/fullStack.pdf` and `temp/CoreSync.drawio.pdf`, the intended platform is broader and layered.
+
+### 2.1 Edge / OT layer
+
+The drawings consistently place these capabilities at the edge:
+
+- PLC / OPC UA connectivity
+- Node-RED container as protocol translator and logic runtime
+- local broker in some variants
+- local InfluxDB / Prometheus-style storage in some variants
+- local Grafana/SCADA in some variants
+
+This is the plant-side operational layer.
+
+### 2.2 Site / local server layer
+
+The CoreSync drawings also show a site aggregation layer:
+
+- RWZI-local server (RWZI = wastewater treatment plant)
+- Node-RED / CoreSync services
+- site-local broker
+- site-local database
+- upward API-based synchronization
+
+This layer decouples field assets from central services and absorbs plant-specific complexity.
+
+### 2.3 Central / cloud layer
+
+The broader stack drawings and `temp/cloud.yml` show a central platform layer with:
+
+- Gitea
+- Jenkins
+- reverse proxy / ingress
+- Grafana
+- InfluxDB
+- Node-RED
+- RabbitMQ / messaging
+- VPN / tunnel concepts
+- Keycloak in the drawing
+- Portainer in the drawing
+
+This is a platform-services layer, not just an application runtime.
+
+## 3. Architecture Decisions From This Review
+
+These decisions now shape the preferred EVOLV target architecture. 
+ +### 3.1 Local telemetry is mandatory for resilience + +Local InfluxDB is not optional. It is required so that: + +- operations continue when central SCADA or central services are down +- local dashboards and advanced digital-twin workflows can still consume recent and relevant process history +- local edge/site layers can make smarter decisions without depending on round-trips to central + +### 3.2 Multi-level InfluxDB is part of the architecture + +InfluxDB should exist on multiple levels where it adds operational value: + +- edge/local for resilience and near-real-time replay +- site for plant-level history, diagnostics, and resilience +- central for fleet-wide analytics, benchmarking, and advisory intelligence + +This is not just copy-paste storage at each level. The design intent is event-driven and selective. + +### 3.3 Storage should be smart, not only deadband-driven + +The target is not simple "store every point" or only a fixed deadband rule such as 1%. + +The desired storage approach is: + +- observe signal slope and change behavior +- preserve points where state is changing materially +- store fewer points where the signal can be reconstructed downstream with sufficient fidelity +- carry enough metadata or conventions so reconstruction quality is auditable + +This implies EVOLV should evolve toward smart storage and signal-aware retention rather than naive event dumping. + +### 3.4 Central is the intelligence and API-entry layer + +Central may advise and coordinate edge/site layers, but external API requests should not hit field-edge systems directly. + +The intended pattern is: + +- external and enterprise integrations terminate centrally +- central evaluates, aggregates, authorizes, and advises +- site/edge layers receive mediated requests, policies, or setpoints +- field-edge remains protected behind an intermediate layer + +This aligns with the stated security direction. 
+ +### 3.5 Configuration source of truth should be database-backed + +The intended configuration authority is the database-backed `tagcodering` model, which already exists but is not yet complete enough to serve as the fully realized source of truth. + +That means the architecture should assume: + +- asset and machine metadata belong in `tagcodering` +- Node-RED flows should consume configuration rather than silently becoming the only configuration store +- more work is still needed before this behaves as the intended central configuration backbone + +## 4. Visual Model + +### 4.1 Platform topology + +```mermaid +flowchart LR + subgraph OT["OT / Field"] + PLC["PLC / IO"] + DEV["Sensors / Machines"] + end + + subgraph EDGE["Edge Layer"] + ENR["Edge Node-RED"] + EDB["Local InfluxDB"] + EUI["Local Grafana / Local Monitoring"] + EBR["Optional Local Broker"] + end + + subgraph SITE["Site Layer"] + SNR["Site Node-RED / CoreSync"] + SDB["Site InfluxDB"] + SUI["Site Grafana / SCADA Support"] + SBR["Site Broker"] + end + + subgraph CENTRAL["Central Layer"] + API["API / Integration Gateway"] + INTEL["Overview Intelligence / Advisory Logic"] + CDB["Central InfluxDB"] + CGR["Central Grafana"] + CFG["Tagcodering Config Model"] + GIT["Gitea"] + CI["CI/CD"] + IAM["IAM / Keycloak"] + end + + DEV --> PLC + PLC --> ENR + ENR --> EDB + ENR --> EUI + ENR --> EBR + ENR <--> SNR + EDB <--> SDB + SNR --> SDB + SNR --> SUI + SNR --> SBR + SNR <--> API + API --> INTEL + API <--> CFG + SDB <--> CDB + INTEL --> SNR + CGR --> CDB + CI --> GIT + IAM --> API + IAM --> CGR +``` + +### 4.2 Command and access boundary + +```mermaid +flowchart TD + EXT["External APIs / Enterprise Requests"] --> API["Central API Gateway"] + API --> AUTH["AuthN/AuthZ / Policy Checks"] + AUTH --> INTEL["Central Advisory / Decision Support"] + INTEL --> SITE["Site Integration Layer"] + SITE --> EDGE["Edge Runtime"] + EDGE --> PLC["PLC / Field Assets"] + + EXT -. no direct access .-> EDGE + EXT -. 
no direct access .-> PLC +``` + +### 4.3 Smart telemetry flow + +```mermaid +flowchart LR + RAW["Raw Signal"] --> EDGELOGIC["Edge Signal Evaluation"] + EDGELOGIC --> KEEP["Keep Critical Change Points"] + EDGELOGIC --> SKIP["Skip Reconstructable Flat Points"] + EDGELOGIC --> LOCAL["Local InfluxDB"] + LOCAL --> SITE["Site InfluxDB"] + SITE --> CENTRAL["Central InfluxDB"] + KEEP --> LOCAL + SKIP -. reconstruction assumptions / metadata .-> SITE + CENTRAL --> DASH["Fleet Dashboards / Analytics"] +``` + +## 5. Upsides Of This Direction + +### 5.1 Strong separation between control and observability + +Node-RED for runtime/orchestration and InfluxDB/Grafana for telemetry is still the right structural split: + +- control stays close to the process +- telemetry storage/querying stays in time-series-native tooling +- dashboards do not need to overload Node-RED itself + +### 5.2 Edge-first matches operational reality + +For wastewater/process systems, edge-first remains correct: + +- lower latency +- better degraded-mode behavior +- less dependence on WAN or central platform uptime +- clearer OT trust boundary + +### 5.3 Site mediation improves safety and security + +Using central as the enterprise/API entry point and site as the mediator improves posture: + +- field systems are less exposed +- policy decisions can be centralized +- external integrations do not probe the edge directly +- site can continue operating even when upstream is degraded + +### 5.4 Multi-level storage enables better analytics + +Multiple Influx layers can support: + +- local resilience +- site diagnostics +- fleet benchmarking +- smarter retention and reconstruction strategies + +That is substantially more capable than a single central historian model. 
+ +### 5.5 `tagcodering` is the right long-term direction + +A database-backed configuration authority is stronger than embedding configuration only in flows because it supports: + +- machine metadata management +- controlled rollout of configuration changes +- clearer versioning and provenance +- future API-driven configuration services + +## 6. Downsides And Risks + +### 6.1 Smart storage raises algorithmic and governance complexity + +Signal-aware storage and reconstruction is promising, but it creates architectural obligations: + +- reconstruction rules must be explicit +- acceptable reconstruction error must be defined per signal type +- operators must know whether they see raw or reconstructed history +- compliance-relevant data may need stricter retention than operational convenience data + +Without those rules, smart storage can become opaque and hard to trust. + +### 6.2 Multi-level databases can create ownership confusion + +If edge, site, and central all store telemetry, you must define: + +- which layer is authoritative for which time horizon +- when backfill is allowed +- when data is summarized vs copied +- how duplicates or gaps are detected + +Otherwise operations will argue over which trend is "the real one." + +### 6.3 Central intelligence must remain advisory-first + +Central guidance can become valuable, but direct closed-loop dependency on central would be risky. 
+ +The architecture should therefore preserve: + +- local control authority at edge/site +- bounded and explicit central advice +- safe behavior if central recommendations stop arriving + +### 6.4 `tagcodering` is not yet complete enough to lean on blindly + +It is the right target, but its current partial state means there is still architecture debt: + +- incomplete config workflows +- likely mismatch between desired and implemented schema behavior +- temporary duplication between flows, node config, and database-held metadata + +This should be treated as a core platform workstream, not a side issue. + +### 6.5 Broker responsibilities are still not crisp enough + +The materials still reference MQTT/AMQP/RabbitMQ/brokers without one stable responsibility split. That needs to be resolved before large-scale deployment. + +Questions still open: + +- command bus or event bus? +- site-only or cross-site? +- telemetry transport or only synchronization/eventing? +- durability expectations and replay behavior? + +## 7. Recommended Ideal Stack + +The ideal EVOLV stack should be layered around operational boundaries, not around tools. 
+ +### 7.1 Layer A: Edge execution + +Purpose: + +- connect to PLCs and field assets +- execute time-sensitive local logic +- preserve operation during WAN/central loss +- provide local telemetry access for resilience and digital-twin use cases + +Recommended components: + +- Node-RED runtime for EVOLV edge flows +- OPC UA and protocol adapters +- local InfluxDB +- optional local Grafana for local engineering/monitoring +- optional local broker only when multiple participants need decoupling + +Principle: + +- edge remains safe and useful when disconnected + +### 7.2 Layer B: Site integration + +Purpose: + +- aggregate multiple edge systems at plant/site level +- host plant-local dashboards and diagnostics +- mediate between raw OT detail and central standardization +- serve as the protected step between field systems and central requests + +Recommended components: + +- site Node-RED / CoreSync services +- site InfluxDB +- site Grafana / SCADA-supporting dashboards +- site broker where asynchronous eventing is justified + +Principle: + +- site absorbs plant complexity and protects field assets + +### 7.3 Layer C: Central platform + +Purpose: + +- fleet-wide analytics +- shared dashboards +- engineering lifecycle +- enterprise/API entry point +- overview intelligence and advisory logic + +Recommended components: + +- Gitea +- CI/CD +- central InfluxDB +- central Grafana +- API/integration gateway +- IAM +- VPN/private connectivity +- `tagcodering`-backed configuration services + +Principle: + +- central coordinates, advises, and governs; it is not the direct field caller + +### 7.4 Cross-cutting platform services + +These should be explicit architecture elements: + +- secrets management +- certificate management +- backup/restore +- audit logging +- monitoring/alerting of the platform itself +- versioned configuration and schema management +- rollout/rollback strategy + +## 8. 
Recommended Opinionated Choices + +### 8.1 Keep Node-RED as the orchestration layer, not the whole platform + +Node-RED should own: + +- process orchestration +- protocol mediation +- edge/site logic +- KPI production + +It should not become the sole owner of: + +- identity +- long-term configuration authority +- secret management +- compliance/audit authority + +### 8.2 Use InfluxDB by function and horizon + +Recommended split: + +- edge: resilience, local replay, digital-twin input +- site: plant diagnostics and local continuity +- central: fleet analytics, advisory intelligence, benchmarking, and long-term cross-site views + +### 8.3 Prefer smart telemetry retention over naive point dumping + +Recommended rule: + +- keep information-rich points +- reduce information-poor flat spans +- document reconstruction assumptions +- define signal-class-specific fidelity expectations + +This needs design discipline, but it is a real differentiator if executed well. + +### 8.4 Put enterprise/API ingress at central, not at edge + +This should become a hard architectural rule: + +- external requests land centrally +- central authenticates and authorizes +- central or site mediates downward +- edge never becomes the exposed public integration surface + +### 8.5 Make `tagcodering` the target configuration backbone + +The architecture should be designed so that `tagcodering` can mature into: + +- machine and asset registry +- configuration source of truth +- site/central configuration exchange point +- API-served configuration source for runtime layers + +## 9. 
Suggested Phasing + +### Phase 1: Stabilize contracts + +- define topic and payload contracts +- define telemetry classes and reconstruction policy +- define asset, machine, and site identity model +- define `tagcodering` scope and schema ownership + +### Phase 2: Harden local/site resilience + +- formalize edge and site runtime patterns +- define local telemetry retention and replay behavior +- define central-loss behavior +- define dashboard behavior during isolation + +### Phase 3: Harden central platform + +- IAM +- API gateway +- central observability +- CI/CD +- backup and disaster recovery +- config services over `tagcodering` + +### Phase 4: Introduce selective synchronization and intelligence + +- event-driven telemetry propagation rules +- smart-storage promotion/backfill policies +- advisory services from central +- auditability of downward recommendations and configuration changes + +## 10. Immediate Open Questions Before Wiki Finalization + +1. Which signals are allowed to use reconstruction-aware smart storage, and which must remain raw or near-raw for audit/compliance reasons? +2. How should `tagcodering` be exposed to runtime layers: direct database access, a dedicated API, or both? +3. What exact responsibility split should EVOLV use between API synchronization and broker-based eventing? + +## 11. Recommended Wiki Structure + +The wiki should not be one long page. It should be split into: + +1. platform overview with the main topology diagram +2. edge-site-central runtime model +3. telemetry and smart storage model +4. security and access-boundary model +5. configuration architecture centered on `tagcodering` + +## 12. Next Step + +Use this document as the architecture baseline. The companion markdown page in `architecture/` can then be shaped into a wiki-ready visual overview page with Mermaid diagrams and shorter human-readable sections. 
diff --git a/architecture/wiki-platform-overview.md b/architecture/wiki-platform-overview.md new file mode 100644 index 0000000..2c601b5 --- /dev/null +++ b/architecture/wiki-platform-overview.md @@ -0,0 +1,150 @@ +# EVOLV Platform Architecture + +## At A Glance + +EVOLV is not only a Node-RED package. It is a layered automation platform: + +- edge for plant-side execution +- site for local aggregation and resilience +- central for coordination, analytics, APIs, and governance + +```mermaid +flowchart LR + subgraph EDGE["Edge"] + PLC["PLC / IO"] + ENR["Node-RED"] + EDB["Local InfluxDB"] + EUI["Local Monitoring"] + end + + subgraph SITE["Site"] + SNR["CoreSync / Site Node-RED"] + SDB["Site InfluxDB"] + SUI["Site Dashboards"] + end + + subgraph CENTRAL["Central"] + API["API Gateway"] + CFG["Tagcodering"] + CDB["Central InfluxDB"] + CGR["Grafana"] + INTEL["Overview Intelligence"] + GIT["Gitea + CI/CD"] + end + + PLC --> ENR + ENR --> EDB + ENR --> EUI + ENR <--> SNR + EDB <--> SDB + SNR --> SUI + SNR <--> API + API <--> CFG + API --> INTEL + SDB <--> CDB + CDB --> CGR + GIT --> ENR + GIT --> SNR +``` + +## Core Principles + +### 1. Edge-first operation + +The edge layer must remain useful and safe when central systems are down. + +That means: + +- local logic remains operational +- local telemetry remains queryable +- local dashboards can keep working + +### 2. Multi-level telemetry + +InfluxDB is expected on multiple levels: + +- local for resilience and digital-twin use +- site for plant diagnostics +- central for fleet analytics and advisory logic + +### 3. Smart storage + +Telemetry should not be stored only with naive deadband rules. 
+ +The target model is signal-aware: + +- preserve critical change points +- reduce low-information flat sections +- allow downstream reconstruction where justified + +```mermaid +flowchart LR + SIG["Process Signal"] --> EVAL["Slope / Event Evaluation"] + EVAL --> KEEP["Keep critical points"] + EVAL --> REDUCE["Reduce reconstructable points"] + KEEP --> L0["Local InfluxDB"] + REDUCE --> L0 + L0 --> L1["Site InfluxDB"] + L1 --> L2["Central InfluxDB"] +``` + +### 4. Central is the safe entry point + +External systems should enter through central APIs, not by directly calling field-edge systems. + +```mermaid +flowchart TD + EXT["External Request"] --> API["Central API Gateway"] + API --> AUTH["Auth / Policy"] + AUTH --> SITE["Site Layer"] + SITE --> EDGE["Edge Layer"] + EDGE --> PLC["Field Assets"] + + EXT -. blocked .-> EDGE + EXT -. blocked .-> PLC +``` + +### 5. Configuration belongs in `tagcodering` + +The intended configuration source of truth is the database-backed `tagcodering` model: + +- machine metadata +- asset configuration +- runtime-consumable configuration +- future central/site configuration services + +This already exists partially but still needs more work before it fully serves that role. 
+ +## Layer Roles + +### Edge + +- PLC connectivity +- local logic +- protocol translation +- local telemetry buffering +- local monitoring and digital-twin support + +### Site + +- aggregation of edge systems +- local dashboards and diagnostics +- mediation between OT and central +- protected handoff for central requests + +### Central + +- enterprise/API gateway +- fleet dashboards +- analytics and intelligence +- source control and CI/CD +- configuration governance through `tagcodering` + +## Why This Matters + +This architecture gives EVOLV: + +- better resilience +- safer external integration +- better data quality for analytics +- a path from Node-RED package to platform diff --git a/temp/cloud.env.example b/temp/cloud.env.example new file mode 100644 index 0000000..b80542e --- /dev/null +++ b/temp/cloud.env.example @@ -0,0 +1,24 @@ +# Copy this file to `.env` on the target server and populate real values there. +# Keep the real `.env` out of version control. + +INFLUXDB_ADMIN_USER=replace-me +INFLUXDB_ADMIN_PASSWORD=replace-me +INFLUXDB_BUCKET=lvl0 +INFLUXDB_ORG=wbd + +GF_SECURITY_ADMIN_USER=replace-me +GF_SECURITY_ADMIN_PASSWORD=replace-me + +NPM_DB_MYSQL_HOST=db +NPM_DB_MYSQL_PORT=3306 +NPM_DB_MYSQL_USER=npm +NPM_DB_MYSQL_PASSWORD=replace-me +NPM_DB_MYSQL_NAME=npm + +MYSQL_ROOT_PASSWORD=replace-me +MYSQL_DATABASE=npm +MYSQL_USER=npm +MYSQL_PASSWORD=replace-me + +RABBITMQ_DEFAULT_USER=replace-me +RABBITMQ_DEFAULT_PASS=replace-me diff --git a/temp/cloud.yml b/temp/cloud.yml new file mode 100644 index 0000000..86f0f88 --- /dev/null +++ b/temp/cloud.yml @@ -0,0 +1,117 @@ +services: + node-red: + image: nodered/node-red:latest + container_name: node-red + restart: always + ports: + - "1880:1880" + volumes: + - node_red_data:/data + + influxdb: + image: influxdb:2.7 + container_name: influxdb + restart: always + ports: + - "8086:8086" + environment: + - INFLUXDB_ADMIN_USER=${INFLUXDB_ADMIN_USER} + - INFLUXDB_ADMIN_PASSWORD=${INFLUXDB_ADMIN_PASSWORD} + - 
INFLUXDB_BUCKET=${INFLUXDB_BUCKET}
+      - INFLUXDB_ORG=${INFLUXDB_ORG}
+      # NOTE(review): the official influxdb:2.x image performs first-run setup via
+      # DOCKER_INFLUXDB_INIT_* variables; confirm these legacy INFLUXDB_* names are
+      # actually consumed by influxdb:2.7 before relying on them.
+    volumes:
+      - influxdb_data:/var/lib/influxdb2
+
+  grafana:
+    image: grafana/grafana:latest
+    container_name: grafana
+    restart: always
+    ports:
+      - "3000:3000"
+    environment:
+      - GF_SECURITY_ADMIN_USER=${GF_SECURITY_ADMIN_USER}
+      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD}
+    volumes:
+      - grafana_data:/var/lib/grafana
+    depends_on:
+      - influxdb
+
+  jenkins:
+    image: jenkins/jenkins:lts
+    container_name: jenkins
+    restart: always
+    ports:
+      - "8080:8080" # Web
+      - "50000:50000" # Agents
+    volumes:
+      - jenkins_home:/var/jenkins_home
+
+  gitea:
+    image: gitea/gitea:latest
+    container_name: gitea
+    restart: always
+    environment:
+      - USER_UID=1000
+      - USER_GID=1000
+    ports:
+      - "3001:3000" # Web interface (different port than Grafana)
+      - "222:22" # SSH for Git
+    volumes:
+      - gitea_data:/data
+
+  proxymanager:
+    image: jc21/nginx-proxy-manager:latest
+    container_name: proxymanager
+    restart: always
+    ports:
+      - "80:80" # HTTP
+      - "443:443" # HTTPS
+      - "81:81" # Admin UI
+    environment:
+      DB_MYSQL_HOST: ${NPM_DB_MYSQL_HOST:-db}
+      DB_MYSQL_PORT: ${NPM_DB_MYSQL_PORT:-3306}
+      DB_MYSQL_USER: ${NPM_DB_MYSQL_USER}
+      DB_MYSQL_PASSWORD: ${NPM_DB_MYSQL_PASSWORD}
+      DB_MYSQL_NAME: ${NPM_DB_MYSQL_NAME}
+    volumes:
+      - proxymanager_data:/data
+      - proxymanager_letsencrypt:/etc/letsencrypt
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+    depends_on:
+      - db
+
+  db:
+    image: jc21/mariadb-aria:latest
+    container_name: proxymanager_db
+    restart: always
+    environment:
+      MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD}
+      MYSQL_DATABASE: ${MYSQL_DATABASE}
+      MYSQL_USER: ${MYSQL_USER}
+      MYSQL_PASSWORD: ${MYSQL_PASSWORD}
+    volumes:
+      - proxymanager_db_data:/var/lib/mysql
+
+  rabbitmq:
+    image: rabbitmq:3-management
+    container_name: rabbitmq
+    restart: always
+    ports:
+      - "5672:5672" # AMQP protocol for apps
+      - "15672:15672" # Management web interface
+    environment:
+      - RABBITMQ_DEFAULT_USER=${RABBITMQ_DEFAULT_USER}
+      - 
RABBITMQ_DEFAULT_PASS=${RABBITMQ_DEFAULT_PASS} + volumes: + - rabbitmq_data:/var/lib/rabbitmq + +volumes: + rabbitmq_data: + node_red_data: + influxdb_data: + grafana_data: + jenkins_home: + gitea_data: + proxymanager_data: + proxymanager_letsencrypt: + proxymanager_db_data: