update of trace_signal_fitter.py and some doc
2025-08-27 23:59:57 +02:00
parent 27993d72ee
commit a9053997a1
3 changed files with 780 additions and 230 deletions


@@ -1,151 +1,294 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
trace_signal_fitter.py
----------------------
Zwei Betriebsarten für eine einzelne .trace-Datei:
trace_signal_fitter.py Advanced Range-/Unsupervised-Fit mit Physik-Constraints & Bericht
1) Range-Fit (überwacht): --rmin/--rmax gesetzt
Sucht für alle 8-bit (D0..D7) und adjazenten 16-bit (LE/BE) eine lineare Abbildung
phys = raw*scale + offset, die möglichst viele Samples in [rmin, rmax] bringt.
Ranking primär nach hit_ratio.
2) Unsupervised (ohne Range): --rmin/--rmax weggelassen
Findet „plausible“ physikalische Kandidaten nach Glattheit/Varianz/Spannweite/Rate,
ohne Scale/Offset zu schätzen (raw-Werte direkt). Ranking primär nach „smoothness“.
Modi:
1) Range-Fit (supervised): --rmin/--rmax gesetzt → finde scale & offset, maximiere Hit-Ratio in [rmin, rmax].
2) Unsupervised: ohne Range → plausible Rohsignale nach Smoothness/Var/Rate/Span.
Neu:
- Periodizität: Rate (Hz), Jitter (std der Inter-Arrival-Times), CV.
- Slew-Rate: p95/p99 von |Δ|/s (supervised in phys-Einheit, unsupervised normiert auf Roh-Span).
- Grenzwerte als Argumente (--rate-min/max, --jitter-max-ms, --max-slope-abs, --max-slope-frac, ...).
- Zusätzlich signed 16-bit Varianten (le16s/be16s).
- JSON + Markdown-Bericht pro Trace mit PASS/FAIL und Begründungen.
Logformat (Kettenöler):
<timestamp_ms> <TX/RX> 0x<ID_HEX> <dlc> <byte0> <byte1> ...
<timestamp_ms> <TX|RX> 0x<ID_HEX> <DLC> <byte0> <byte1> ... <byte7>
Outputs:
- Range-Fit: <trace_stem>_encoding_candidates.csv + optional Plots
- Unsupervised:<trace_stem>_unsupervised_candidates.csv + optional Plots
- supervised: <trace>_encoding_candidates.csv, Plots, <trace>_report.md, <trace>_report.json
- unsupervised: <trace>_unsupervised_candidates.csv, Plots, <trace>_report.md, <trace>_report.json
"""
from __future__ import annotations

import re
import sys
import json
import argparse
from pathlib import Path
from typing import List, Tuple, Dict, Iterable
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# ---------- Parsing ----------
def parse_trace(path: Path, rx_only: bool = False) -> pd.DataFrame:
    """
    Robust parsing of the Kettenöler format:
        <ts_ms> <TX|RX> 0x<ID> <DLC> <b0> <b1> ... (hex)
    """
    rows = []
    with open(path, "r", errors="ignore") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 4:
                continue
            try:
                ts = int(parts[0])
                dr = parts[1]
                if rx_only and dr != "RX":
                    continue
                cid = int(parts[2], 16)  # base-16 parsing accepts "0x280" as well as "280"
                dlc = int(parts[3])
                bytes_hex = parts[4:4 + dlc] if dlc > 0 else []
                data = []
                for b in bytes_hex:
                    try:
                        data.append(int(b, 16))
                    except Exception:
                        data.append(0)
                rows.append((ts, dr, cid, data))
            except Exception:
                continue
    df = pd.DataFrame(rows, columns=["ts", "dir", "id", "data"])
    if df.empty:
        return df
    df["time_s"] = (df["ts"] - df["ts"].min()) / 1000.0
    return df
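# Example (hypothetical frame): the line
#   "1234 RX 0x280 8 12 34 00 FF 00 00 00 00"
# parses to ts=1234, dir="RX", id=0x280, data=[0x12, 0x34, 0x00, 0xFF, 0, 0, 0, 0];
# time_s rebases all timestamps to the first frame and converts ms -> s.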
# ---------- Helpers ----------
def be16(a: int, b: int) -> int: return (a << 8) | b
def le16(a: int, b: int) -> int: return a | (b << 8)
def s16(u: int) -> int: return u if u < 0x8000 else u - 0x10000
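# Sanity checks for the word helpers (the values follow directly from the bit math):
#   be16(0x12, 0x34) == 0x1234 == 4660    # first byte is the high byte
#   le16(0x12, 0x34) == 0x3412 == 13330   # first byte is the low byte
#   s16(0xFFFF) == -1, s16(0x7FFF) == 32767  # two's-complement reinterpretation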
def p_quant_abs_diff(arr: np.ndarray, q: float) -> float:
    if arr.size < 2:
        return 0.0
    d = np.abs(np.diff(arr))
    return float(np.percentile(d, q * 100))
def basic_rate(times: np.ndarray) -> float:
if times.size < 2: return 0.0
dur = times.max() - times.min()
if dur <= 0: return 0.0
return float(times.size / dur)
def p_quant(arr: np.ndarray, q: float) -> float:
if arr.size == 0:
return 0.0
return float(np.percentile(arr, q * 100))
def interarrival_metrics(times: np.ndarray) -> Dict[str, float]:
if times.size < 2:
return {"rate_hz": 0.0, "period_mean": 0.0, "period_std": 0.0, "jitter_cv": 0.0, "n": int(times.size)}
dt = np.diff(times)
period_mean = float(np.mean(dt))
period_std = float(np.std(dt))
rate_hz = 1.0 / period_mean if period_mean > 0 else 0.0
jitter_cv = (period_std / period_mean) if period_mean > 0 else 0.0
return {"rate_hz": rate_hz, "period_mean": period_mean, "period_std": period_std, "jitter_cv": jitter_cv, "n": int(times.size)}
def slope_metrics(values: np.ndarray, times: np.ndarray) -> Dict[str, float]:
if values.size < 2:
return {"slope_p95": 0.0, "slope_p99": 0.0, "jerk_p95": 0.0}
dv = np.abs(np.diff(values))
dt = np.diff(times)
    # avoid division by zero
dt = np.where(dt <= 0, np.nan, dt)
slope = dv / dt
slope = slope[~np.isnan(slope)]
if slope.size == 0:
return {"slope_p95": 0.0, "slope_p99": 0.0, "jerk_p95": 0.0}
jerk = np.abs(np.diff(slope))
return {
"slope_p95": float(np.percentile(slope, 95)),
"slope_p99": float(np.percentile(slope, 99)),
"jerk_p95": float(np.percentile(jerk, 95)) if jerk.size > 0 else 0.0,
}
def prefilter(vals: np.ndarray) -> Tuple[bool, Dict[str, float]]:
if vals.size < 12:
return False, {"reason": "too_few_samples"}
uniq = np.unique(vals)
if uniq.size <= 2:
return False, {"reason": "too_constant"}
p95 = p_quant_abs_diff(vals, 0.95)
if p95 == 0:
return False, {"reason": "no_changes"}
r = float(np.percentile(vals, 97) - np.percentile(vals, 3) + 1e-9)
if p95 > 0.5 * r:
return False, {"reason": "too_jumpi"}
return True, {"p95_abs_diff": p95, "span_est": r}
def try_scaleset() -> List[float]:
base = [
1e-3, 2e-3, 5e-3,
1e-2, 2e-2, 5e-2,
0.05, 0.0625, 0.1, 0.125, 0.2, 0.25, 0.5,
0.75, 0.8, 1.0, 1.25, 2.0, 5.0, 10.0
]
return sorted(set(base))
def interval_best_offset(raw: np.ndarray, scale: float, rmin: float, rmax: float) -> Tuple[float, float]:
    """
    Find the offset that brings the most values (scale*raw + offset) into [rmin, rmax].
    Sweep over the interval endpoints (the classic "interval stabbing" solution).
    """
    a = rmin - scale * raw
    b = rmax - scale * raw
    lo = np.minimum(a, b)
    hi = np.maximum(a, b)
    events = []
    for L, H in zip(lo, hi):
        events.append((L, +1))
        events.append((H, -1))
    events.sort(key=lambda t: (t[0], -t[1]))
    best = -1
    cur = 0
    best_x = None
    for x, v in events:
        cur += v
        if cur > best:
            best = cur
            best_x = x
    hit_ratio = float(best) / float(len(raw)) if len(raw) else 0.0
    return float(best_x if best_x is not None else 0.0), hit_ratio
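# Worked example (made-up numbers): raw = [0, 10, 20], scale = 1, rmin = 5, rmax = 15.
# Each sample admits offsets in [rmin - raw_i, rmax - raw_i]:
#   [5, 15], [-5, 5], [-15, -5]
# The sweep reaches its maximum overlap (2 intervals) at offset -5, so the function
# returns (-5.0, 2/3): phys = raw - 5 = [-5, 5, 15], and 5 and 15 fall in [5, 15].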
# ---------- Candidate Generation ----------
def gen_candidates(df: pd.DataFrame) -> Iterable[Tuple[str, np.ndarray, np.ndarray]]:
    """
    Yields (label, values, times) for:
      - 8-bit bytes D0..D7
      - adjacent 16-bit pairs (LE/BE) plus signed variants
    Times are mapped onto the filtered indices (DLC-dependent).
    """
    times_all = df["time_s"].to_numpy(dtype=float)
    data = df["data"].tolist()
    # 8-bit
    for i in range(8):
        idx = [k for k, d in enumerate(data) if len(d) > i]
        if len(idx) < 3:
            continue
        vals = np.array([data[k][i] for k in idx], dtype=float)
        t = times_all[idx]
        yield f"byte[{i}]", vals, t
    # adjacent 16-bit
    for i in range(7):
        j = i + 1
        idx = [k for k, d in enumerate(data) if len(d) > j]
        if len(idx) < 3:
            continue
        a = [data[k][i] for k in idx]
        b = [data[k][j] for k in idx]
        u_le = np.array([le16(x, y) for x, y in zip(a, b)], dtype=float)
        u_be = np.array([be16(x, y) for x, y in zip(a, b)], dtype=float)
        s_le = np.array([s16(le16(x, y)) for x, y in zip(a, b)], dtype=float)
        s_be = np.array([s16(be16(x, y)) for x, y in zip(a, b)], dtype=float)
        t = times_all[idx]
        yield f"le16[{i}-{j}]", u_le, t
        yield f"be16[{i}-{j}]", u_be, t
        yield f"le16s[{i}-{j}]", s_le, t
        yield f"be16s[{i}-{j}]", s_be, t
# ---------- Evaluation ----------
def evaluate_supervised(label: str,
vals: np.ndarray,
times: np.ndarray,
rmin: float,
rmax: float,
allow_neg_scale: bool,
constraints: Dict[str, float]) -> Dict[str, float] | None:
ok, meta = prefilter(vals)
if not ok:
return None
scales = try_scaleset()
    if allow_neg_scale:
        scales += [-s for s in scales if s > 0]
    best = {"hit_ratio": -1.0, "scale": None, "offset": 0.0}
for s in scales:
o, hr = interval_best_offset(vals, s, rmin, rmax)
if hr > best["hit_ratio"]:
best = {"scale":s, "offset":float(o), "hit_ratio":hr}
phys = vals*best["scale"] + best["offset"]
within = (phys>=rmin) & (phys<=rmax)
best = {"scale": s, "offset": float(o), "hit_ratio": hr}
phys = vals * best["scale"] + best["offset"]
within = (phys >= rmin) & (phys <= rmax)
in_count = int(np.count_nonzero(within))
    p95_raw = p_quant_abs_diff(vals, 0.95)
    p95_phys = p_quant_abs_diff(phys, 0.95)
    ia = interarrival_metrics(times[:len(vals)])
    sm = slope_metrics(phys, times[:len(phys)])
prange = (rmax - rmin) if (rmax > rmin) else 1.0
slope_p95_frac = sm["slope_p95"] / prange
slope_p99_frac = sm["slope_p99"] / prange
failures = []
if constraints.get("rate_min") is not None and ia["rate_hz"] < constraints["rate_min"] - 1e-9:
failures.append(f"rate {ia['rate_hz']:.2f}Hz < min {constraints['rate_min']:.2f}Hz")
if constraints.get("rate_max") is not None and ia["rate_hz"] > constraints["rate_max"] + 1e-9:
failures.append(f"rate {ia['rate_hz']:.2f}Hz > max {constraints['rate_max']:.2f}Hz")
if constraints.get("jitter_max_ms") is not None:
jitter_ms = ia["period_std"] * 1000.0
if jitter_ms > constraints["jitter_max_ms"] + 1e-9:
failures.append(f"jitter {jitter_ms:.1f}ms > max {constraints['jitter_max_ms']:.1f}ms")
def _resolve_abs_slope_limit():
if constraints.get("max_slope_abs") is not None:
return constraints["max_slope_abs"]
if constraints.get("max_slope_frac") is not None:
return constraints["max_slope_frac"] * prange
return None
max_s_abs = _resolve_abs_slope_limit()
if max_s_abs is not None:
q = constraints.get("slope_quantile", 0.95)
qv = sm["slope_p95"] if q <= 0.95 else sm["slope_p99"]
if qv > max_s_abs + 1e-9:
failures.append(f"slope(q={q:.2f}) {qv:.3g} > max {max_s_abs:.3g}")
uniq_ratio = len(np.unique(vals)) / float(len(vals))
if constraints.get("min_uniq_ratio") is not None and uniq_ratio < constraints["min_uniq_ratio"] - 1e-9:
failures.append(f"uniq_ratio {uniq_ratio:.3f} < min {constraints['min_uniq_ratio']:.3f}")
passed = (len(failures) == 0)
# Quality Score
score = best["hit_ratio"]
if max_s_abs is not None and max_s_abs > 0:
slope_pen = min(sm["slope_p95"] / max_s_abs, 1.0)
score *= (1.0 - 0.3 * slope_pen)
if constraints.get("jitter_max_ms") is not None:
jitter_ms = ia["period_std"] * 1000.0
jitter_pen = min(jitter_ms / constraints["jitter_max_ms"], 1.0)
score *= (1.0 - 0.2 * jitter_pen)
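    # Example: hit_ratio 0.9 with slope at half the allowed limit and jitter at a
    # quarter of the budget gives score = 0.9 * (1 - 0.3*0.5) * (1 - 0.2*0.25) ≈ 0.73.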
return {
"label": label,
"mode": "range_fit",
"n": int(vals.size),
"rate_hz_est": float(rate),
"raw_min": float(np.min(vals)),
"raw_max": float(np.max(vals)),
"raw_var": float(np.var(vals)),
@@ -158,38 +301,49 @@ def evaluate_supervised(label, vals: np.ndarray, times: np.ndarray, rmin: float,
"phys_max": float(np.max(phys)),
"p95_absdiff_phys": float(p95_phys),
"span_phys": float(np.percentile(phys, 97) - np.percentile(phys, 3)),
"prefilter_span_est": float(meta.get("span_est", 0.0)),
"prefilter_p95_absdiff": float(meta.get("p95_abs_diff", 0.0)),
"rate_hz_est": float(ia["rate_hz"]),
"period_std_ms": float(ia["period_std"] * 1000.0),
"jitter_cv": float(ia["jitter_cv"]),
"slope_p95_per_s": float(sm["slope_p95"]),
"slope_p99_per_s": float(sm["slope_p99"]),
"slope_p95_frac": float(slope_p95_frac),
"slope_p99_frac": float(slope_p99_frac),
"uniq_ratio": float(uniq_ratio),
"passed": bool(passed),
"fail_reasons": "; ".join(failures),
"quality_score": float(score),
}
def evaluate_unsupervised(label: str,
                          vals: np.ndarray,
                          times: np.ndarray,
                          min_smooth: float = 0.2,
                          max_slope_frac_raw: float | None = None,
                          slope_quantile: float = 0.95) -> Dict[str, float] | None:
    """
    Returns plausibility metrics only (no scale/offset):
        smoothness = 1 - clamp(p95(|Δ|) / span, 0..1)
        uniq_ratio = |unique| / n
    Ranking: smoothness desc, span desc, var desc, rate desc, n desc.
    """
if vals.size < 12:
return None
    p95 = p_quant_abs_diff(vals, 0.95)
span = float(np.percentile(vals, 97) - np.percentile(vals, 3) + 1e-9)
    smooth = 1.0 - min(max(p95 / span, 0.0), 1.0)
    uniq_ratio = float(len(np.unique(vals))) / float(vals.size)
var = float(np.var(vals))
    ia = interarrival_metrics(times[:len(vals)])
    sm = slope_metrics(vals, times[:len(vals)])
    slope_q = sm["slope_p95"] if slope_quantile <= 0.95 else sm["slope_p99"]
    slope_frac_raw = (slope_q / span) if span > 0 else 0.0
    # filter out signals that are too constant or too jumpy
if uniq_ratio <= 0.02:
return None
if smooth < min_smooth:
return None
if (max_slope_frac_raw is not None) and (slope_frac_raw > max_slope_frac_raw):
return None
return {
"label": label,
"mode": "unsupervised",
"n": int(vals.size),
"rate_hz_est": float(rate),
"raw_min": float(np.min(vals)),
"raw_max": float(np.max(vals)),
"raw_var": var,
@@ -197,119 +351,310 @@ def evaluate_unsupervised(label, vals: np.ndarray, times: np.ndarray, min_smooth
"p95_absdiff_raw": float(p95),
"smoothness": float(smooth),
"uniq_ratio": float(uniq_ratio),
"rate_hz_est": float(ia["rate_hz"]),
"period_std_ms": float(ia["period_std"] * 1000.0),
"jitter_cv": float(ia["jitter_cv"]),
"slope_q_raw": float(slope_q),
"slope_frac_raw": float(slope_frac_raw),
}
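# Example: a slow ramp whose p95 step is 2 counts over a 3..97-percentile span of
# 100 counts gets smoothness = 1 - 2/100 = 0.98; a toggling status byte whose p95
# step equals its span gets smoothness ≈ 0 and is rejected by min_smooth.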
# ---------- Plot & Report ----------
def plot_timeseries(times: np.ndarray, series: np.ndarray, out_png: Path, title: str, ylabel: str) -> None:
    plt.figure(figsize=(10, 4))
    plt.plot(times[:len(series)], series, marker=".", linestyle="-")
    plt.xlabel("Time (s)")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.grid(True)
    plt.tight_layout()
    out_png.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(out_png, dpi=150)
    plt.close()
def df_to_md_table(df: pd.DataFrame) -> str:
    """Robust Markdown table: uses to_markdown if available, otherwise CSV in a code block."""
    try:
        return df.to_markdown(index=False)  # may require 'tabulate'
    except Exception:
        return "```\n" + df.to_csv(index=False) + "```"
def write_report_md(path: Path, header: dict, top_rows: pd.DataFrame, failures: pd.DataFrame, mode: str, links: dict) -> None:
    md = []
    md.append(f"# Trace Report {header.get('trace_name','')}")
    md.append("")
    md.append(f"- **Mode:** {mode}")
    for k, v in header.items():
        if k in ("trace_name",):
            continue
        md.append(f"- **{k}**: {v}")
    md.append("")
    if mode == "range_fit":
        md.append("## Top candidates (range fit)")
        md.append("Hit ratio, slope/jitter & score, best first.\n")
        if top_rows is not None and not top_rows.empty:
            md.append(df_to_md_table(top_rows))
        else:
            md.append("_No candidates above threshold._")
        md.append("")
        if failures is not None and not failures.empty:
            md.append("## Excluded candidates (reasons)\n")
            md.append(df_to_md_table(failures[["label", "fail_reasons"]]))
    else:
        md.append("## Top candidates (unsupervised)\n")
        if top_rows is not None and not top_rows.empty:
            md.append(df_to_md_table(top_rows))
        else:
            md.append("_No plausible raw signals._")
    md.append("\n## Artifacts")
    for k, v in links.items():
        md.append(f"- **{k}**: `{v}`")
    path.write_text("\n".join(md), encoding="utf-8")
# ---------- Main ----------
def main():
    ap = argparse.ArgumentParser(description="Range/unsupervised fit with physics-based constraints + report")
    ap.add_argument("trace", help="path to the .trace file (from can_split_by_id.py)")
    # supervision
    ap.add_argument("--rmin", type=float, default=None)
    ap.add_argument("--rmax", type=float, default=None)
    ap.add_argument("--allow-neg-scale", action="store_true")
    # shared
    ap.add_argument("--rx-only", action="store_true")
    ap.add_argument("--outdir", default=".")
    ap.add_argument("--plots-top", type=int, default=8)
    # supervised thresholds
    ap.add_argument("--min-hit", type=float, default=0.5)
    ap.add_argument("--rate-min", type=float, default=None)
    ap.add_argument("--rate-max", type=float, default=None)
    ap.add_argument("--jitter-max-ms", type=float, default=None)
    ap.add_argument("--max-slope-abs", type=float, default=None, help="max |Δphys|/s (e.g. °C/s, km/h/s)")
    ap.add_argument("--max-slope-frac", type=float, default=None, help="max |Δphys|/s relative to (rmax-rmin)")
    ap.add_argument("--slope-quantile", type=float, default=0.95, help="0.95 or 0.99")
    ap.add_argument("--min-uniq-ratio", type=float, default=None)
    # unsupervised thresholds
    ap.add_argument("--min-smooth", type=float, default=0.2)
    ap.add_argument("--max-slope-frac-raw", type=float, default=None, help="raw: (|Δraw|/s)/span")
    args = ap.parse_args()
trace = Path(args.trace)
df = parse_trace(trace, rx_only=args.rx_only)
if df.empty:
print("Keine Daten in Trace.", file=sys.stderr); sys.exit(2)
print("Keine Daten in Trace.", file=sys.stderr)
sys.exit(2)
supervised = (args.rmin is not None) and (args.rmax is not None)
    outdir = Path(args.outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    if supervised:
constraints = {
"rate_min": args.rate_min,
"rate_max": args.rate_max,
"jitter_max_ms": args.jitter_max_ms,
"max_slope_abs": args.max_slope_abs,
"max_slope_frac": args.max_slope_frac,
"slope_quantile": args.slope_quantile,
"min_uniq_ratio": args.min_uniq_ratio,
}
results = []
rejected = []
for label, series, times in gen_candidates(df):
r = evaluate_supervised(label, series, times, args.rmin, args.rmax, args.allow_neg_scale, constraints)
if r is None:
continue
if r["hit_ratio"] >= args.min_hit:
r["trace"] = trace.stem
results.append(r)
(results if r["passed"] else rejected).append({**r, "trace": trace.stem})
if not results and not rejected:
print("Keine Kandidaten über Schwelle gefunden.", file=sys.stderr)
sys.exit(3)
df_ok = pd.DataFrame(results).sort_values(
["quality_score", "hit_ratio", "p95_absdiff_phys", "rate_hz_est", "n"],
ascending=[False, False, True, False, False]
)
df_rej = pd.DataFrame(rejected)
csv_path = outdir / f"{trace.stem}_encoding_candidates.csv"
if not df_ok.empty:
df_ok.to_csv(csv_path, index=False)
print(f"Kandidaten-CSV: {csv_path}")
# Plots für Top-Kandidaten (oder Rejected, falls keine OK)
top_for_plots = df_ok if not df_ok.empty else df_rej
data = df["data"].tolist()
times_all = df["time_s"].to_numpy(dtype=float)
        def reconstruct_vals(label: str) -> Tuple[np.ndarray, np.ndarray] | None:
if label.startswith("byte["):
i = int(label.split("[")[1].split("]")[0])
idx = [k for k, d in enumerate(data) if len(d) > i]
if not idx: return None
return np.array([data[k][i] for k in idx], dtype=float), times_all[idx]
elif label.startswith(("le16", "be16", "le16s", "be16s")):
signed = label.startswith(("le16s", "be16s"))
i, j = map(int, label.split("[")[1].split("]")[0].split("-"))
idx = [k for k, d in enumerate(data) if len(d) > j]
if not idx: return None
a = [data[k][i] for k in idx]
b = [data[k][j] for k in idx]
if label.startswith("le16"):
v = [le16(x, y) for x, y in zip(a, b)]
else:
v = [be16(x, y) for x, y in zip(a, b)]
if signed:
v = [s16(int(x)) for x in v]
return np.array(v, dtype=float), times_all[idx]
return None
for _, row in top_for_plots.head(max(1, args.plots_top)).iterrows():
rec = reconstruct_vals(row["label"])
if rec is None:
continue
vals, tt = rec
phys = vals * row["scale"] + row["offset"]
out_png = outdir / f"{trace.stem}_{row['label'].replace('[','_').replace(']','')}.png"
plot_timeseries(tt[:len(phys)], phys, out_png,
f"{trace.name} {row['label']} (scale={row['scale']:.6g}, offset={row['offset']:.6g})",
"phys (geschätzt)")
        # report
hdr = {
"trace_name": trace.name,
"mode": "range_fit",
"rmin": args.rmin,
"rmax": args.rmax,
"min_hit": args.min_hit,
"rate_min": args.rate_min,
"rate_max": args.rate_max,
"jitter_max_ms": args.jitter_max_ms,
"max_slope_abs": args.max_slope_abs,
"max_slope_frac": args.max_slope_frac,
"slope_quantile": args.slope_quantile,
}
top_view = df_ok.head(12)[
["label", "quality_score", "hit_ratio", "scale", "offset",
"rate_hz_est", "period_std_ms", "slope_p95_per_s", "slope_p99_per_s",
"p95_absdiff_phys", "uniq_ratio"]
] if not df_ok.empty else pd.DataFrame()
fail_view = df_rej[["label", "fail_reasons"]] if not df_rej.empty else pd.DataFrame()
md_path = outdir / f"{trace.stem}_report.md"
json_path = outdir / f"{trace.stem}_report.json"
write_report_md(md_path, hdr, top_view, fail_view, "range_fit",
{"candidates_csv": str(csv_path) if not df_ok.empty else "(leer)"})
with open(json_path, "w", encoding="utf-8") as f:
json.dump({
"header": hdr,
"accepted": df_ok.to_dict(orient="records"),
"rejected": df_rej.to_dict(orient="records"),
}, f, ensure_ascii=False, indent=2)
print(f"Report: {md_path}")
print(f"Report JSON: {json_path}")
        if not df_ok.empty:
            print("\nTop candidates:")
            cols = ["label", "quality_score", "hit_ratio", "scale", "offset",
                    "rate_hz_est", "period_std_ms", "slope_p95_per_s", "slope_p99_per_s"]
            print(df_ok.head(10)[cols].to_string(index=False))
        else:
            print("\nNo candidates PASS; see the reasons in the report.")
    else:
# Unsupervised
results = []
for label, series, times in gen_candidates(df):
r = evaluate_unsupervised(label, series, times,
min_smooth=args.min_smooth,
max_slope_frac_raw=args.max_slope_frac_raw,
slope_quantile=args.slope_quantile)
if r is None:
continue
r["trace"] = trace.stem
results.append(r)
        if not results:
            print("No plausible raw signals found. Tip: lower --min-smooth.", file=sys.stderr)
            sys.exit(3)
        df_res = pd.DataFrame(results).sort_values(
            ["smoothness", "span_raw", "raw_var", "rate_hz_est", "n"],
            ascending=[False, False, False, False, False]
        )
csv_path = outdir / f"{trace.stem}_unsupervised_candidates.csv"
df_res.to_csv(csv_path, index=False)
print(f"Unsupervised-CSV: {csv_path}")
        # plots of the top N candidates (raw values)
data = df["data"].tolist()
times_all = df["time_s"].to_numpy(dtype=float)
        def reconstruct_raw(label: str) -> Tuple[np.ndarray, np.ndarray] | None:
            if label.startswith("byte["):
                i = int(label.split("[")[1].split("]")[0])
                idx = [k for k, d in enumerate(data) if len(d) > i]
                if not idx: return None
                return np.array([data[k][i] for k in idx], dtype=float), times_all[idx]
            elif label.startswith(("le16", "be16", "le16s", "be16s")):
                signed = label.startswith(("le16s", "be16s"))
                i, j = map(int, label.split("[")[1].split("]")[0].split("-"))
                idx = [k for k, d in enumerate(data) if len(d) > j]
                if not idx: return None
                a = [data[k][i] for k in idx]
                b = [data[k][j] for k in idx]
                if label.startswith("le16"):
                    v = [le16(x, y) for x, y in zip(a, b)]
                else:
                    v = [be16(x, y) for x, y in zip(a, b)]
                if signed:
                    v = [s16(int(x)) for x in v]
                return np.array(v, dtype=float), times_all[idx]
            return None
for _, row in df_res.head(max(1, args.plots_top)).iterrows():
rec = reconstruct_raw(row["label"])
if rec is None:
continue
vals, tt = rec
out_png = outdir / f"{trace.stem}_{row['label'].replace('[','_').replace(']','')}_raw.png"
plot_timeseries(tt[:len(vals)], vals, out_png,
f"{trace.name} {row['label']} (raw)", "raw")
        # report
hdr = {
"trace_name": trace.name,
"mode": "unsupervised",
"min_smooth": args.min_smooth,
"max_slope_frac_raw": args.max_slope_frac_raw,
}
top_view = df_res.head(12)[
["label", "smoothness", "span_raw", "raw_var",
"rate_hz_est", "period_std_ms", "slope_frac_raw", "uniq_ratio"]
]
md_path = outdir / f"{trace.stem}_report.md"
json_path = outdir / f"{trace.stem}_report.json"
write_report_md(md_path, hdr, top_view, pd.DataFrame(), "unsupervised",
{"candidates_csv": str(csv_path)})
with open(json_path, "w", encoding="utf-8") as f:
json.dump({
"header": hdr,
"accepted": df_res.to_dict(orient="records"),
}, f, ensure_ascii=False, indent=2)
print(f"Report: {md_path}")
print(f"Report JSON: {json_path}")
if __name__ == "__main__":
    main()