#!/usr/bin/env python3
"""
trace_signal_fitter.py
----------------------
Two operating modes for a single .trace file:

1) Range fit (supervised): --rmin/--rmax given
   For every 8-bit byte (D0..D7) and every adjacent 16-bit pair (LE/BE),
   search a linear mapping  phys = raw*scale + offset  that puts as many
   samples as possible into [rmin, rmax]. Ranked primarily by hit_ratio.

2) Unsupervised (no range): --rmin/--rmax omitted
   Find "plausible" physical candidates by smoothness/variance/span/rate
   without estimating scale/offset (raw values are used directly).
   Ranked primarily by "smoothness".

Log format (Kettenöler), one frame per line, as matched by LOG_PATTERN:
    <ts_ms> <TX|RX> 0x<id_hex> <dlc> <b0> <b1> ...

Outputs:
  - Range fit:    <trace>_encoding_candidates.csv + optional plots
  - Unsupervised: <trace>_unsupervised_candidates.csv + optional plots
"""

import argparse
import re
import sys
from pathlib import Path

import numpy as np
import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: the CSV output must still work without matplotlib.
    plt = None

# Groups: 1 = timestamp, 2 = direction, 3 = CAN id (hex), 4 = data bytes.
# The last data byte may be terminated by end-of-line instead of whitespace,
# so a final line without a trailing newline does not lose its last byte.
LOG_PATTERN = re.compile(
    r"(\d+)\s+(TX|RX)\s+0x([0-9A-Fa-f]+)\s+\d+\s+((?:[0-9A-Fa-f]{2}(?:\s+|$))+)"
)

# Series shorter than this are not evaluated in either mode.
_MIN_SAMPLES = 12


def parse_trace(path: Path, rx_only=False) -> pd.DataFrame:
    """Parse a trace file into a DataFrame with one row per matching frame.

    Lines that do not match LOG_PATTERN are skipped.

    Args:
        path: Trace file to read.
        rx_only: If true, keep only frames whose direction is "RX".

    Returns:
        DataFrame with columns ts, dir, id, data (list of ints) and time_s
        (seconds since the smallest timestamp, ts being divided by 1000).
        If no line matched, an empty frame without time_s is returned.
    """
    rows = []
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            m = LOG_PATTERN.match(line)
            if not m:
                continue
            direction = m.group(2)
            if rx_only and direction != "RX":
                continue
            payload = [int(tok, 16) for tok in m.group(4).split()]
            rows.append((int(m.group(1)), direction, int(m.group(3), 16), payload))

    df = pd.DataFrame(rows, columns=["ts", "dir", "id", "data"])
    if df.empty:
        return df
    df["time_s"] = (df["ts"] - df["ts"].min()) / 1000.0
    return df


def be16(a, b):
    """Combine two bytes into a 16-bit value, *a* being the high byte."""
    return (a << 8) | b


def le16(a, b):
    """Combine two bytes into a 16-bit value, *a* being the low byte."""
    return a | (b << 8)


def p95_abs_diff(arr: np.ndarray) -> float:
    """Return the 95th percentile of |diff(arr)|, or 0.0 for < 2 samples."""
    if arr.size < 2:
        return 0.0
    return float(np.percentile(np.abs(np.diff(arr)), 95))


def basic_rate(times: np.ndarray) -> float:
    """Estimate the sample rate as sample count divided by the covered time.

    Returns 0.0 for fewer than two samples or a non-positive duration.
    """
    if times.size < 2:
        return 0.0
    duration = times.max() - times.min()
    if duration <= 0:
        return 0.0
    return float(times.size / duration)


def interval_best_offset(raw: np.ndarray, scale: float, rmin: float, rmax: float):
    """Find the offset that puts the most samples of raw*scale into the range.

    Each sample admits a closed interval of offsets
    [rmin - scale*raw, rmax - scale*raw]; a sweep over the sorted interval
    end points finds the point covered by the most intervals.

    Args:
        raw: Raw sample values.
        scale: Candidate scale factor.
        rmin: Lower bound of the target range.
        rmax: Upper bound of the target range.

    Returns:
        (offset, hit_ratio): the first offset (in ascending order) at which
        coverage is maximal, and the covered fraction of samples.
        (None, 0.0) if *raw* is empty.
    """
    n = len(raw)
    if n == 0:
        return None, 0.0

    scaled = scale * np.asarray(raw, dtype=float)
    a = rmin - scaled
    b = rmax - scaled
    positions = np.concatenate((np.minimum(a, b), np.maximum(a, b)))
    deltas = np.concatenate((np.ones(n, dtype=int), -np.ones(n, dtype=int)))

    # Sort by position; at equal positions interval starts (+1) come before
    # interval ends (-1) so that touching intervals count as overlapping.
    order = np.lexsort((-deltas, positions))
    coverage = np.cumsum(deltas[order])
    k = int(np.argmax(coverage))  # first maximum, like a strict '>' sweep
    return float(positions[order[k]]), float(coverage[k]) / float(n)


def gen_candidates(df: pd.DataFrame):
    """Yield every 8-bit and adjacent 16-bit candidate series of a trace.

    Frames too short to contain a candidate's bytes are left out, and the
    yielded time vector is filtered the same way, so values and times always
    belong to the same frames.

    Args:
        df: Frame table with "time_s" and "data" columns (see parse_trace).

    Yields:
        ((label, values), times) with label "byte[i]", "le16[i-j]" or
        "be16[i-j]", values as a float array and times of equal length.
    """
    times = df["time_s"].to_numpy(dtype=float)
    data = df["data"].tolist()
    lengths = np.fromiter((len(d) for d in data), dtype=int, count=len(data))

    # 8-bit
    for i in range(8):
        mask = lengths > i
        if not mask.any():
            continue
        vals = np.array([d[i] for d in data if len(d) > i], dtype=float)
        yield (f"byte[{i}]", vals), times[mask]

    # 16-bit (adjacent)
    for i in range(7):
        j = i + 1
        mask = lengths > j
        if not mask.any():
            continue
        frames = [d for d in data if len(d) > j]
        pair_times = times[mask]
        le_vals = np.array([le16(d[i], d[j]) for d in frames], dtype=float)
        yield (f"le16[{i}-{j}]", le_vals), pair_times
        be_vals = np.array([be16(d[i], d[j]) for d in frames], dtype=float)
        yield (f"be16[{i}-{j}]", be_vals), pair_times


def prefilter(vals: np.ndarray):
    """Decide whether a raw series is worth fitting.

    Returns:
        (True, {"p95_abs_diff": ..., "span_est": ...}) if the series passes,
        otherwise (False, {"reason": ...}).
    """
    if vals.size < _MIN_SAMPLES:
        return False, {"reason": "too_few_samples"}
    if np.unique(vals).size <= 2:
        return False, {"reason": "too_constant"}
    p95 = p95_abs_diff(vals)
    if p95 == 0:
        return False, {"reason": "no_changes"}
    # 3..97 percentile span; the epsilon keeps the span strictly positive.
    r = float(np.percentile(vals, 97) - np.percentile(vals, 3) + 1e-9)
    if p95 > 0.5 * r:
        return False, {"reason": "too_jumpi"}
    return True, {"p95_abs_diff": p95, "span_est": r}


def try_scaleset():
    """Return the sorted list of candidate scale factors to try."""
    base = [
        1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2,
        0.1, 0.2, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0,
        0.0625, 0.125, 0.75, 0.8, 1.25,
    ]
    return sorted(set(base))


def evaluate_supervised(label, vals: np.ndarray, times: np.ndarray,
                        rmin: float, rmax: float, allow_neg_scale=False):
    """Fit phys = raw*scale + offset into [rmin, rmax] for one candidate.

    Args:
        label: Candidate label, copied into the result.
        vals: Raw sample values.
        times: Sample times in seconds (used for the rate estimate).
        rmin: Lower bound of the target range.
        rmax: Upper bound of the target range.
        allow_neg_scale: Also try the negated scale factors.

    Returns:
        A dict of fit metrics (mode "range_fit"), or None if the series is
        rejected by prefilter().
    """
    ok, meta = prefilter(vals)
    if not ok:
        return None

    scales = try_scaleset()
    if allow_neg_scale:
        scales = scales + [-s for s in scales if s > 0]

    # A tiny scale squeezes any series into the range, so the hit ratio alone
    # cannot separate scales. On equal hit ratio prefer the largest |scale|
    # (mapped values fill the most of the range), then the positive sign.
    best = None
    best_key = None
    for s in scales:
        o, hr = interval_best_offset(vals, s, rmin, rmax)
        key = (hr, abs(s), s)
        if best_key is None or key > best_key:
            best_key = key
            best = {"scale": s, "offset": float(o), "hit_ratio": hr}

    phys = vals * best["scale"] + best["offset"]
    # NOTE: in_count is recomputed from the mapped values; floating-point
    # rounding at the range boundary may make it differ from hit_ratio * n.
    within = (phys >= rmin) & (phys <= rmax)

    return {
        "label": label,
        "mode": "range_fit",
        "n": int(vals.size),
        "rate_hz_est": float(basic_rate(times[:len(vals)])),
        "raw_min": float(np.min(vals)),
        "raw_max": float(np.max(vals)),
        "raw_var": float(np.var(vals)),
        "p95_absdiff_raw": float(p95_abs_diff(vals)),
        "scale": float(best["scale"]),
        "offset": float(best["offset"]),
        "hit_ratio": float(best["hit_ratio"]),
        "in_count": int(np.count_nonzero(within)),
        "phys_min": float(np.min(phys)),
        "phys_max": float(np.max(phys)),
        "p95_absdiff_phys": float(p95_abs_diff(phys)),
        "span_phys": float(np.percentile(phys, 97) - np.percentile(phys, 3)),
        "prefilter_span_est": float(meta.get("span_est", 0.0)),
        "prefilter_p95_absdiff": float(meta.get("p95_abs_diff", 0.0)),
    }


def evaluate_unsupervised(label, vals: np.ndarray, times: np.ndarray, min_smooth=0.2):
    """Compute plausibility metrics for one raw series (no scale/offset).

    smoothness = 1 - clamp(p95(|diff|) / span, 0..1)
    uniq_ratio = |unique| / n
    Intended ranking: smoothness desc, span desc, var desc, rate desc, n desc.

    Args:
        label: Candidate label, copied into the result.
        vals: Raw sample values.
        times: Sample times in seconds (used for the rate estimate).
        min_smooth: Series with a lower smoothness are rejected.

    Returns:
        A dict of metrics (mode "unsupervised"), or None if the series has
        too few samples, is too constant, or is too jumpy.
    """
    if vals.size < _MIN_SAMPLES:
        return None

    p95 = p95_abs_diff(vals)
    span = float(np.percentile(vals, 97) - np.percentile(vals, 3) + 1e-9)
    smooth = 1.0 - min(max(p95 / span, 0.0), 1.0)
    uniq = len(np.unique(vals))
    uniq_ratio = float(uniq) / float(vals.size)

    # Filter: too constant. The ratio test alone lets constant series pass
    # when n < 50 (1/n > 0.02), and they would then rank at smoothness 1.0,
    # so the absolute count is checked as well (same rule as prefilter()).
    if uniq <= 2 or uniq_ratio <= 0.02:
        return None
    # Filter: too jumpy.
    if smooth < min_smooth:
        return None

    return {
        "label": label,
        "mode": "unsupervised",
        "n": int(vals.size),
        "rate_hz_est": float(basic_rate(times[:len(vals)])),
        "raw_min": float(np.min(vals)),
        "raw_max": float(np.max(vals)),
        "raw_var": float(np.var(vals)),
        "span_raw": span,
        "p95_absdiff_raw": float(p95),
        "smoothness": float(smooth),
        "uniq_ratio": float(uniq_ratio),
    }


def plot_timeseries(times, series, out_png: Path, title: str, ylabel: str):
    """Save a time-series line plot of *series* over *times* as a PNG.

    The parent directory of *out_png* is created if necessary.

    Raises:
        RuntimeError: If matplotlib could not be imported.
    """
    if plt is None:
        raise RuntimeError("matplotlib is required for plotting")
    plt.figure(figsize=(10, 4))
    plt.plot(times[:len(series)], series, marker=".", linestyle="-")
    plt.xlabel("Zeit (s)")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.grid(True)
    plt.tight_layout()
    out_png.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(out_png, dpi=150)
    plt.close()


def _build_arg_parser():
    """Create the command-line parser."""
    ap = argparse.ArgumentParser(description="Finde Encoding-Kandidaten (mit Range) oder plausible Rohsignale (ohne Range) in einer .trace-Datei")
    ap.add_argument("trace", help="Pfad zur .trace Datei (aus can_split_by_id.py)")
    ap.add_argument("--rmin", type=float, default=None, help="untere Grenze des Zielbereichs (phys)")
    ap.add_argument("--rmax", type=float, default=None, help="obere Grenze des Zielbereichs (phys)")
    ap.add_argument("--rx-only", action="store_true", help="Nur RX Frames nutzen")
    ap.add_argument("--allow-neg-scale", action="store_true", help="Auch negative scale testen (nur Range-Fit)")
    ap.add_argument("--outdir", default=".", help="Output-Verzeichnis (CSV/Plots)")
    ap.add_argument("--plots-top", type=int, default=8, help="Erzeuge Plots für die Top-N Kandidaten")
    ap.add_argument("--min-hit", type=float, default=0.5, help="Mindest-Hit-Ratio für Range-Fit (0..1)")
    ap.add_argument("--min-smooth", type=float, default=0.2, help="Mindest-Smoothness für Unsupervised (0..1)")
    return ap


def _plot_top(df_res, series_by_label, top_n, outdir, trace, supervised):
    """Write one PNG per top-ranked candidate, reusing the extracted series."""
    if top_n <= 0:
        return
    if plt is None:
        print("matplotlib nicht verfügbar – Plots werden übersprungen.", file=sys.stderr)
        return
    for _, row in df_res.head(top_n).iterrows():
        label = row["label"]
        vals, times = series_by_label[label]
        stem = f"{trace.stem}_{label.replace('[','_').replace(']','')}"
        if supervised:
            phys = vals * row["scale"] + row["offset"]
            plot_timeseries(times, phys, outdir / f"{stem}.png",
                            f"{trace.name} – {label} (scale={row['scale']:.6g}, offset={row['offset']:.6g})",
                            "phys (geschätzt)")
        else:
            plot_timeseries(times, vals, outdir / f"{stem}_raw.png",
                            f"{trace.name} – {label} (raw)", "raw")


def main():
    """Command-line entry point: evaluate all candidates, write CSV and plots.

    Exit codes: 2 for argument errors or a trace without usable frames,
    3 if no candidate passes the thresholds.
    """
    ap = _build_arg_parser()
    args = ap.parse_args()

    # The range is only meaningful as a pair; a single bound used to fall
    # back to unsupervised mode silently.
    if (args.rmin is None) != (args.rmax is None):
        ap.error("--rmin und --rmax müssen gemeinsam angegeben werden.")
    supervised = args.rmin is not None
    if supervised and args.rmin > args.rmax:
        ap.error("--rmin darf nicht größer als --rmax sein.")

    trace = Path(args.trace)
    if not trace.is_file():
        ap.error(f"Trace-Datei nicht gefunden: {trace}")

    df = parse_trace(trace, rx_only=args.rx_only)
    if df.empty:
        print("Keine Daten in Trace.", file=sys.stderr)
        sys.exit(2)

    results = []
    series_by_label = {}  # label -> (values, times), kept for plotting
    for (label, series), times in gen_candidates(df):
        if supervised:
            r = evaluate_supervised(label, series, times, args.rmin, args.rmax,
                                    allow_neg_scale=args.allow_neg_scale)
            if r is None or r["hit_ratio"] < args.min_hit:
                continue
        else:
            r = evaluate_unsupervised(label, series, times, min_smooth=args.min_smooth)
            if r is None:
                continue
        r["trace"] = trace.stem
        results.append(r)
        series_by_label[label] = (series, times)

    if not results:
        if supervised:
            print("Keine Kandidaten über Schwelle gefunden. Tipp: --min-hit senken oder --allow-neg-scale testen.", file=sys.stderr)
        else:
            print("Keine plausiblen Rohsignale gefunden. Tipp: --min-smooth senken.", file=sys.stderr)
        sys.exit(3)

    outdir = Path(args.outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    # A negative count would make DataFrame.head() return all but the last
    # rows; 0 disables plotting in both modes.
    top_n = max(0, args.plots_top)

    if supervised:
        df_res = pd.DataFrame(results).sort_values(
            ["hit_ratio", "p95_absdiff_phys", "rate_hz_est", "n"],
            ascending=[False, True, False, False],
        )
        csv_path = outdir / f"{trace.stem}_encoding_candidates.csv"
        df_res.to_csv(csv_path, index=False)
        print(f"Kandidaten-CSV: {csv_path}")

        _plot_top(df_res, series_by_label, top_n, outdir, trace, supervised=True)

        cols = ["label", "hit_ratio", "scale", "offset", "p95_absdiff_phys",
                "rate_hz_est", "n", "phys_min", "phys_max"]
        print("\nTop-Kandidaten:")
        print(df_res.head(10)[cols].to_string(index=False))
    else:
        df_res = pd.DataFrame(results).sort_values(
            ["smoothness", "span_raw", "raw_var", "rate_hz_est", "n"],
            ascending=[False, False, False, False, False],
        )
        csv_path = outdir / f"{trace.stem}_unsupervised_candidates.csv"
        df_res.to_csv(csv_path, index=False)
        print(f"Unsupervised-CSV: {csv_path}")

        _plot_top(df_res, series_by_label, top_n, outdir, trace, supervised=False)

        cols = ["label", "smoothness", "span_raw", "raw_var", "rate_hz_est",
                "n", "uniq_ratio", "p95_absdiff_raw"]
        print("\nTop plausible Rohsignale:")
        print(df_res.head(10)[cols].to_string(index=False))


if __name__ == "__main__":
    main()