#!/usr/bin/env python3
"""Universal CAN signal finder for Kettenöler-style logs.

Decodes every 16-bit word of every CAN frame payload, scores the resulting
time series (variance, change count, unique ratio), and optionally filters
candidates by a plausible physical value range.
"""
import re
import sys
import argparse
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Expected line format (inferred from this pattern):
#   <timestamp_ms> <TX|RX> 0x<ID> <DLC> <byte> <byte> ...
# The byte group uses \s* rather than \s+ so a final byte without trailing
# whitespace (e.g. the last line of a file with no newline) still matches.
LOG_PATTERN = re.compile(
    r"(\d+)\s+(TX|RX)\s+0x([0-9A-Fa-f]+)\s+\d+\s+((?:[0-9A-Fa-f]{2}\s*)+)"
)


def parse_log(path: Path) -> pd.DataFrame:
    """Parse one log file into rows of (file, ts, dir, id, data)."""
    rows = []
    with open(path, "r", errors="ignore") as f:
        for line in f:
            m = LOG_PATTERN.match(line)
            if not m:
                continue
            ts = int(m.group(1))
            direction = m.group(2)
            can_id = int(m.group(3), 16)
            data = [int(x, 16) for x in m.group(4).split() if x.strip()]
            rows.append((path.name, ts, direction, can_id, data))
    df = pd.DataFrame(rows, columns=["file", "ts", "dir", "id", "data"])
    if df.empty:
        return df
    # Time base per file: seconds from that file's first frame.
    df["time_s"] = df.groupby("file")["ts"].transform(lambda s: (s - s.min()) / 1000.0)
    return df


def le16(data: List[int], offset: int) -> Optional[int]:
    """Little-endian 16-bit word at byte offset, or None if the frame is too short."""
    if len(data) < offset + 2:
        return None
    return data[offset] | (data[offset + 1] << 8)


def be16(data: List[int], offset: int) -> Optional[int]:
    """Big-endian 16-bit word at byte offset, or None if the frame is too short."""
    if len(data) < offset + 2:
        return None
    return (data[offset] << 8) | data[offset + 1]


def phys(val: float, scale: float, offs: float) -> float:
    """Raw-to-physical conversion: phys = raw * scale + offset."""
    return val * scale + offs


def decode_series(arr_data: List[List[int]], endian: str, offset: int) -> List[Optional[int]]:
    """Decode one 16-bit word per frame payload."""
    out = []
    for d in arr_data:
        v = le16(d, offset) if endian == "le" else be16(d, offset)
        out.append(v)
    return out


def score_values(vals: np.ndarray) -> Dict[str, float]:
    """Heuristic 'signal-ness' score: variance, number of changes, unique ratio."""
    if len(vals) < 3:
        return {"variance": 0.0, "changes": 0, "unique_ratio": 0.0}
    var = float(np.var(vals))
    changes = int(np.count_nonzero(np.diff(vals)))
    unique_ratio = len(set(vals.tolist())) / len(vals)
    return {"variance": var, "changes": changes, "unique_ratio": unique_ratio}
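
# Quick sanity check of the word decoders (payload bytes are illustrative,
# not from a real log):
#
#   >>> le16([0x34, 0x12, 0x00, 0x00], 0)
#   4660    # 0x1234: low byte first
#   >>> be16([0x34, 0x12, 0x00, 0x00], 0)
#   13330   # 0x3412: high byte first
#   >>> be16([0x34, 0x12], 2)
#   None    # frame too short for offset 2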


def analyze(df: pd.DataFrame, include_ids: Optional[List[int]],
            exclude_ids: Optional[List[int]]) -> pd.DataFrame:
    """Score every (ID, endianness, 16-bit offset) combination."""
    combos = []
    ids = sorted(df["id"].unique().tolist())
    if include_ids:
        ids = [i for i in ids if i in include_ids]
    if exclude_ids:
        ids = [i for i in ids if i not in exclude_ids]
    for cid in ids:
        grp = df[df["id"] == cid]
        for endian in ("le", "be"):
            for off in (0, 2, 4, 6):
                dec = decode_series(grp["data"].tolist(), endian, off)
                # Drop frames that were too short for this offset.
                pairs = [(t, v) for t, v in zip(grp["time_s"].tolist(), dec) if v is not None]
                if len(pairs) < 4:
                    continue
                times = np.array([p[0] for p in pairs], dtype=float)
                vals = np.array([p[1] for p in pairs], dtype=float)
                sc = score_values(vals)
                combos.append({
                    "id": cid,
                    "endian": endian,
                    "offset": off,
                    "n": len(vals),
                    "variance": sc["variance"],
                    "changes": sc["changes"],
                    "unique_ratio": sc["unique_ratio"],
                    "rate_hz": float(len(vals)) / (times.max() - times.min() + 1e-9),
                })
    return pd.DataFrame(combos)


def range_filter_stats(vals: np.ndarray, scale: float, offs: float,
                       rmin: Optional[float], rmax: Optional[float]) -> Dict[str, float]:
    """Fraction of physical values inside [rmin, rmax], plus observed min/max."""
    if vals.size == 0:
        return {"hit_ratio": 0.0, "min_phys": np.nan, "max_phys": np.nan}
    phys_vals = phys(vals, scale, offs)
    if rmin is None and rmax is None:
        return {"hit_ratio": 1.0, "min_phys": float(np.min(phys_vals)),
                "max_phys": float(np.max(phys_vals))}
    mask = np.ones_like(phys_vals, dtype=bool)
    if rmin is not None:
        mask &= (phys_vals >= rmin)
    if rmax is not None:
        mask &= (phys_vals <= rmax)
    hit_ratio = float(np.count_nonzero(mask)) / len(phys_vals)
    return {"hit_ratio": hit_ratio, "min_phys": float(np.min(phys_vals)),
            "max_phys": float(np.max(phys_vals))}
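
# Worked example (numbers are illustrative, not from a real log): with
# scale=0.01 and offset=0, a raw word of 12000 maps to
# phys = 12000 * 0.01 + 0 = 120.0, so with rmin=0 and rmax=250 that sample
# counts toward hit_ratio.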


def export_candidate_timeseries(df: pd.DataFrame, cid: int, endian: str, off: int,
                                scale: float, offs: float, outdir: Path,
                                basename_hint: str) -> Tuple[bool, Optional[Tuple[Path, Path]]]:
    """Write one candidate's decoded time series as a CSV plus a PNG plot."""
    sub = df[df["id"] == cid].copy()
    if sub.empty:
        return False, None
    dec = decode_series(sub["data"].tolist(), endian, off)
    sub["raw16"] = dec
    sub = sub.dropna(subset=["raw16"]).copy()
    if sub.empty:
        return False, None
    sub["phys"] = phys(sub["raw16"].astype(float), scale, offs)
    # Save CSV
    csv_path = outdir / f"{basename_hint}_0x{cid:X}_{endian}_off{off}.csv"
    sub[["file", "time_s", "id", "raw16", "phys"]].to_csv(csv_path, index=False)
    # Plot (single-plot image)
    plt.figure(figsize=(10, 5))
    plt.plot(sub["time_s"].to_numpy(), sub["phys"].to_numpy(), marker="o")
    plt.xlabel("Time (s)")
    plt.ylabel("Value (phys)")
    plt.title(f"{basename_hint} 0x{cid:X} ({endian} @ +{off})")
    plt.grid(True)
    plt.tight_layout()
    img_path = outdir / f"{basename_hint}_0x{cid:X}_{endian}_off{off}.png"
    plt.savefig(img_path, dpi=150)
    plt.close()
    return True, (csv_path, img_path)


def main():
    ap = argparse.ArgumentParser(description="Universal CAN signal finder (WheelSpeed etc.) for Kettenöler logs")
    ap.add_argument("logs", nargs="+", help="Log files (same structure as the Kettenöler logs)")
    ap.add_argument("--outdir", default="analyze_out", help="Output directory")
    ap.add_argument("--top", type=int, default=20,
                    help="Export the global top N candidates (by variance) if the range filter finds nothing")
    ap.add_argument("--include-ids", default="", help="Only these IDs (comma-separated, e.g. 0x208,0x209)")
    ap.add_argument("--exclude-ids", default="", help="Exclude these IDs (comma-separated)")
    ap.add_argument("--scale", type=float, default=1.0, help="Scaling: phys = raw*scale + offset")
    ap.add_argument("--offset", type=float, default=0.0, help="Offset: phys = raw*scale + offset")
    ap.add_argument("--range-min", type=float, default=None, help="Minimum of the physical target range (after scale/offset)")
    ap.add_argument("--range-max", type=float, default=None, help="Maximum of the physical target range (after scale/offset)")
    ap.add_argument("--range-hit-ratio", type=float, default=0.6, help="Minimum fraction of values inside the target range [0..1]")
    ap.add_argument("--per-id-limit", type=int, default=2, help="Max number of decodings per ID (e.g. the best two offset/endianness combinations)")
    args = ap.parse_args()

    def parse_ids(s: str) -> Optional[List[int]]:
        """Parse a comma-separated ID list; accepts decimal and 0x-prefixed hex."""
        if not s.strip():
            return None
        out = []
        for tok in s.split(","):
            tok = tok.strip()
            if not tok:
                continue
            out.append(int(tok, 16) if tok.lower().startswith("0x") else int(tok))
        return out

    include_ids = parse_ids(args.include_ids)
    exclude_ids = parse_ids(args.exclude_ids)

    # Load logs
    frames = []
    for p in args.logs:
        df = parse_log(Path(p))
        if df.empty:
            print(f"Warning: {p} produced no data or did not match the pattern.", file=sys.stderr)
        else:
            frames.append(df)
    if not frames:
        print("No data.", file=sys.stderr)
        sys.exit(2)
    df_all = pd.concat(frames, ignore_index=True)

    outdir = Path(args.outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    # Analyze all combos
    cand = analyze(df_all, include_ids, exclude_ids)
    if cand.empty:
        print("No decodable 16-bit fields found.", file=sys.stderr)
        sys.exit(3)

    # Rank by variance, then change count, then unique ratio.
    cand = cand.sort_values(["variance", "changes", "unique_ratio"],
                            ascending=[False, False, False]).reset_index(drop=True)

    # For each candidate row, compute range-hit stats. Always append a row
    # (range_filter_stats handles empty input) so `hits` stays aligned with
    # cand's index; skipping rows here would break the column assignment below.
    hits = []
    for _, row in cand.iterrows():
        cid = int(row["id"])
        endian = row["endian"]
        off = int(row["offset"])
        sub = df_all[df_all["id"] == cid]
        dec = decode_series(sub["data"].tolist(), endian, off)
        vals = np.array([v for v in dec if v is not None], dtype=float)
        rng = range_filter_stats(vals, args.scale, args.offset, args.range_min, args.range_max)
        hits.append((rng["hit_ratio"], rng["min_phys"], rng["max_phys"]))
    cand[["hit_ratio", "min_phys", "max_phys"]] = pd.DataFrame(hits, index=cand.index)

    # Export global candidate table
    cand_out = outdir / "candidates_global.csv"
    cand.to_csv(cand_out, index=False)
    print(f"Global candidates CSV: {cand_out}")

    # Decide which candidates to export as time series.
    selected = []
    if args.range_min is not None or args.range_max is not None:
        # Keep candidates that meet the hit-ratio threshold; per ID, take the best few.
        ok = cand[cand["hit_ratio"] >= args.range_hit_ratio].copy()
        if ok.empty:
            print("The range filter found no candidates; falling back to the top N by variance.", file=sys.stderr)
        else:
            for cid, grp in ok.groupby("id"):
                grp = grp.sort_values(["hit_ratio", "variance", "changes", "unique_ratio"],
                                      ascending=[False, False, False, False])
                selected.extend(grp.head(args.per_id_limit).to_dict("records"))
    if not selected:
        # Fallback: global top N by variance, limited per ID.
        per_id_count = {}
        for _, row in cand.iterrows():
            cid = int(row["id"])
            per_id_count.setdefault(cid, 0)
            if len(selected) >= args.top:
                break
            if per_id_count[cid] >= args.per_id_limit:
                continue
            selected.append(row.to_dict())
            per_id_count[cid] += 1

    # Export per-candidate CSVs and plots
    exp_index = []
    base_hint = "decoded"
    for row in selected:
        cid = int(row["id"])
        endian = row["endian"]
        off = int(row["offset"])
        ok, pair = export_candidate_timeseries(df_all, cid, endian, off,
                                               args.scale, args.offset, outdir, base_hint)
        if ok and pair:
            exp_index.append({
                "id": cid,
                "endian": endian,
                "offset": off,
                "csv": str(pair[0]),
                "plot": str(pair[1]),
            })
    idx_df = pd.DataFrame(exp_index)
    idx_path = outdir / "exports_index.csv"
    idx_df.to_csv(idx_path, index=False)
    print(f"Export index: {idx_path}")
    print("Done. Tip: use --range-min/--range-max together with --scale/--offset to filter for plausible physical ranges.")
    print("Example: --scale 0.01 --range-min 0 --range-max 250 (if raw ≈ cm/s → km/h)")


if __name__ == "__main__":
    main()
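
# Example invocation (script and log file names are hypothetical; the flags
# are the ones defined above):
#
#   python3 can_signal_finder.py ride1.log ride2.log \
#       --include-ids 0x208,0x209 \
#       --scale 0.01 --range-min 0 --range-max 250 \
#       --outdir analyze_out
#
# This restricts the search to IDs 0x208/0x209, maps raw words via
# phys = raw * 0.01, and exports candidates whose values fall mostly inside
# 0..250, writing candidates_global.csv and exports_index.csv to analyze_out/.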