Kettenoeler/Reverse-Engineering CAN-Bus/can_universal_signal_finder.py

#!/usr/bin/env python3
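"""Universal CAN signal finder for Kettenöler logs.

Parses one or more CAN bus log files, decodes every 16-bit word (both
endiannesses, byte offsets 0/2/4/6) per CAN ID, scores the resulting value
series by variance, change count and unique-value ratio, and exports the
most promising candidates as CSV time series and plots. An optional
physical-range filter (--scale/--offset plus --range-min/--range-max)
narrows the search to plausible signals such as wheel speed.
"""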
import re
import sys
import argparse
from pathlib import Path
from typing import List, Tuple, Optional, Dict
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
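
# Expected log line format (illustrative example, inferred from the regex
# below; the timestamp unit of milliseconds is inferred from parse_log):
#   "1695000123 RX 0x208 8 00 00 3C 00 00 00 00 00"
# i.e. <timestamp_ms> <TX|RX> 0x<ID> <DLC> <data bytes as hex pairs>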
LOG_PATTERN = re.compile(r"(\d+)\s+(TX|RX)\s+0x([0-9A-Fa-f]+)\s+\d+\s+((?:[0-9A-Fa-f]{2}\s+)+)")

def parse_log(path: Path) -> pd.DataFrame:
    rows = []
    with open(path, "r", errors="ignore") as f:
        for line in f:
            m = LOG_PATTERN.match(line)
            if not m:
                continue
            ts = int(m.group(1))
            direction = m.group(2)
            can_id = int(m.group(3), 16)
            data = [int(x, 16) for x in m.group(4).split() if x.strip()]
            rows.append((path.name, ts, direction, can_id, data))
    df = pd.DataFrame(rows, columns=["file", "ts", "dir", "id", "data"])
    if df.empty:
        return df
    # time base per file → seconds from file start
    df["time_s"] = df.groupby("file")["ts"].transform(lambda s: (s - s.min()) / 1000.0)
    return df

def le16(data: List[int], offset: int) -> Optional[int]:
    if len(data) < offset + 2:
        return None
    return data[offset] | (data[offset + 1] << 8)

def be16(data: List[int], offset: int) -> Optional[int]:
    if len(data) < offset + 2:
        return None
    return (data[offset] << 8) | data[offset + 1]

def phys(val: float, scale: float, offs: float) -> float:
    return val * scale + offs
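
# Illustrative check (assumed example bytes, not from a real log): the byte
# pair 10 27 decodes little-endian to 0x2710 = 10000 and big-endian to
# 0x1027 = 4135; with --scale 0.01 the LE reading would be 100.0 phys units.
assert le16([0x10, 0x27], 0) == 10000
assert be16([0x10, 0x27], 0) == 0x1027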

def decode_series(arr_data: List[List[int]], endian: str, offset: int) -> List[Optional[int]]:
    out = []
    for d in arr_data:
        v = le16(d, offset) if endian == "le" else be16(d, offset)
        out.append(v)
    return out

def score_values(vals: np.ndarray) -> Dict[str, float]:
    # Heuristic: a live measurement (e.g. wheel speed) varies over a ride,
    # so high variance, frequent sample-to-sample changes and a high
    # unique-value ratio separate it from constant status fields.
    if len(vals) < 3:
        return {"variance": 0.0, "changes": 0, "unique_ratio": 0.0}
    var = float(np.var(vals))
    changes = int(np.count_nonzero(np.diff(vals)))
    unique_ratio = len(set(vals.tolist())) / len(vals)
    return {"variance": var, "changes": changes, "unique_ratio": unique_ratio}

def analyze(df: pd.DataFrame, include_ids: Optional[List[int]], exclude_ids: Optional[List[int]]):
    # Group by ID and try each 16-bit word
    combos = []
    ids = sorted(df["id"].unique().tolist())
    if include_ids:
        ids = [i for i in ids if i in include_ids]
    if exclude_ids:
        ids = [i for i in ids if i not in exclude_ids]
    for cid in ids:
        grp = df[df["id"] == cid]
        for endian in ("le", "be"):
            for off in (0, 2, 4, 6):
                dec = decode_series(grp["data"].tolist(), endian, off)
                # filter Nones
                pairs = [(t, v) for t, v in zip(grp["time_s"].tolist(), dec) if v is not None]
                if len(pairs) < 4:
                    continue
                times = np.array([p[0] for p in pairs], dtype=float)
                vals = np.array([p[1] for p in pairs], dtype=float)
                sc = score_values(vals)
                combos.append({
                    "id": cid,
                    "endian": endian,
                    "offset": off,
                    "n": len(vals),
                    "variance": sc["variance"],
                    "changes": sc["changes"],
                    "unique_ratio": sc["unique_ratio"],
                    "rate_hz": float(len(vals)) / (times.max() - times.min() + 1e-9)
                })
    cand_df = pd.DataFrame(combos)
    return cand_df

def range_filter_stats(vals: np.ndarray, scale: float, offs: float, rmin: Optional[float], rmax: Optional[float]) -> Dict[str, float]:
    if vals.size == 0:
        return {"hit_ratio": 0.0, "min_phys": np.nan, "max_phys": np.nan}
    phys_vals = vals * scale + offs
    if rmin is None and rmax is None:
        return {"hit_ratio": 1.0, "min_phys": float(np.min(phys_vals)), "max_phys": float(np.max(phys_vals))}
    mask = np.ones_like(phys_vals, dtype=bool)
    if rmin is not None:
        mask &= (phys_vals >= rmin)
    if rmax is not None:
        mask &= (phys_vals <= rmax)
    hit_ratio = float(np.count_nonzero(mask)) / len(phys_vals)
    return {"hit_ratio": hit_ratio, "min_phys": float(np.min(phys_vals)), "max_phys": float(np.max(phys_vals))}

def export_candidate_timeseries(df: pd.DataFrame, cid: int, endian: str, off: int, scale: float, offs: float, outdir: Path, basename_hint: str):
    sub = df[df["id"] == cid].copy()
    if sub.empty:
        return False, None
    dec = decode_series(sub["data"].tolist(), endian, off)
    sub["raw16"] = dec
    sub = sub.dropna(subset=["raw16"]).copy()
    if sub.empty:
        return False, None
    sub["phys"] = sub["raw16"].astype(float) * scale + offs
    # Save CSV
    csv_path = outdir / f"{basename_hint}_0x{cid:X}_{endian}_off{off}.csv"
    sub[["file", "time_s", "id", "raw16", "phys"]].to_csv(csv_path, index=False)
    # Plot (single-plot image)
    plt.figure(figsize=(10, 5))
    plt.plot(sub["time_s"].to_numpy(), sub["phys"].to_numpy(), marker="o")
    plt.xlabel("Time (s)")
    plt.ylabel("Value (phys)")
    plt.title(f"{basename_hint} 0x{cid:X} ({endian} @ +{off})")
    plt.grid(True)
    plt.tight_layout()
    img_path = outdir / f"{basename_hint}_0x{cid:X}_{endian}_off{off}.png"
    plt.savefig(img_path, dpi=150)
    plt.close()
    return True, (csv_path, img_path)

def main():
    ap = argparse.ArgumentParser(description="Universal CAN signal finder (WheelSpeed etc.) for Kettenöler logs")
    ap.add_argument("logs", nargs="+", help="Log files (same structure as the Kettenöler logs)")
    ap.add_argument("--outdir", default="analyze_out", help="Output directory")
    ap.add_argument("--top", type=int, default=20, help="Export the global top-N candidates (by variance) if the range filter finds nothing")
    ap.add_argument("--include-ids", default="", help="Only these IDs (comma-separated, e.g. 0x208,0x209)")
    ap.add_argument("--exclude-ids", default="", help="Exclude these IDs (comma-separated)")
    ap.add_argument("--scale", type=float, default=1.0, help="Scaling: phys = raw*scale + offset")
    ap.add_argument("--offset", type=float, default=0.0, help="Offset: phys = raw*scale + offset")
    ap.add_argument("--range-min", type=float, default=None, help="Minimum of the physical target range (after scale/offset)")
    ap.add_argument("--range-max", type=float, default=None, help="Maximum of the physical target range (after scale/offset)")
    ap.add_argument("--range-hit-ratio", type=float, default=0.6, help="Minimum fraction of values inside the target range [0..1]")
    ap.add_argument("--per-id-limit", type=int, default=2, help="Maximum number of decodings per ID (e.g. the best two offset/endianness combos)")
    args = ap.parse_args()
    # Parse include/exclude lists
    def parse_ids(s: str):
        if not s.strip():
            return None
        out = []
        for tok in s.split(","):
            tok = tok.strip()
            if not tok:
                continue
            if tok.lower().startswith("0x"):
                out.append(int(tok, 16))
            else:
                out.append(int(tok))
        return out
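    # e.g. parse_ids("0x208,0x209") → [520, 521]; an empty string → None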
    include_ids = parse_ids(args.include_ids)
    exclude_ids = parse_ids(args.exclude_ids)
    # Load logs
    frames = []
    for p in args.logs:
        df = parse_log(Path(p))
        if df.empty:
            print(f"Warning: {p} yielded no data or did not match the pattern.", file=sys.stderr)
        else:
            frames.append(df)
    if not frames:
        print("No data.", file=sys.stderr)
        sys.exit(2)
    df_all = pd.concat(frames, ignore_index=True)
    outdir = Path(args.outdir)
    outdir.mkdir(parents=True, exist_ok=True)
    # Analyze all combos
    cand = analyze(df_all, include_ids, exclude_ids)
    if cand.empty:
        print("No decodable 16-bit fields found.", file=sys.stderr)
        sys.exit(3)
    # Range filter pass
    cand = cand.sort_values(["variance", "changes", "unique_ratio"], ascending=[False, False, False]).reset_index(drop=True)
    # For each candidate row, compute range-hit stats
    hits = []
    for _, row in cand.iterrows():
        cid = int(row["id"])
        endian = row["endian"]
        off = int(row["offset"])
        sub = df_all[df_all["id"] == cid]
        dec = decode_series(sub["data"].tolist(), endian, off)
        vals = np.array([v for v in dec if v is not None], dtype=float)
        if vals.size == 0:
            # keep hits aligned with cand rows, otherwise the column assignment below fails
            hits.append((0.0, np.nan, np.nan))
            continue
        rng = range_filter_stats(vals, args.scale, args.offset, args.range_min, args.range_max)
        hits.append((rng["hit_ratio"], rng["min_phys"], rng["max_phys"]))
    if hits:
        cand[["hit_ratio", "min_phys", "max_phys"]] = pd.DataFrame(hits, index=cand.index)
    else:
        cand["hit_ratio"] = 0.0
        cand["min_phys"] = np.nan
        cand["max_phys"] = np.nan
    # Export global candidate table
    cand_out = outdir / "candidates_global.csv"
    cand.to_csv(cand_out, index=False)
    print(f"Global candidate CSV: {cand_out}")
    # Decide which candidates to export as timeseries
    selected = []
    if args.range_min is not None or args.range_max is not None:
        # choose those meeting ratio threshold; group by ID and take best few per ID
        ok = cand[cand["hit_ratio"] >= args.range_hit_ratio].copy()
        if ok.empty:
            print("Range filter found no candidates; falling back to top-N by variance.", file=sys.stderr)
        else:
            # per ID, take best by hit_ratio then variance
            for cid, grp in ok.groupby("id"):
                grp = grp.sort_values(["hit_ratio", "variance", "changes", "unique_ratio"], ascending=[False, False, False, False])
                selected.extend(grp.head(args.per_id_limit).to_dict("records"))
    if not selected:
        # fallback → global top-N by variance (limit per ID)
        per_id_count = {}
        for _, row in cand.iterrows():
            cid = int(row["id"])
            per_id_count.setdefault(cid, 0)
            if len(selected) >= args.top:
                break
            if per_id_count[cid] >= args.per_id_limit:
                continue
            selected.append(row.to_dict())
            per_id_count[cid] += 1
    # Export per-candidate CSVs and plots
    exp_index = []
    base_hint = "decoded"
    for row in selected:
        cid = int(row["id"])
        endian = row["endian"]
        off = int(row["offset"])
        ok, pair = export_candidate_timeseries(df_all, cid, endian, off, args.scale, args.offset, outdir, base_hint)
        if ok and pair:
            exp_index.append({
                "id": cid,
                "endian": endian,
                "offset": off,
                "csv": str(pair[0]),
                "plot": str(pair[1])
            })
    idx_df = pd.DataFrame(exp_index)
    idx_path = outdir / "exports_index.csv"
    idx_df.to_csv(idx_path, index=False)
    print(f"Export index: {idx_path}")
    print("Done. Tip: use --range-min/--range-max together with --scale/--offset to filter for plausible physical ranges.")
    print("Example: --scale 0.01 --range-min 0 --range-max 250 (if raw ≈ cm/s → km/h)")

if __name__ == "__main__":
    main()
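
# Example invocation (illustrative; the log file names are placeholders):
#   python3 can_universal_signal_finder.py ride1.log ride2.log \
#       --outdir analyze_out --scale 0.01 --range-min 0 --range-max 250
# This scans every 16-bit word of every CAN ID and keeps candidates whose
# decoded values, scaled by 0.01, mostly fall between 0 and 250 (e.g. a
# wheel-speed signal in km/h when the raw unit is roughly cm/s).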