# Kettenoeler/Reverse-Engineering CAN-Bus/can_split_by_id.py

#!/usr/bin/env python3
import re
import sys
import argparse
from pathlib import Path
from collections import defaultdict

# Matches one log line of the form
#   "<timestamp> <TX|RX> 0x<hex id> <dlc> <hex byte> <hex byte> ..."
# NOTE(review): at least one data byte is required and every byte must be
# followed by whitespace, so a frame logged without data bytes is skipped --
# confirm this matches the logger's output format.
LOG_PATTERN = re.compile(r"(\d+)\s+(TX|RX)\s+0x([0-9A-Fa-f]+)\s+(\d+)\s+((?:[0-9A-Fa-f]{2}\s+)+)")

# Column order of overview_ids.csv. Passing it explicitly keeps the header
# (and the sort below) valid even when no frame matched at all.
_OVERVIEW_COLUMNS = [
    "id_dec", "id_hex", "count", "rx", "tx",
    "duration_s", "rate_hz_est", "first_file", "dlc_variants",
]


def _overview_rows(stats):
    """Turn the per-ID statistics into one overview row (dict) per CAN ID."""
    rows = []
    for cid, s in stats.items():
        dur_ms = 0 if s["first_ts"] is None else s["last_ts"] - s["first_ts"]
        # Timestamps are treated as milliseconds. With --rx-only only RX
        # frames are counted at all, so "count" is the right numerator in
        # both modes.
        rate_hz = s["count"] / (dur_ms / 1000.0) if dur_ms > 0 else 0.0
        rows.append({
            "id_dec": cid,
            "id_hex": s["id_hex"],
            "count": s["count"],
            "rx": s["rx"],
            "tx": s["tx"],
            "duration_s": round(dur_ms / 1000.0, 6),
            "rate_hz_est": round(rate_hz, 6),
            "first_file": s["first_file"],
            # Sort numerically first; sorting the strings would put "10" before "8".
            "dlc_variants": ",".join(str(x) for x in sorted(s["dlc_set"])),
        })
    return rows


def main(argv=None):
    """Split CAN log files into per-ID trace files and write an overview CSV.

    Every input line matching LOG_PATTERN is copied verbatim into
    ``<outdir>/0x<ID>_<input file name>.trace``. Afterwards
    ``<outdir>/overview_ids.csv`` is written with one row per CAN ID
    (frame counts, time span, estimated rate, observed DLC values), sorted by
    estimated rate and then by count, both descending.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When None, argparse reads ``sys.argv[1:]``.
    """
    from contextlib import ExitStack

    ap = argparse.ArgumentParser(description="Split Kettenöler CAN log(s) into per-ID .trace files and build an overview")
    ap.add_argument("logs", nargs="+", help="Input log file(s)")
    ap.add_argument("--outdir", default="traces", help="Output directory for per-ID trace files")
    ap.add_argument("--rx-only", action="store_true", help="Keep only RX frames in traces and stats")
    args = ap.parse_args(argv)

    outdir = Path(args.outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    # Per-ID accumulator, keyed by the numeric CAN ID.
    stats = defaultdict(lambda: {
        "id_hex": None, "rx": 0, "tx": 0, "count": 0, "first_ts": None, "last_ts": None,
        "first_file": None, "dlc_set": set()
    })
    writers = {}  # trace file path -> open handle
    total = 0     # lines that matched LOG_PATTERN (before the --rx-only filter)
    written = 0   # lines actually copied into a trace file

    # The ExitStack closes every trace file even if parsing fails midway.
    with ExitStack() as stack:
        for p in args.logs:
            src_name = Path(p).name
            # Read as UTF-8 to match the encoding of the trace files written
            # below; undecodable bytes are dropped as before.
            with open(p, "r", encoding="utf-8", errors="ignore") as f:
                for line in f:
                    m = LOG_PATTERN.match(line)
                    if not m:
                        continue
                    ts_text, direction, cid_raw, dlc_text = m.group(1, 2, 3, 4)

                    total += 1
                    if args.rx_only and direction != "RX":
                        continue

                    ts = int(ts_text)
                    cid_hex = cid_raw.upper()
                    s = stats[int(cid_hex, 16)]
                    s["id_hex"] = f"0x{cid_hex}"
                    s["count"] += 1
                    if direction == "RX":
                        s["rx"] += 1
                    else:
                        s["tx"] += 1
                    s["first_ts"] = ts if s["first_ts"] is None else min(s["first_ts"], ts)
                    s["last_ts"] = ts if s["last_ts"] is None else max(s["last_ts"], ts)
                    s["first_file"] = s["first_file"] or src_name
                    s["dlc_set"].add(int(dlc_text))

                    # filename pattern: 0xID_<srcfile>.trace
                    fn = outdir / f"0x{cid_hex}_{src_name}.trace"
                    w = writers.get(fn)
                    if w is None:
                        # Truncate on first open so re-running over the same
                        # outdir does not duplicate frames; the handle is
                        # cached, so later lines of this run are appended.
                        w = stack.enter_context(fn.open("w", encoding="utf-8"))
                        writers[fn] = w
                    w.write(line)
                    written += 1

    # build overview CSV
    import pandas as pd
    df = pd.DataFrame(_overview_rows(stats), columns=_OVERVIEW_COLUMNS)
    df = df.sort_values(["rate_hz_est", "count"], ascending=[False, False])
    csv_path = outdir / "overview_ids.csv"
    df.to_csv(csv_path, index=False)

    print(f"Done. Parsed {total} lines, wrote {written} lines into per-ID traces at {outdir}.")
    print(f"Overview: {csv_path}")

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()