1.1 fallback, IPv4 preferred (optional)
 * - Byte limit enforced while streaming, retries with backoff + jitter
 * - No echo/output: every response body is consumed by a write callback
 *
 * Result array:
 * [
 *   'ok'        => bool,
 *   'tmp_path'  => string|null,
 *   'mime'      => string|null,
 *   'http_code' => int|null,
 *   'curl_err'  => string|null,
 *   'final_url' => string|null,
 *   'bytes'     => int,
 *   'error'     => string|null,
 * ]
 */

namespace WList\Net;

/**
 * Downloads a remote image into a temporary file using browser-like requests.
 *
 * The target host is checked against an optional whitelist and must not
 * resolve to a private/reserved address before any request is sent.
 *
 * NOTE(review): redirect targets are restricted to HTTP/HTTPS but are not
 * re-validated against the private-address list, and the DNS check is not
 * pinned to the connection (rebinding). Follow redirects manually if that
 * matters for the deployment.
 */
final class ImageFetch
{
    /** Browser-like User-Agent strings, rotated across attempts. */
    private static array $UA_LIST = [
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15',
        'Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0',
    ];

    /** HTTP status codes that justify another attempt. */
    private static array $RETRY_HTTP = [429, 500, 502, 503, 504, 520, 521, 522, 523, 524];

    /**
     * Extensions safeFileNameFromUrl() is allowed to keep. Anything else
     * (e.g. "php", "html") is replaced so a stored file can never carry an
     * executable extension taken from an attacker-controlled URL.
     */
    private const SAFE_IMAGE_EXTENSIONS = [
        'jpg', 'jpeg', 'jpe', 'jfif', 'png', 'gif', 'webp', 'avif',
        'bmp', 'ico', 'tif', 'tiff', 'heic', 'heif', 'svg',
    ];

    /** Non-public IPv4 ranges (private, loopback, link-local, CGNAT, reserved, multicast). */
    private const NON_PUBLIC_V4 = [
        '0.0.0.0/8', '10.0.0.0/8', '100.64.0.0/10', '127.0.0.0/8', '169.254.0.0/16',
        '172.16.0.0/12', '192.168.0.0/16', '198.18.0.0/15', '224.0.0.0/4', '240.0.0.0/4',
    ];

    /** Non-public IPv6 ranges (unspecified, loopback, IPv4-mapped, ULA, link-local, multicast). */
    private const NON_PUBLIC_V6 = [
        '::/128', '::1/128', '::ffff:0:0/96', 'fc00::/7', 'fe80::/10', 'ff00::/8',
    ];

    /**
     * cURL error codes that are worth retrying.
     *
     * Constants are looked up by name because not every ext/curl build
     * defines all of them.
     *
     * @return list<int>
     */
    private static function retryCurlCodes(): array
    {
        $names = [
            'CURLE_OPERATION_TIMEDOUT',
            'CURLE_COULDNT_RESOLVE_HOST',
            'CURLE_COULDNT_CONNECT',
            'CURLE_RECV_ERROR',
            'CURLE_SEND_ERROR',
            'CURLE_GOT_NOTHING',
            'CURLE_HTTP2_STREAM',
            'CURLE_HTTP2',
        ];

        $codes = [];
        foreach ($names as $name) {
            if (\defined($name)) {
                $codes[] = \constant($name);
            }
        }

        return $codes;
    }

    /**
     * Downloads $url into a temporary file and reports the outcome.
     *
     * Options (all optional, see $defaults for the default values):
     *  - page_url: embedding page; used as Referer and probed once for cookies
     *    (only when it is a valid http(s) URL on a public host)
     *  - force_client_hints: send sec-ch-ua* headers
     *  - max_bytes: the transfer is aborted once more bytes were received
     *  - timeout / connect_timeout / max_redirects: passed to cURL
     *  - retries: number of attempts (at least 1)
     *  - retry_backoff_ms: base delay, doubled per attempt, plus 0-150 ms jitter
     *  - whitelist_hosts: allowed domains (subdomains included) or null
     *  - ip_resolve_v4: when false, the 'v4' step of try_ip_resolve_combo is ignored
     *  - referer: 'auto' (origin of $url) | 'none' | 'custom' (custom_referer)
     *  - user_agents: replaces the built-in User-Agent rotation
     *  - debug / debug_peek_bytes / log_prefix: per-attempt error_log() output
     *  - try_http_versions / try_ip_resolve_combo: rotated per attempt
     *  - failopen_hosts: domains for which a failed download is reported with
     *    error 'failopen' instead of the generic failure message
     *
     * On success the caller owns the file at 'tmp_path'.
     *
     * @param string               $url Image URL (http or https).
     * @param array<string, mixed> $opt Option overrides.
     *
     * @return array{ok: bool, tmp_path: ?string, mime: ?string, http_code: ?int,
     *               curl_err: ?string, final_url: ?string, bytes: int, error: ?string}
     */
    public static function download(string $url, array $opt = []): array
    {
        $defaults = [
            'page_url' => null,               // embedding page (offer link)
            'force_client_hints' => true,     // sec-ch-ua etc.
            'max_bytes' => 8_000_000,
            'timeout' => 12,
            'connect_timeout' => 5,
            'max_redirects' => 5,
            'retries' => 3,
            'retry_backoff_ms' => 250,
            'whitelist_hosts' => null,        // ['ikea.com', ...] or null
            'ip_resolve_v4' => true,
            'referer' => 'auto',              // 'auto' | 'none' | 'custom'
            'custom_referer' => null,
            'user_agents' => null,
            'log_prefix' => 'imgfetch',
            'debug' => true,
            'debug_peek_bytes' => 512,
            'try_http_versions' => ['2', '1.1'],
            'try_ip_resolve_combo' => ['v4', 'auto'],
            // Optional: for hard blockers report 'failopen' so the caller can keep the external URL.
            'failopen_hosts' => [],           // e.g. ['ikea.com']
        ];
        $cfg = array_replace($defaults, $opt);

        // URL + host
        if (!self::isValidHttpUrl($url)) {
            return self::fail(null, null, 0, 'Ungültige URL');
        }
        $host = strtolower((string) parse_url($url, PHP_URL_HOST));
        if ($host === '') {
            return self::fail(null, null, 0, 'Ungültige URL (Host)');
        }

        // Whitelist
        if (
            is_array($cfg['whitelist_hosts'])
            && $cfg['whitelist_hosts']
            && !self::hostMatchesAny($host, $cfg['whitelist_hosts'])
        ) {
            return self::fail(null, null, 0, 'Host nicht erlaubt');
        }

        // DNS: refuse hosts that resolve to private/reserved addresses.
        if (!self::hostResolvesPublic($host)) {
            return self::fail(null, null, 0, 'Host nicht öffentlich erreichbar');
        }

        // Rotation lists; empty or malformed overrides fall back to the defaults
        // so the modulo below can never divide by zero.
        $uaList = self::rotation($cfg['user_agents'], self::$UA_LIST);
        $httpVersSeq = self::rotation($cfg['try_http_versions'], $defaults['try_http_versions']);
        $ipSeq = self::rotation($cfg['try_ip_resolve_combo'], $defaults['try_ip_resolve_combo']);

        $defaultReferer = match ($cfg['referer']) {
            'none' => null,
            'custom' => (string) $cfg['custom_referer'],
            default => self::originFromUrl($url),
        };

        // Embedding page: only accepted when it is a real http(s) URL, because it
        // is both sent as a header and fetched by the cookie probe.
        $embeddingUrl = null;
        $embeddingHost = null;
        if (is_string($cfg['page_url']) && self::isValidHttpUrl($cfg['page_url'])) {
            $embeddingUrl = $cfg['page_url'];
            $embeddingHost = strtolower((string) parse_url($embeddingUrl, PHP_URL_HOST));
        }

        $headers = self::requestHeaders(
            self::fetchSiteFor($host, $embeddingHost),
            !empty($cfg['force_client_hints'])
        );

        // Cookie probe on the embedding page (GET, because HEAD often yields no Set-Cookie).
        $cookieJar = [];
        if (
            $embeddingUrl !== null
            && $embeddingHost !== null
            && $embeddingHost !== ''
            && ($embeddingHost === $host || self::hostResolvesPublic($embeddingHost))
        ) {
            $probeHeaders = self::probePageHeaders(
                $embeddingUrl,
                (string) $uaList[0],
                $headers,
                (int) $cfg['connect_timeout'],
                (int) $cfg['timeout']
            );
            $cookieJar = self::mergeCookies($cookieJar, $probeHeaders);
        }

        // Attempt strategy
        $attempts = max(1, (int) $cfg['retries']);
        $maxBytes = (int) $cfg['max_bytes'];
        $peekLimit = max(0, (int) $cfg['debug_peek_bytes']);
        $retryCurl = self::retryCurlCodes();
        $writeErr = \defined('CURLE_WRITE_ERROR') ? \constant('CURLE_WRITE_ERROR') : 23;
        $lastHttp = null;
        $lastCurlErr = null;

        for ($i = 0; $i < $attempts; $i++) {
            $ua = (string) $uaList[$i % count($uaList)];
            $httpVers = (string) $httpVersSeq[$i % count($httpVersSeq)];
            $ipPref = (string) $ipSeq[$i % count($ipSeq)];

            $tmp = tempnam(sys_get_temp_dir(), 'wlimg_');
            if ($tmp === false) {
                return self::fail(null, null, 0, 'Temp-Datei Fehler');
            }
            $fh = fopen($tmp, 'wb');
            if ($fh === false) {
                self::discard($tmp);
                return self::fail(null, null, 0, 'Temp-Datei Fehler');
            }

            $ch = curl_init($url);
            if ($ch === false) {
                fclose($fh);
                self::discard($tmp);
                return self::fail(null, null, 0, 'Download Fehler (init)');
            }

            $respHeaders = [];
            $received = 0;
            $peekBuf = '';

            $headerFn = static function ($ch, string $raw) use (&$respHeaders): int {
                $pair = self::parseHeaderLine($raw);
                if ($pair !== null) {
                    $respHeaders[$pair[0]][] = $pair[1];
                }
                return strlen($raw);
            };

            // Streams the body into the temp file. Returning a value different
            // from the chunk length makes cURL abort with CURLE_WRITE_ERROR,
            // which is how the byte limit is enforced.
            $writeFn = static function ($ch, string $data) use (&$received, &$peekBuf, $fh, $maxBytes, $peekLimit): int {
                if (strlen($peekBuf) < $peekLimit) {
                    $peekBuf .= substr($data, 0, $peekLimit - strlen($peekBuf));
                }
                $received += strlen($data);
                if ($received > $maxBytes) {
                    return 0;
                }
                $written = fwrite($fh, $data);
                return $written === false ? 0 : $written;
            };

            // CURLOPT_WRITEFUNCTION is the only write-mode option set here:
            // ext/curl keeps a single write mode and the option applied last wins,
            // so adding CURLOPT_RETURNTRANSFER or CURLOPT_FILE afterwards would
            // silently bypass the callback (and with it the byte limit).
            $opts = [
                CURLOPT_FOLLOWLOCATION => true,
                CURLOPT_MAXREDIRS => (int) $cfg['max_redirects'],
                CURLOPT_CONNECTTIMEOUT => (int) $cfg['connect_timeout'],
                CURLOPT_TIMEOUT => (int) $cfg['timeout'],
                CURLOPT_USERAGENT => $ua,
                CURLOPT_SSL_VERIFYPEER => true,
                CURLOPT_SSL_VERIFYHOST => 2,
                CURLOPT_HTTPHEADER => $headers,
                CURLOPT_HEADERFUNCTION => $headerFn,
                CURLOPT_WRITEFUNCTION => $writeFn,
                CURLOPT_ACCEPT_ENCODING => '',
            ] + self::protocolLimits();

            if ($httpVers === '2' && \defined('CURL_HTTP_VERSION_2_0')) {
                $opts[CURLOPT_HTTP_VERSION] = \constant('CURL_HTTP_VERSION_2_0');
            } elseif ($httpVers === '1.1' && \defined('CURL_HTTP_VERSION_1_1')) {
                $opts[CURLOPT_HTTP_VERSION] = \constant('CURL_HTTP_VERSION_1_1');
            }

            if (
                $ipPref === 'v4'
                && !empty($cfg['ip_resolve_v4'])
                && \defined('CURLOPT_IPRESOLVE')
                && \defined('CURL_IPRESOLVE_V4')
            ) {
                $opts[CURLOPT_IPRESOLVE] = \constant('CURL_IPRESOLVE_V4');
            }

            // Referer: the embedding page wins over the configured default.
            if ($embeddingUrl) {
                $opts[CURLOPT_REFERER] = $embeddingUrl;
            } elseif ($defaultReferer) {
                $opts[CURLOPT_REFERER] = $defaultReferer;
            }

            // Cookies gathered by the probe and by earlier attempts.
            $cookieStr = self::cookieHeader($cookieJar);
            if ($cookieStr) {
                $opts[CURLOPT_COOKIE] = $cookieStr;
            }

            curl_setopt_array($ch, $opts);
            curl_exec($ch);

            $http = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $ctype = (string) (curl_getinfo($ch, CURLINFO_CONTENT_TYPE) ?: '');
            $finalUrl = (string) (curl_getinfo($ch, CURLINFO_EFFECTIVE_URL) ?: $url);
            $cerr = curl_errno($ch);
            $cerrStr = curl_error($ch);
            // curl_close() has no effect since PHP 8.0; dropping the reference frees the handle.
            unset($ch);
            fclose($fh);

            $lastHttp = $http;
            $lastCurlErr = $cerrStr;

            // Keep cookies from every response so the next attempt can present them.
            $cookieJar = self::mergeCookies($cookieJar, $respHeaders);

            // Debug
            if ($cfg['debug']) {
                $hdrParts = [];
                foreach ($respHeaders as $name => $values) {
                    foreach ($values as $value) {
                        $hdrParts[] = "$name: $value";
                    }
                }
                error_log(sprintf(
                    "%s try#%d http=%d cerr=%d:%s ua='%s' ref='%s' vers=%s ip=%s eff='%s' ctype='%s' bytes=%d hdrs={%s} peek_hex=%s peek_txt=%s url=%s",
                    $cfg['log_prefix'],
                    $i + 1,
                    $http,
                    $cerr,
                    $cerrStr ?: '-',
                    $ua,
                    ($embeddingUrl ?: ($defaultReferer ?: '-')),
                    $httpVers,
                    $ipPref,
                    $finalUrl,
                    $ctype,
                    $received,
                    implode(' | ', $hdrParts),
                    substr(bin2hex($peekBuf), 0, 160),
                    substr((string) preg_replace('/[^\x20-\x7E]/', '.', $peekBuf), 0, 160),
                    $url
                ));
            }

            // Size limit: the write callback aborted the transfer.
            if ($cerr === $writeErr && $received > $maxBytes) {
                self::discard($tmp);
                error_log("{$cfg['log_prefix']} size limit hit after {$received} bytes url=$url");
                return self::fail(null, null, 413, 'Bild zu groß');
            }

            // Success requires a clean transfer: a 2xx status with a cURL error
            // (e.g. timeout mid-body) or an empty body is a truncated download.
            if ($cerr === 0 && $received > 0 && $http >= 200 && $http < 300) {
                $mime = self::detectImageMime($ctype, $tmp);
                if ($mime) {
                    return [
                        'ok' => true,
                        'tmp_path' => $tmp,
                        'mime' => $mime,
                        'http_code' => $http,
                        'curl_err' => null,
                        'final_url' => $finalUrl,
                        'bytes' => $received,
                        'error' => null,
                    ];
                }
                // 2xx but not an image (e.g. a challenge page): try the next
                // rotation step right away.
                self::discard($tmp);
                continue;
            }

            self::discard($tmp);

            // Cloudflare heuristic: a 403 from Cloudflare or one that sets cookies.
            $cf403 = $http === 403 && (
                (isset($respHeaders['server']) && stripos(implode(',', $respHeaders['server']), 'cloudflare') !== false)
                || isset($respHeaders['set-cookie'])
            );
            $retryable = in_array($http, self::$RETRY_HTTP, true)
                || in_array($cerr, $retryCurl, true)
                || $http === 0
                || $cf403;

            if ($i + 1 >= $attempts || !$retryable) {
                break;
            }

            // Exponential backoff with jitter.
            $sleepMs = max(0, (int) ($cfg['retry_backoff_ms'] * (2 ** $i) + random_int(0, 150)));
            usleep($sleepMs * 1000);
        }

        // Fail-open? (e.g. a host that blocks hard)
        if (self::hostMatchesAny($host, (array) $cfg['failopen_hosts'])) {
            return [
                'ok' => false,
                'tmp_path' => null,
                'mime' => null,
                'http_code' => $lastHttp ?? 403,
                'curl_err' => $lastCurlErr,
                'final_url' => $url,
                'bytes' => 0,
                'error' => 'failopen',
            ];
        }

        return self::fail(null, $lastCurlErr, $lastHttp ?? 0, 'Bild-Download fehlgeschlagen');
    }

    /**
     * Builds a random file name whose extension is taken from the URL path.
     *
     * The extension is kept only when it is a known image extension;
     * otherwise 'jpg' is used. Query string and fragment are ignored.
     *
     * @return string 20 hex characters, a dot, and the extension.
     */
    public static function safeFileNameFromUrl(string $url): string
    {
        $path = parse_url($url, PHP_URL_PATH);
        $ext = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';
        if (!in_array($ext, self::SAFE_IMAGE_EXTENSIONS, true)) {
            $ext = 'jpg';
        }

        return bin2hex(random_bytes(10)) . '.' . $ext;
    }

    /** Returns true when $url passes FILTER_VALIDATE_URL and uses the http or https scheme. */
    private static function isValidHttpUrl(string $url): bool
    {
        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            return false;
        }
        $parts = parse_url($url);
        if (!$parts || empty($parts['scheme']) || empty($parts['host'])) {
            return false;
        }
        $scheme = strtolower($parts['scheme']);

        return $scheme === 'http' || $scheme === 'https';
    }

    /**
     * Returns "scheme://host[:port]/" for $url, or '' when scheme or host is
     * missing. The port is omitted when it is the scheme's default.
     */
    private static function originFromUrl(string $url): string
    {
        $parts = parse_url($url);
        if (!$parts || empty($parts['scheme']) || empty($parts['host'])) {
            return '';
        }

        $port = '';
        if (!empty($parts['port'])) {
            $defaultPort = $parts['scheme'] === 'https' ? 443 : 80;
            if ((int) $parts['port'] !== $defaultPort) {
                $port = ':' . $parts['port'];
            }
        }

        return $parts['scheme'] . '://' . $parts['host'] . $port . '/';
    }

    /**
     * Returns true when $host is, or resolves exclusively to, public addresses.
     *
     * IP literals (IPv6 optionally in brackets) are classified directly;
     * names are resolved via A/AAAA records. A single non-public record, a
     * failed lookup, or a lookup without any usable address yields false.
     */
    private static function hostResolvesPublic(string $host): bool
    {
        $literal = trim($host, '[]');
        if (filter_var($literal, FILTER_VALIDATE_IP) !== false) {
            return !self::isPrivateIp($literal);
        }

        // Lookup failures emit warnings; they are treated as "not public".
        $records = @dns_get_record($host, DNS_A | DNS_AAAA);
        if (!is_array($records) || $records === []) {
            return false;
        }

        $checked = 0;
        foreach ($records as $record) {
            $ip = ($record['type'] ?? '') === 'A' ? ($record['ip'] ?? null) : ($record['ipv6'] ?? null);
            if (!is_string($ip) || $ip === '') {
                continue;
            }
            if (self::isPrivateIp($ip)) {
                return false;
            }
            $checked++;
        }

        return $checked > 0;
    }

    /**
     * Returns true when $ip lies in a non-public range or is not a valid
     * IP address at all (unknown input is treated as unsafe).
     */
    private static function isPrivateIp(string $ip): bool
    {
        if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false) {
            $ranges = self::NON_PUBLIC_V4;
        } elseif (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) !== false) {
            $ranges = self::NON_PUBLIC_V6;
        } else {
            return true;
        }

        foreach ($ranges as $range) {
            if (self::ipInCidr($ip, $range)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Tests whether $ip belongs to the network $cidr ("address/prefix").
     *
     * Works for IPv4 and IPv6 by comparing the packed addresses; mixing the
     * two families yields false. A missing prefix means "this single host".
     */
    private static function ipInCidr(string $ip, string $cidr): bool
    {
        [$subnet, $prefix] = array_pad(explode('/', $cidr, 2), 2, null);
        if (
            filter_var($ip, FILTER_VALIDATE_IP) === false
            || filter_var($subnet, FILTER_VALIDATE_IP) === false
        ) {
            return false;
        }

        $binIp = inet_pton($ip);
        $binNet = inet_pton($subnet);
        if ($binIp === false || $binNet === false || strlen($binIp) !== strlen($binNet)) {
            return false;
        }

        $maxBits = strlen($binIp) * 8;
        $bitsTotal = $prefix === null ? $maxBits : max(0, min($maxBits, (int) $prefix));
        $fullBytes = intdiv($bitsTotal, 8);
        $restBits = $bitsTotal % 8;

        if ($fullBytes > 0 && substr($binIp, 0, $fullBytes) !== substr($binNet, 0, $fullBytes)) {
            return false;
        }
        if ($restBits === 0) {
            return true;
        }

        // Compare only the leading $restBits bits of the first partial byte.
        $mask = (0xFF << (8 - $restBits)) & 0xFF;

        return (ord($binIp[$fullBytes]) & $mask) === (ord($binNet[$fullBytes]) & $mask);
    }

    /**
     * Builds the failure result and removes $tmp when it still exists.
     *
     * @param string|null $tmp  Temp file to delete, if any.
     * @param string|null $cerr cURL error text to report.
     * @param int         $http HTTP status; values <= 0 are reported as null.
     * @param string      $msg  Error message for the 'error' key.
     */
    private static function fail(?string $tmp, ?string $cerr, int $http, string $msg): array
    {
        if ($tmp && is_file($tmp)) {
            self::discard($tmp);
        }

        return [
            'ok' => false,
            'tmp_path' => null,
            'mime' => null,
            'http_code' => $http > 0 ? $http : null,
            'curl_err' => $cerr,
            'final_url' => null,
            'bytes' => 0,
            'error' => $msg,
        ];
    }

    /** Best-effort removal of a temp file; a failed delete is not an error for the caller. */
    private static function discard(string $path): void
    {
        if (is_file($path)) {
            @unlink($path);
        }
    }

    /**
     * Normalises a rotation option into a non-empty list.
     *
     * Arrays are re-indexed, a scalar becomes a one-element list, and
     * null/empty input yields $fallback.
     */
    private static function rotation(mixed $candidate, array $fallback): array
    {
        if (is_array($candidate)) {
            $list = array_values($candidate);
        } else {
            $list = $candidate === null ? [] : [$candidate];
        }

        return $list !== [] ? $list : $fallback;
    }

    /** Returns true when $host equals one of $domains or is a subdomain of one. Empty entries are ignored. */
    private static function hostMatchesAny(string $host, array $domains): bool
    {
        foreach ($domains as $domain) {
            $domain = strtolower((string) $domain);
            if ($domain === '') {
                continue;
            }
            if ($host === $domain || str_ends_with($host, '.' . $domain)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Sec-Fetch-Site value for an image on $host embedded by $embeddingHost.
     *
     * Identical hosts give 'same-origin' (scheme and port are not compared),
     * a parent/subdomain relation gives 'same-site', everything else
     * 'cross-site'.
     */
    private static function fetchSiteFor(string $host, ?string $embeddingHost): string
    {
        if ($embeddingHost === null || $embeddingHost === '') {
            return 'cross-site';
        }
        if ($embeddingHost === $host) {
            return 'same-origin';
        }
        if (str_ends_with($host, '.' . $embeddingHost) || str_ends_with($embeddingHost, '.' . $host)) {
            return 'same-site';
        }

        return 'cross-site';
    }

    /**
     * Browser-like request headers for an image fetch.
     *
     * @return list<string>
     */
    private static function requestHeaders(string $fetchSite, bool $clientHints): array
    {
        $headers = [
            'Accept: image/avif,image/webp,image/*;q=0.8,*/*;q=0.5',
            'Accept-Language: de-DE,de;q=0.9,en;q=0.8',
            'Cache-Control: no-cache',
            'Pragma: no-cache',
            'Sec-Fetch-Dest: image',
            'Sec-Fetch-Mode: no-cors',
            'Sec-Fetch-Site: ' . $fetchSite,
        ];
        if ($clientHints) {
            $headers[] = 'sec-ch-ua: "Chromium";v="128", "Not=A?Brand";v="99"';
            $headers[] = 'sec-ch-ua-mobile: ?0';
            $headers[] = 'sec-ch-ua-platform: "Linux"';
        }

        return $headers;
    }

    /**
     * cURL options restricting the initial request and redirects to HTTP(S),
     * or an empty array when the build lacks the constants.
     *
     * @return array<int, int>
     */
    private static function protocolLimits(): array
    {
        $needed = ['CURLOPT_PROTOCOLS', 'CURLOPT_REDIR_PROTOCOLS', 'CURLPROTO_HTTP', 'CURLPROTO_HTTPS'];
        foreach ($needed as $name) {
            if (!\defined($name)) {
                return [];
            }
        }
        $mask = \constant('CURLPROTO_HTTP') | \constant('CURLPROTO_HTTPS');

        return [
            \constant('CURLOPT_PROTOCOLS') => $mask,
            \constant('CURLOPT_REDIR_PROTOCOLS') => $mask,
        ];
    }

    /**
     * Splits a raw response header line into [lower-cased name, value].
     * Status lines, blank lines and lines without a colon yield null.
     *
     * @return array{0: string, 1: string}|null
     */
    private static function parseHeaderLine(string $raw): ?array
    {
        $line = trim($raw);
        if ($line === '' || stripos($line, 'HTTP/') === 0) {
            return null;
        }
        $colon = strpos($line, ':');
        if ($colon === false) {
            return null;
        }

        return [strtolower(trim(substr($line, 0, $colon))), trim(substr($line, $colon + 1))];
    }

    /**
     * Adds the name=value pair of every Set-Cookie header to $jar.
     * Attributes after the first ';' are dropped; cookies with an empty name
     * or value are skipped; later cookies overwrite earlier ones by name.
     *
     * @param array<string, string>       $jar
     * @param array<string, list<string>> $respHeaders Headers keyed by lower-cased name.
     *
     * @return array<string, string>
     */
    private static function mergeCookies(array $jar, array $respHeaders): array
    {
        foreach ($respHeaders['set-cookie'] ?? [] as $line) {
            $pair = trim(explode(';', (string) $line, 2)[0]);
            $eq = strpos($pair, '=');
            if ($eq === false) {
                continue;
            }
            $name = substr($pair, 0, $eq);
            $value = substr($pair, $eq + 1);
            if ($name !== '' && $value !== '') {
                $jar[$name] = $value;
            }
        }

        return $jar;
    }

    /** Serialises $jar as a Cookie header value ("a=1; b=2"), or null when it is empty. */
    private static function cookieHeader(array $jar): ?string
    {
        if ($jar === []) {
            return null;
        }
        $pairs = [];
        foreach ($jar as $name => $value) {
            $pairs[] = "$name=$value";
        }

        return implode('; ', $pairs);
    }

    /**
     * Determines the image MIME type of a finished download.
     *
     * A Content-Type starting with "image/" is trusted as-is; otherwise the
     * file is inspected with getimagesize(). Returns null when neither
     * identifies an image.
     */
    private static function detectImageMime(string $ctype, string $path): ?string
    {
        if (stripos($ctype, 'image/') === 0) {
            return $ctype;
        }
        // getimagesize() warns on non-image data; that is the expected negative case here.
        $info = @getimagesize($path);
        if ($info === false) {
            return null;
        }

        return $info['mime'] ?? 'image/*';
    }

    /**
     * Fetches $pageUrl once and returns its response headers (keyed by
     * lower-cased name). The body is discarded chunk by chunk, so nothing is
     * buffered or echoed. Returns an empty array when cURL cannot be set up.
     *
     * @param list<string> $headers Request headers to send.
     *
     * @return array<string, list<string>>
     */
    private static function probePageHeaders(
        string $pageUrl,
        string $userAgent,
        array $headers,
        int $connectTimeout,
        int $timeout
    ): array {
        $ch = curl_init($pageUrl);
        if ($ch === false) {
            return [];
        }

        $respHeaders = [];
        $opts = [
            CURLOPT_NOBODY => false, // GET (HEAD often yields no Set-Cookie)
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => 3,
            CURLOPT_CONNECTTIMEOUT => $connectTimeout,
            CURLOPT_TIMEOUT => $timeout,
            CURLOPT_USERAGENT => $userAgent,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_HEADERFUNCTION => static function ($ch, string $raw) use (&$respHeaders): int {
                $pair = self::parseHeaderLine($raw);
                if ($pair !== null) {
                    $respHeaders[$pair[0]][] = $pair[1];
                }
                return strlen($raw);
            },
            // Sole write-mode option: swallow the body without storing or printing it.
            CURLOPT_WRITEFUNCTION => static fn ($ch, string $chunk): int => strlen($chunk),
            CURLOPT_ACCEPT_ENCODING => '',
            CURLOPT_REFERER => $pageUrl,
        ] + self::protocolLimits();

        curl_setopt_array($ch, $opts);
        curl_exec($ch);
        unset($ch);

        return $respHeaders;
    }
}