first();

        if (empty($model->id)) {
            $model = new Url();
            $model->init($url);
            $model->check();
        }

        $model->ensureBundleIsAssociated($bundle);
        $model->save();

        return $model;
    }

    /**
     * Return the link as an HtmlElement.
     *
     * The anchor's href is the final URL; rel, title and class come from the
     * model and default to empty strings. When running in the console, a
     * root-relative final URL (starting with "/") that is not a fragment is
     * prefixed with the application URL.
     *
     * @param string|null $innerHtml Inner HTML of the anchor; the initial URL
     *                               is used when null.
     */
    public function toHtmlElement(?string $innerHtml = null): HtmlElement
    {
        $finalUrl = $this->final_url;

        if (Str::startsWith($finalUrl, '/') && !$this->is_fragment && app()->runningInConsole()) {
            // Read the application URL from configuration rather than env():
            // env() returns null outside config files once the configuration
            // is cached. The trailing slash is trimmed to avoid "//path".
            $finalUrl = rtrim((string) config('app.url'), '/') . $finalUrl;
        }

        return new HtmlElement('a', [
            'href' => $finalUrl,
            'rel' => $this->rel ?? '',
            'title' => $this->link_title ?? '',
            'class' => $this->css_class ?? '',
        ], $innerHtml ?? $this->initial_url);
    }

    /**
     * Check/update URL data, optionally ignoring the last check date.
     *
     * @param bool $force When true, the check runs even if the URL was
     *                    checked recently.
     *
     * @return bool True when the check modified at least one attribute
     *              (checked_at excluded); false when nothing changed or when
     *              the check was skipped (recently checked, or fragment URL).
     */
    public function check(bool $force = false): bool
    {
        // Unless forced, skip URLs checked less than roughly a month ago. The
        // threshold is shifted by a random -4..+4 days so that all links do
        // not get re-checked at the same time.
        if (
            !$force
            && !empty($this->checked_at)
            && $this->checked_at->gt(now()->subMonth()->addDays(rand(-4, 4)))
        ) {
            return false;
        }

        if ($this->is_fragment) {
            return false;
        }

        // NOTE(review): the title setters called below read checked_at before
        // it is refreshed, so the "vérifié le" date embedded in link_title is
        // the previous check date (or today on a first check) - confirm this
        // is intended.
        if ($this->is_external) {
            $this->checkExternal();
        } else {
            $this->checkInternal();
        }

        $this->setCssClass();

        // The dirty state is captured before checked_at is refreshed so that
        // the new timestamp alone is not reported as a change.
        $changed = $this->isDirty();
        $this->checked_at = now();

        return $changed;
    }

    /**
     * Gives the list of bundles where the URL can be found. The initial list
     * is the one the URL was created with, minus the bundles the URL was
     * removed from later.
     *
     * A bundle is kept when at least one of its rendered contents contains
     * the initial, partner or final URL. A bundle whose rendering yields no
     * content at all is left in the list.
     *
     * @return array List of bundle paths, re-indexed from 0.
     */
    public function bundlesReferencingUrl(): array
    {
        $bundles = $this->bundles ?? [];

        // Drop null/empty URLs so that an unset partner_url (or final_url)
        // cannot produce a spurious match against every content.
        $needles = array_values(array_filter(
            [$this->initial_url, $this->partner_url, $this->final_url],
            static fn ($candidate) => is_string($candidate) && $candidate !== ''
        ));

        if ($needles === []) {
            // Nothing to search for: leave the stored list untouched.
            return array_values($bundles);
        }

        foreach ($bundles as $index => $bundlePath) {
            // NOTE(review): env() returns null outside config files once the
            // configuration is cached; consider exposing CONTENT_DISK through
            // a config key.
            $bundle = new Bundle($bundlePath, Storage::disk(env('CONTENT_DISK')));
            $rendered = $bundle->render(true);

            $hasContent = false;
            $found = false;
            foreach ($rendered as $content) {
                $hasContent = true;
                if (Str::contains($content, $needles)) {
                    $found = true;
                    break;
                }
            }

            // Only drop the bundle when it rendered something and none of the
            // rendered contents references the URL.
            if ($hasContent && !$found) {
                unset($bundles[$index]);
            }
        }

        // Re-index so the result is a plain list without key gaps.
        return array_values($bundles);
    }

    /**
     * Get the attributes that should be cast.
     *
     * @return array<string, string> Attribute name => cast type
     */
    protected function casts(): array
    {
        return [
            'bundles' => 'array',
            'is_fragment' => 'boolean',
            'is_external' => 'boolean',
            'is_dead' => 'boolean',
            'dead_since' => 'datetime',
            'checked_at' => 'datetime',
        ];
    }

    /**
     * Check the URL as an external website.
     *
     * The base URL is requested with HEAD first, then with GET. The loop
     * stops at the first response that is successful or has status 403; in
     * that case the URL is marked alive and, when no partner URL is set, the
     * final URL is replaced by the response's effective URI. A connection
     * error or any other status marks the URL as dead (keeping an existing
     * dead_since date) and the next method is tried. The link title is
     * recomputed after every attempt.
     */
    private function checkExternal(): void
    {
        $host = parse_url($this->base_url, PHP_URL_HOST);
        $initialUrl = $this->initial_url;

        // Options handed to the HTTP client: certificate verification is
        // turned off and HTTP/2 is requested.
        $options = [
            'verify' => false,
            'version' => 2.0,
        ];

        // Registers (or overwrites) a "checkUrl" macro on the Http facade
        // that builds a pre-configured request imitating a desktop browser.
        Http::macro('checkUrl', function () use ($options, $host, $initialUrl) {
            return Http::timeout(10)
                ->withOptions($options)
                ->withHeaders([
                    'Host' => $host,
                    'Referer' => $initialUrl,
                    'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
                    'Accept-Encoding' => 'gzip, deflate, br',
                    'Accept-Language' => 'fr-FR,fr;q=0.9',
                    'Connection' => 'keep-alive',
                    'Sec-Fetch-Dest' => 'document',
                    'Sec-Fetch-Mode' => 'navigate',
                    'Sec-Fetch-Site' => 'same-origin',
                ])
                ->accept('text/html,application/xhtml+xml,application/xml,application/pdf;q=0.9,*/*;q=0.8');
        });

        foreach (['head', 'get'] as $method) {
            try {
                $response = Http::checkUrl()->{$method}($this->base_url);
            } catch (ConnectionException $ex) {
                $this->is_dead = true;
                $this->dead_reason = $ex->getMessage();
                if (empty($this->dead_since)) {
                    $this->dead_since = now();
                }
                $this->setTitleForExternalUrl(null);

                continue;
            }

            // A 403 is treated like a success - presumably because some sites
            // reject automated requests while the page itself is reachable.
            if ($response->successful() || $response->status() === 403) {
                $this->is_dead = false;
                $this->dead_reason = null;
                $this->dead_since = null;
                if (empty($this->partner_url)) {
                    // Only set the final url if we didn't already set a partner
                    // url as the partner url has precedence
                    $this->final_url = (string) $response->effectiveUri();
                }
                $this->setTitleForExternalUrl($response->body());

                break;
            }

            $this->is_dead = true;
            $this->dead_reason = sprintf('%d - %s', $response->status(), $response->reason());
            if (empty($this->dead_since)) {
                $this->dead_since = now();
            }
            $this->setTitleForExternalUrl($response->body());
        }
    }

    /**
     * Check the URL as an internal bundle.
     *
     * The URL is dead when the bundle at base_url does not exist. When the
     * bundle exists, the page title is refreshed from it and any previous
     * dead marker is cleared. The link title is recomputed in both cases.
     */
    private function checkInternal(): void
    {
        $bundle = new Bundle($this->base_url, Storage::disk(env('CONTENT_DISK')));

        $this->is_dead = !$bundle->exists();

        if ($this->is_dead) {
            if (empty($this->dead_since)) {
                $this->dead_since = now();
            }
        } else {
            // Clear the dead markers when the bundle is (back) available, as
            // checkExternal() does; otherwise the title would keep stating
            // that the link is dead.
            $this->dead_since = null;
            $this->dead_reason = null;
            $this->page_title = $bundle->getArticleTitle();
        }

        $this->setTitleForInternalUrl($bundle);
    }

    /**
     * Calculate the title as it will be shown in the final HTML link, based on
     * internal bundle data.
     *
     * The title is made of a fixed prefix, the section title in brackets when
     * the bundle has a section with a non-empty title, the page title, the
     * date since which the link is dead (if any) and the check date.
     */
    private function setTitleForInternalUrl(Bundle $bundle): void
    {
        $section = $bundle->getSection();
        $sectionName = $section ? $section->getArticleTitle() : null;

        $titleParts = [
            'Lien interne :',
        ];

        if (!empty($sectionName)) {
            $titleParts[] = sprintf('[%s]', $sectionName);
        }

        $titleParts[] = $this->page_title;

        if (!empty($this->dead_since)) {
            $titleParts[] = sprintf('(mort depuis le %s)', $this->dead_since->format('d/m/Y'));
        }

        $titleParts[] = sprintf('(vérifié le %s)', ($this->checked_at ?? now())->format('d/m/Y'));

        $this->link_title = implode(' ', $titleParts);
    }

    /**
     * Calculate the title as it will be shown in the final HTML link, based on
     * external url data.
     *
     * The displayed name is the first non-empty <title> found in the given
     * HTML body, falling back to the host of the initial URL; it is limited
     * to 150 characters. The dead date and reason (if any) and the check date
     * are appended.
     *
     * @param string|null $body HTML body of the response, if any.
     */
    private function setTitleForExternalUrl(?string $body = null): void
    {
        $titleParts = [
            'Lien',
        ];

        if (!empty($this->partner_url)) {
            $titleParts[] = 'affilié';
        } else {
            $titleParts[] = 'externe';
        }

        $titleParts[] = ':';

        // parse_url() yields null/false when no host can be extracted; cast
        // so the string helpers below always receive a string.
        $title = (string) parse_url($this->initial_url, PHP_URL_HOST);

        if (!empty($body)) {
            $html5 = new HTML5([
                // Required to use xpath, see
                // https://github.com/Masterminds/html5-php/issues/123
                'disable_html_ns' => true,
            ]);
            $dom = $html5->loadHTML($body);
            $xpath = new DOMXPath($dom);
            $nodes = $xpath->query('//title');

            foreach ($nodes as $node) {
                $content = Str::trim($node->nodeValue);
                if (!empty($content)) {
                    $title = $content;
                    break;
                }
            }
        }

        if (Str::length($title) > 150) {
            $title = Str::limit($title, 150);
        }

        $titleParts[] = $title;

        if (!empty($this->dead_since)) {
            $titleParts[] = sprintf(
                '(mort depuis le %s : %s)',
                $this->dead_since->format('d/m/Y'),
                // dead_reason may be unset; never hand null to Str::limit().
                Str::limit((string) $this->dead_reason, 100)
            );
        }

        $titleParts[] = sprintf('(vérifié le %s)', ($this->checked_at ?? now())->format('d/m/Y'));

        $this->link_title = implode(' ', $titleParts);
    }

    /**
     * Set the appropriate CSS class.
     *
     * Adds the "dead" class when the URL is dead and removes it otherwise,
     * leaving the other classes untouched.
     */
    private function setCssClass(): void
    {
        // Split on whitespace without keeping empty tokens: explode() on an
        // empty string yields [''], which used to produce " dead" with a
        // leading space.
        $classes = preg_split('/\s+/', $this->css_class ?? '', -1, PREG_SPLIT_NO_EMPTY) ?: [];

        if ($this->is_dead) {
            if (!in_array('dead', $classes, true)) {
                $classes[] = 'dead';
            }
        } else {
            $classes = array_filter($classes, static fn ($item) => $item !== 'dead');
        }

        $this->css_class = implode(' ', $classes);
    }

    /**
     * Touch related bundles to reflect changes, but only if necessary. Bundles
     * are not touched when Url is not dirty or if it was not saved yet.
     */
    private function touchBundles(): void
    {
        if (!$this->isDirty() || empty($this->id)) {
            return;
        }

        // bundles may be unset on a model that was never associated.
        foreach ($this->bundles ?? [] as $bundlePath) {
            $bundle = new Bundle($bundlePath, Storage::disk(env('CONTENT_DISK')));
            $bundle->touch();
        }
    }

    /**
     * Associate a bundle with the model.
     *
     * The bundle path is appended to the bundles list unless it is already
     * present.
     */
    private function ensureBundleIsAssociated(Bundle $bundle): void
    {
        $bundles = $this->bundles ?? [];
        $bundlePath = $bundle->getPath();

        if (!in_array($bundlePath, $bundles, true)) {
            $bundles[] = $bundlePath;
            $this->bundles = $bundles;
        }
    }

    /**
     * Initialise some properties from the specified url.
     *
     * Sets the initial and base URLs (base URL = initial URL without its
     * fragment), the fragment/external flags and, for external URLs, the CSS
     * class, rel attribute and partner data. The final URL is the partner's
     * affiliate link when there is one, the initial URL otherwise.
     *
     * @param string $url URL as found in the content.
     */
    private function init(string $url): void
    {
        $this->initial_url = $url;
        $this->is_fragment = Str::startsWith($url, '#');
        $this->base_url = (string) Uri::new($this->initial_url)->withFragment(null);

        // Absolute URLs pointing to localhost or to the application's own
        // host are internal. The host is extracted from the configured
        // application URL (which normally includes a scheme); the raw value
        // is used as-is when it is a bare host name. An empty value is
        // skipped, since a bare "//" marker would match every absolute URL.
        $internalMarkers = ['//localhost'];
        $appUrl = (string) config('app.url');
        $appHost = parse_url($appUrl, PHP_URL_HOST) ?: $appUrl;
        if ($appHost !== '') {
            $internalMarkers[] = '//' . $appHost;
        }

        $this->is_external = Str::startsWith($url, [
            'http://',
            'https://',
        ]) && !Str::contains($url, $internalMarkers);

        if ($this->is_external) {
            $this->css_class = 'external';
            $this->rel = 'nofollow noreferrer noopener';

            try {
                $partner = PartnerFactory::getPartner($url);
            } catch (PartnerCannotBeFound $ex) {
                $partner = null;
            }

            $this->partner = $partner ? get_class($partner) : null;

            if (!empty($partner)) {
                $this->css_class .= ' affiliate';
                $this->rel = 'nofollow';
                $this->partner_url = $partner->getAffiliateLink();
                if (!empty($this->partner_url)) {
                    $this->final_url = $this->partner_url;
                }
            }
        }

        if (empty($this->final_url)) {
            $this->final_url = $this->initial_url;
        }
    }
}