1
0
cms11/app/Models/Url.php

408 lines
11 KiB
PHP

<?php
namespace App\Models;
use App\Classes\Bundle;
use App\Exceptions\PartnerCannotBeFound;
use App\Services\Partners\Facades\Partner as PartnerFactory;
use DOMXPath;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Http\Client\ConnectionException;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;
use League\CommonMark\Util\HtmlElement;
use League\Uri\Uri;
use Masterminds\HTML5;
/**
 * A link found in one or more content bundles, together with the outcome of
 * its last check and the attributes used to render it as an HTML anchor.
 *
 * Attributes read or written by this class:
 *
 * @property string $initial_url URL exactly as it was first seen (normalised through Uri)
 * @property string $base_url initial URL without its fragment; this is what gets checked
 * @property string $final_url URL used as href (partner URL, effective URL after the request, or initial URL)
 * @property string|null $partner_url affiliate link returned by the matching partner, if any
 * @property string|null $partner class name of the matching partner, if any
 * @property bool $is_fragment true when the URL starts with "#"
 * @property bool $is_external true for http(s) URLs that do not point at this application
 * @property bool $is_dead result of the last check
 * @property \Illuminate\Support\Carbon|null $dead_since first time the URL was seen dead
 * @property string|null $dead_reason connection error or "status - reason" of the failed check
 * @property \Illuminate\Support\Carbon|null $checked_at date of the last check
 * @property string|null $page_title title of the target page
 * @property string|null $link_title text of the anchor "title" attribute
 * @property string|null $css_class space separated CSS classes of the anchor
 * @property string|null $rel "rel" attribute of the anchor
 * @property array|null $bundles paths of the bundles the URL was associated with
 */
class Url extends Model
{
    use HasFactory;

    /**
     * Return the model matching the URL, creating and checking it when it is
     * not known yet, and record that the bundle references it.
     *
     * An already known URL is not re-checked here. The model is saved before
     * being returned.
     */
    public static function from(string $url, Bundle $bundle): self
    {
        $url = (string) Uri::new($url);

        $model = self::query()->where('initial_url', '=', $url)->first();
        if ($model === null) {
            $model = new self();
            $model->init($url);
            $model->check();
        }

        $model->ensureBundleIsAssociated($bundle);
        $model->save();

        return $model;
    }

    /**
     * Return the link as a HtmlElement
     *
     * @param string|null $innerHtml content of the anchor; defaults to the initial URL
     */
    public function toHtmlElement(?string $innerHtml = null): HtmlElement
    {
        $finalUrl = $this->final_url;

        // When running in console, root-relative links are made absolute by
        // prefixing the application URL. The value is read through config()
        // because env() returns null outside config files once the
        // configuration is cached.
        if (Str::startsWith($finalUrl, '/') && !$this->is_fragment && app()->runningInConsole()) {
            $finalUrl = rtrim((string) config('app.url'), '/') . $finalUrl;
        }

        return new HtmlElement('a', [
            'href' => $finalUrl,
            'rel' => $this->rel ?? '',
            'title' => $this->link_title ?? '',
            'class' => $this->css_class ?? '',
        ], $innerHtml ?? $this->initial_url);
    }

    /**
     * Check/update URL data, optionally ignoring last check date
     *
     * The model is not saved by this method.
     *
     * @param bool $force run the check even if the URL was checked recently
     * @return bool true when the check modified at least one attribute; false
     *              when nothing changed or when the check was skipped
     *              (recently checked, or fragment-only URL)
     */
    public function check(bool $force = false): bool
    {
        if (!$force && $this->wasCheckedRecently()) {
            return false;
        }

        if ($this->is_fragment) {
            return false;
        }

        if ($this->is_external) {
            $this->checkExternal();
        } else {
            $this->checkInternal();
        }
        $this->setCssClass();

        // Dirtiness is evaluated before updating checked_at so that the check
        // date alone never counts as a change.
        $changed = $this->isDirty();
        $this->checked_at = now();

        return $changed;
    }

    /**
     * Gives a list of bundle where the URL can be found. Initial list is the
     * one the URL was created with, minus the ones the URL was removed from
     * later.
     *
     * A bundle is kept when at least one of its rendered contents contains the
     * initial, partner or final URL.
     *
     * @return array<int, string> bundle paths, re-indexed from 0
     */
    public function bundlesReferencingUrl(): array
    {
        // Unset URLs (typically partner_url) are dropped: an empty needle
        // would match any content.
        $needles = array_values(array_unique(array_filter(
            [$this->initial_url, $this->partner_url, $this->final_url],
            static fn ($candidate): bool => is_string($candidate) && $candidate !== '',
        )));

        $disk = $this->contentDisk();
        $referencing = [];
        foreach ($this->bundles ?? [] as $bundlePath) {
            $rendered = (new Bundle($bundlePath, $disk))->render(true);
            foreach ($rendered as $content) {
                if (Str::contains($content, $needles)) {
                    $referencing[] = $bundlePath;
                    break;
                }
            }
        }

        return $referencing;
    }

    /**
     * Get the attributes that should be cast.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        return [
            'bundles' => 'array',
            'is_fragment' => 'boolean',
            'is_external' => 'boolean',
            'is_dead' => 'boolean',
            'dead_since' => 'datetime',
            'checked_at' => 'datetime',
        ];
    }

    /**
     * Tell whether the last check is recent enough to skip a new one.
     *
     * The threshold is one month ago shifted by a random -4..+4 days, so that
     * all links are not re-checked at the same time.
     */
    private function wasCheckedRecently(): bool
    {
        return !empty($this->checked_at)
            && $this->checked_at->gt(now()->subMonth()->addDays(rand(-4, 4)));
    }

    /**
     * Disk holding the content bundles.
     *
     * NOTE(review): env() returns null outside config files once the
     * configuration is cached; this should probably be exposed through a
     * config key, but none is visible from this class.
     */
    private function contentDisk(): \Illuminate\Contracts\Filesystem\Filesystem
    {
        return Storage::disk(env('CONTENT_DISK'));
    }

    /**
     * Flag the URL as dead, keeping the date it was first seen dead.
     */
    private function markDead(?string $reason): void
    {
        $this->is_dead = true;
        $this->dead_reason = $reason;
        if (empty($this->dead_since)) {
            $this->dead_since = now();
        }
    }

    /**
     * Flag the URL as alive and forget any previous failure.
     */
    private function markAlive(): void
    {
        $this->is_dead = false;
        $this->dead_reason = null;
        $this->dead_since = null;
    }

    /**
     * Build the browser-like HTTP request used to check an external URL.
     */
    private function externalRequest(): \Illuminate\Http\Client\PendingRequest
    {
        return Http::timeout(10)
            ->withOptions([
                // Certificate validation is disabled: an invalid certificate
                // is not treated as a dead link.
                'verify' => false,
                'version' => 2.0,
            ])
            ->withHeaders([
                'Host' => parse_url($this->base_url, PHP_URL_HOST),
                'Referer' => $this->initial_url,
                'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
                'Accept-Encoding' => 'gzip, deflate, br',
                'Accept-Language' => 'fr-FR,fr;q=0.9',
                'Connection' => 'keep-alive',
                'Sec-Fetch-Dest' => 'document',
                'Sec-Fetch-Mode' => 'navigate',
                'Sec-Fetch-Site' => 'same-origin',
            ])
            ->accept('text/html,application/xhtml+xml,application/xml,application/pdf;q=0.9,*/*;q=0.8');
    }

    /**
     * Check the URL as an external website
     *
     * A connection failure or a non successful status marks the URL as dead.
     * A 403 is considered alive.
     */
    private function checkExternal(): void
    {
        try {
            $response = $this->externalRequest()->get($this->base_url);
        } catch (ConnectionException $ex) {
            $this->markDead($ex->getMessage());
            $this->setTitleForExternalUrl(null);

            return;
        }

        if ($response->successful() || $response->status() === 403) {
            $this->markAlive();
            if (empty($this->partner_url)) {
                // Only set the final url if we didn't already set a partner
                // url as the partner url has precedence
                $this->final_url = (string) $response->effectiveUri();
            }
        } else {
            $this->markDead(sprintf('%d - %s', $response->status(), $response->reason()));
        }

        // The body of an error (or 403) page is not parsed, so its <title>
        // never replaces the title of the real page.
        $this->setTitleForExternalUrl($response->successful() ? $response->body() : null);
    }

    /**
     * Check the URL as an internal bundle
     *
     * The URL is dead when the bundle does not exist.
     */
    private function checkInternal(): void
    {
        $bundle = new Bundle($this->base_url, $this->contentDisk());

        if ($bundle->exists()) {
            // Also clears dead_since, so a bundle that came back is no longer
            // announced as dead in the link title.
            $this->markAlive();
            $this->page_title = $bundle->getArticleTitle();
        } else {
            $this->markDead(null);
        }

        $this->setTitleForInternalUrl($bundle);
    }

    /**
     * Calculate the title as it will be shown in the final HTML link, based on
     * internal bundle data
     *
     * The "vérifié le" date is checked_at as it is when this method runs;
     * check() only updates checked_at afterwards, so on a re-check this is
     * the date of the previous check.
     */
    private function setTitleForInternalUrl(Bundle $bundle): void
    {
        $section = $bundle->getSection();
        $sectionName = $section ? $section->getArticleTitle() : null;

        $titleParts = [
            'Lien interne :',
        ];
        if (!empty($sectionName)) {
            $titleParts[] = sprintf('[%s]', $sectionName);
        }
        if (!empty($this->page_title)) {
            $titleParts[] = $this->page_title;
        }
        if (!empty($this->dead_since)) {
            $titleParts[] = sprintf('(mort depuis le %s)', $this->dead_since->format('d/m/Y'));
        }
        $titleParts[] = sprintf('(vérifié le %s)', ($this->checked_at ?? now())->format('d/m/Y'));

        $this->link_title = implode(' ', $titleParts);
    }

    /**
     * Calculate the title as it will be shown in the final HTML link, based on
     * external url data
     *
     * The "vérifié le" date is checked_at as it is when this method runs;
     * check() only updates checked_at afterwards, so on a re-check this is
     * the date of the previous check.
     *
     * @param string|null $body HTML of the page; when given, its <title> updates page_title
     */
    private function setTitleForExternalUrl(?string $body = null): void
    {
        $titleParts = [
            'Lien',
            !empty($this->partner_url) ? 'affilié' : 'externe',
            ':',
        ];

        if (!empty($body)) {
            $pageTitle = $this->extractPageTitle($body);
            if ($pageTitle !== null) {
                $this->page_title = $pageTitle;
            }
        }

        // Without a known page title, fall back to the host name
        $titleParts[] = $this->page_title ?? parse_url($this->initial_url, PHP_URL_HOST);

        if (!empty($this->dead_since)) {
            $titleParts[] = sprintf(
                '(mort depuis le %s : %s)',
                $this->dead_since->format('d/m/Y'),
                Str::limit($this->dead_reason ?? '', 100)
            );
        }
        $titleParts[] = sprintf('(vérifié le %s)', ($this->checked_at ?? now())->format('d/m/Y'));

        $this->link_title = implode(' ', $titleParts);
    }

    /**
     * Return the first non empty <head><title> of an HTML document, limited
     * to 150 characters, or null when there is none.
     */
    private function extractPageTitle(string $body): ?string
    {
        $html5 = new HTML5([
            // Required to use xpath, see
            // https://github.com/Masterminds/html5-php/issues/123
            'disable_html_ns' => true,
        ]);
        $xpath = new DOMXPath($html5->loadHTML($body));

        foreach ($xpath->query('//head/title') as $node) {
            $content = Str::trim($node->nodeValue);
            if (!empty($content)) {
                return Str::limit($content, 150);
            }
        }

        return null;
    }

    /**
     * Set the appropriate CSS class
     *
     * Adds or removes the "dead" class according to is_dead, leaving other
     * classes untouched.
     */
    private function setCssClass(): void
    {
        // Empty entries are dropped so that an unset css_class does not end
        // up as " dead" with a leading space.
        $classes = array_values(array_filter(
            explode(' ', $this->css_class ?? ''),
            static fn (string $class): bool => $class !== '' && $class !== 'dead',
        ));

        if ($this->is_dead) {
            $classes[] = 'dead';
        }

        $this->css_class = implode(' ', $classes);
    }

    /**
     * Touch related bundles to reflect changes, but only if necessary. Bundles
     * are not touched when Url is not dirty or if it was not saved yet.
     */
    private function touchBundles(): void
    {
        if (!$this->isDirty() || empty($this->id)) {
            return;
        }

        $disk = $this->contentDisk();
        foreach ($this->bundles ?? [] as $bundlePath) {
            (new Bundle($bundlePath, $disk))->touch();
        }
    }

    /**
     * Associate a bundle with the model
     *
     * The bundle path is appended to the list unless it is already there.
     */
    private function ensureBundleIsAssociated(Bundle $bundle): void
    {
        $bundles = $this->bundles ?? [];
        $path = $bundle->getPath();

        if (!in_array($path, $bundles, true)) {
            $bundles[] = $path;
            $this->bundles = $bundles;
        }
    }

    /**
     * Substrings identifying an absolute URL as pointing at this application:
     * "//localhost" and "//" followed by the host of the application URL.
     *
     * @return array<int, string>
     */
    private function internalHostMarkers(): array
    {
        $markers = ['//localhost'];

        // The application URL normally carries a scheme, so only its host is
        // compared. When no host can be parsed (value without scheme), the
        // raw value is used. Nothing is added for an empty value, as a bare
        // "//" would match every absolute URL.
        $appUrl = (string) config('app.url');
        $appHost = parse_url($appUrl, PHP_URL_HOST) ?: $appUrl;
        if ($appHost !== '') {
            $markers[] = '//' . $appHost;
        }

        return $markers;
    }

    /**
     * Initialise some properties from specified url
     *
     * Sets the URL variants, the fragment/external flags and, for external
     * URLs, the CSS class, rel attribute and affiliate data.
     */
    private function init(string $url): void
    {
        $this->initial_url = $url;
        $this->is_fragment = Str::startsWith($url, '#');
        $this->base_url = (string) Uri::new($this->initial_url)->withFragment(null);
        $this->is_external = Str::startsWith($url, ['http://', 'https://'])
            && !Str::contains($url, $this->internalHostMarkers());

        if ($this->is_external) {
            $this->css_class = 'external';
            $this->rel = 'nofollow noreferrer noopener';

            try {
                $partner = PartnerFactory::getPartner($url);
            } catch (PartnerCannotBeFound $ex) {
                $partner = null;
            }

            $this->partner = $partner ? get_class($partner) : null;
            if (!empty($partner)) {
                $this->css_class .= ' affiliate';
                $this->rel = 'nofollow';
                $this->partner_url = $partner->getAffiliateLink();
                if (!empty($this->partner_url)) {
                    $this->final_url = $this->partner_url;
                }
            }
        }

        if (empty($this->final_url)) {
            $this->final_url = $this->initial_url;
        }
    }
}