418 lines
12 KiB
PHP
418 lines
12 KiB
PHP
<?php
|
|
|
|
namespace App\Models;
|
|
|
|
use App\Classes\Bundle;
|
|
use App\Exceptions\PartnerCannotBeFound;
|
|
use App\Services\Partners\Facades\Partner as PartnerFactory;
|
|
use DOMXPath;
|
|
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
|
use Illuminate\Database\Eloquent\Model;
|
|
use Illuminate\Http\Client\ConnectionException;
|
|
use Illuminate\Support\Facades\Http;
|
|
use Illuminate\Support\Facades\Storage;
|
|
use Illuminate\Support\Str;
|
|
use League\CommonMark\Util\HtmlElement;
|
|
use League\Uri\Uri;
|
|
use Masterminds\HTML5;
|
|
|
|
class Url extends Model
|
|
{
|
|
use HasFactory;
|
|
|
|
/**
 * Return the Url model matching the given address for the given bundle.
 *
 * When no stored model matches the normalised address, a new one is
 * initialised and checked. In every case the bundle is associated with the
 * model and the model is saved before being returned.
 */
public static function from(string $url, Bundle $bundle): self
{
    $normalizedUrl = (string) Uri::new($url);

    $instance = Url::query()
        ->where('initial_url', $normalizedUrl)
        ->first();

    // Covers both "nothing found" (null) and a model without identifier
    $isUnknown = empty($instance->id);

    if ($isUnknown) {
        $instance = new Url();
        $instance->init($normalizedUrl);
        $instance->check();
    }

    $instance->ensureBundleIsAssociated($bundle);
    $instance->save();

    return $instance;
}
|
|
|
|
/**
 * Return the link as a HtmlElement.
 *
 * The anchor carries the final URL as `href`, plus the stored `rel`,
 * `title` and `class` values (empty strings when unset).
 *
 * Root-relative URLs (starting with a single `/`) are made absolute with
 * the application URL when running in console, unless the link is a
 * fragment.
 *
 * @param string|null $innerHtml Content of the anchor; the initial URL is
 *                               used when null
 */
public function toHtmlElement(?string $innerHtml = null): HtmlElement
{
    $finalUrl = $this->final_url;

    $isRootRelative = Str::startsWith($finalUrl, '/')
        // Protocol-relative URLs ("//host/path") already name their host
        && !Str::startsWith($finalUrl, '//');

    if ($isRootRelative && !$this->is_fragment && app()->runningInConsole()) {
        // Read from config rather than env(): env() returns null outside
        // of configuration files once the configuration is cached. The
        // trailing slash is trimmed to avoid "//" at the junction.
        $finalUrl = rtrim((string) config('app.url'), '/') . $finalUrl;
    }

    return new HtmlElement('a', [
        'href' => $finalUrl,
        'rel' => $this->rel ?? '',
        'title' => $this->link_title ?? '',
        'class' => $this->css_class ?? '',
    ], $innerHtml ?? $this->initial_url);
}
|
|
|
|
/**
 * Check/update URL data, optionally ignoring last check date.
 *
 * Nothing is done for fragments, nor when the last check is recent and
 * the check is not forced.
 *
 * @param bool $force Run the check even if the last one is recent
 *
 * @return bool True when the check left the model dirty, false when it
 *              was skipped or changed nothing
 */
public function check(bool $force = false): bool
{
    if (!empty($this->checked_at)) {
        // The threshold is one month ago, shifted by a random number of
        // days (-4 to +4) so all links are not refreshed at the same time
        $staleBefore = now()->subMonth()->addDays(rand(-4, 4));
        $checkedRecently = $this->checked_at->gt($staleBefore);

        if ($checkedRecently && !$force) {
            return false;
        }
    }

    if ($this->is_fragment) {
        return false;
    }

    if (!$this->is_external) {
        $this->checkInternal();
    } else {
        $this->checkExternal();
    }

    $this->setCssClass();

    // Dirty state is read before stamping the check date, so the new
    // timestamp alone is not reported as a change
    $hasChanged = $this->isDirty();

    $this->checked_at = now();

    return $hasChanged;
}
|
|
|
|
/**
 * Gives the list of bundles where the URL can be found. Initial list is
 * the one the URL was created with, minus the ones the URL was removed
 * from later.
 *
 * A bundle is kept as soon as one of its rendered contents contains the
 * initial, partner or final URL.
 *
 * @return array<int, string> Paths of the bundles still referencing the
 *                            URL, re-indexed from zero
 */
public function bundlesReferencingUrl(): array
{
    // Unset candidates are dropped (partner_url is only set for affiliate
    // links): a null or empty needle must never count as a match
    $needles = array_values(array_filter(
        [$this->initial_url, $this->partner_url, $this->final_url],
        static fn ($candidate): bool => is_string($candidate) && $candidate !== '',
    ));

    $referencing = [];

    foreach ($this->bundles ?? [] as $bundlePath) {
        $bundle = new Bundle($bundlePath, Storage::disk(env('CONTENT_DISK')));

        // NOTE(review): render(true) is assumed to return an iterable of
        // path => rendered content — confirm against Bundle::render()
        $rendered = $bundle->render(true);

        foreach ($rendered as $content) {
            if (Str::contains($content, $needles)) {
                // One match is enough, other contents of the same bundle
                // do not need to mention the URL
                $referencing[] = $bundlePath;

                break;
            }
        }
    }

    return $referencing;
}
|
|
|
|
/**
 * Get the attributes that should be cast.
 *
 * The bundle list is cast to an array, the three state flags to booleans
 * and the two dates to datetime instances.
 *
 * @return array<string, string>
 */
protected function casts(): array
{
    $flags = array_fill_keys(['is_fragment', 'is_external', 'is_dead'], 'boolean');
    $dates = array_fill_keys(['dead_since', 'checked_at'], 'datetime');

    return ['bundles' => 'array'] + $flags + $dates;
}
|
|
|
|
/**
 * Check the URL as an external website.
 *
 * A HEAD request is tried first, then a GET request if the HEAD one did
 * not prove the link alive. A successful response or a 403 status marks
 * the link as alive; a connection error or any other status marks it as
 * dead. The link title is recomputed after every attempt.
 */
private function checkExternal(): void
{
    $host = parse_url($this->base_url, PHP_URL_HOST);
    $referer = $this->initial_url;

    // Built locally for each attempt instead of registering a global Http
    // macro holding the state of the URL currently being checked
    $newRequest = static fn () => Http::timeout(10)
        ->withOptions([
            // NOTE(review): TLS verification is disabled, presumably so a
            // broken certificate does not flag the link as dead — confirm
            'verify' => false,
            'version' => 2.0,
        ])
        ->withHeaders([
            'Host' => $host,
            'Referer' => $referer,
            'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
            'Accept-Encoding' => 'gzip, deflate, br',
            'Accept-Language' => 'fr-FR,fr;q=0.9',
            'Connection' => 'keep-alive',
            'Sec-Fetch-Dest' => 'document',
            'Sec-Fetch-Mode' => 'navigate',
            'Sec-Fetch-Site' => 'same-origin',
        ])
        ->accept('text/html,application/xhtml+xml,application/xml,application/pdf;q=0.9,*/*;q=0.8');

    foreach (['head', 'get'] as $method) {
        try {
            $response = $newRequest()->{$method}($this->base_url);
        } catch (ConnectionException $ex) {
            $this->markExternalAsDead($ex->getMessage());
            $this->setTitleForExternalUrl(null);

            continue;
        }

        // 403 is treated like a success: the server answered
        $isAlive = $response->successful() || $response->status() === 403;

        if (!$isAlive) {
            $this->markExternalAsDead(
                sprintf('%d - %s', $response->status(), $response->reason())
            );
            $this->setTitleForExternalUrl($response->body());

            continue;
        }

        $this->is_dead = false;
        $this->dead_reason = null;
        $this->dead_since = null;

        // Only set the final url if we didn't already set a partner url as
        // the partner url has precedence. The effective URI may be
        // missing, in which case the current final url is kept rather
        // than being replaced by an empty string.
        $effectiveUri = $response->effectiveUri();

        if (empty($this->partner_url) && $effectiveUri !== null) {
            $this->final_url = (string) $effectiveUri;
        }

        $this->setTitleForExternalUrl($response->body());

        break;
    }
}

/**
 * Flag the external URL as dead for the given reason, keeping the date of
 * the first failure when there is already one.
 */
private function markExternalAsDead(string $reason): void
{
    $this->is_dead = true;
    $this->dead_reason = $reason;

    if (empty($this->dead_since)) {
        $this->dead_since = now();
    }
}
|
|
|
|
/**
 * Check the URL as an internal bundle.
 *
 * The link is dead when the bundle does not exist. When it exists, the
 * page title is refreshed and any previous death information is cleared
 * so the link title stops reporting the link as dead.
 */
private function checkInternal(): void
{
    $bundle = new Bundle($this->base_url, Storage::disk(env('CONTENT_DISK')));

    $this->is_dead = !$bundle->exists();

    if ($this->is_dead) {
        // Keep the date of the first failure
        if (empty($this->dead_since)) {
            $this->dead_since = now();
        }
    } else {
        // Same reset as for external URLs: without it the title would keep
        // its "mort depuis le" mention once the bundle exists again
        $this->dead_since = null;
        $this->dead_reason = null;

        $this->page_title = $bundle->getArticleTitle();
    }

    $this->setTitleForInternalUrl($bundle);
}
|
|
|
|
/**
 * Calculate the title as it will be shown in the final HTML link, based on
 * internal bundle data.
 *
 * The title is made of, in order: a fixed prefix, the section title in
 * brackets when there is one, the page title, the death date when set and
 * the check date. Parts are joined with a single space.
 */
private function setTitleForInternalUrl(Bundle $bundle)
{
    $sectionTitle = null;
    $parentSection = $bundle->getSection();

    if ($parentSection) {
        $sectionTitle = $parentSection->getArticleTitle();
    }

    $segments = ['Lien interne :'];

    if (!empty($sectionTitle)) {
        $segments[] = sprintf('[%s]', $sectionTitle);
    }

    $segments[] = $this->page_title;

    if (!empty($this->dead_since)) {
        $deadOn = $this->dead_since->format('d/m/Y');

        $segments[] = sprintf('(mort depuis le %s)', $deadOn);
    }

    // NOTE(review): check() stamps checked_at after this method ran, so on
    // a re-check this looks like the previous check date — confirm intent
    $verifiedOn = ($this->checked_at ?? now())->format('d/m/Y');

    $segments[] = sprintf('(vérifié le %s)', $verifiedOn);

    $this->link_title = implode(' ', $segments);
}
|
|
|
|
/**
 * Calculate the title as it will be shown in the final HTML link, based on
 * external url data.
 *
 * The label is the host of the initial URL, replaced by the first
 * non-empty <title> found in the body when a body is given, and cut when
 * longer than 150 characters. The death date and reason, then the check
 * date, are appended.
 *
 * @param string|null $body Response body to look for a <title> in
 */
private function setTitleForExternalUrl(?string $body = null)
{
    $kind = empty($this->partner_url) ? 'externe' : 'affilié';
    $segments = ['Lien', $kind, ':'];

    // Fallback label, used when no title can be read from the body
    $label = parse_url($this->initial_url, PHP_URL_HOST);

    if (!empty($body)) {
        // Required to use xpath, see
        // https://github.com/Masterminds/html5-php/issues/123
        $parser = new HTML5(['disable_html_ns' => true]);
        $document = $parser->loadHTML($body);

        $titleNodes = (new DOMXPath($document))->query('//title');

        foreach ($titleNodes as $titleNode) {
            $text = Str::trim($titleNode->nodeValue);

            if (empty($text)) {
                continue;
            }

            $label = $text;

            break;
        }
    }

    if (Str::length($label) > 150) {
        $label = Str::limit($label, 150);
    }

    $segments[] = $label;

    if (!empty($this->dead_since)) {
        $deadOn = $this->dead_since->format('d/m/Y');
        $shortReason = Str::limit($this->dead_reason, 100);

        $segments[] = sprintf('(mort depuis le %s : %s)', $deadOn, $shortReason);
    }

    $verifiedOn = ($this->checked_at ?? now())->format('d/m/Y');

    $segments[] = sprintf('(vérifié le %s)', $verifiedOn);

    $this->link_title = implode(' ', $segments);
}
|
|
|
|
/**
 * Set the appropriate CSS class.
 *
 * Adds the `dead` class when the link is dead and removes it otherwise.
 * Other classes are kept in their original order.
 */
private function setCssClass(): void
{
    // Blank tokens are dropped: exploding an empty or doubly spaced string
    // yields '' entries, which used to produce values such as " dead"
    $classes = array_values(array_filter(
        explode(' ', $this->css_class ?? ''),
        static fn (string $item): bool => $item !== '',
    ));

    if ($this->is_dead) {
        if (!in_array('dead', $classes, true)) {
            $classes[] = 'dead';
        }
    } else {
        $classes = array_filter($classes, static fn (string $item): bool => $item !== 'dead');
    }

    $this->css_class = implode(' ', $classes);
}
|
|
|
|
/**
 * Touch related bundles to reflect changes, but only if necessary. Bundles
 * are not touched when Url is not dirty or if it was not saved yet.
 */
private function touchBundles(): void
{
    if (!$this->isDirty() || empty($this->id)) {
        return;
    }

    // The bundle list may be unset, as handled in the other methods
    foreach ($this->bundles ?? [] as $bundlePath) {
        $bundle = new Bundle($bundlePath, Storage::disk(env('CONTENT_DISK')));

        $bundle->touch();
    }
}
|
|
|
|
/**
 * Associate a bundle with the model.
 *
 * The path of the bundle is appended to the bundle list unless the list
 * already contains it.
 */
private function ensureBundleIsAssociated(Bundle $bundle)
{
    $knownPaths = $this->bundles ?? [];
    $bundlePath = $bundle->getPath();

    if (in_array($bundlePath, $knownPaths)) {
        return;
    }

    $knownPaths[] = $bundlePath;

    $this->bundles = $knownPaths;
}
|
|
|
|
/**
 * Initialise some properties from specified url.
 *
 * Sets the initial and base URL (the latter without fragment), the
 * fragment and external flags, and, for external URLs, the CSS class, the
 * `rel` value and the partner data when a partner handles the URL. The
 * final URL is the partner URL when there is one, the initial URL
 * otherwise.
 */
private function init(string $url): void
{
    $this->initial_url = $url;
    $this->is_fragment = Str::startsWith($url, '#');
    $this->base_url = (string) Uri::new($this->initial_url)->withFragment(null);

    $this->is_external = Str::startsWith($url, ['http://', 'https://'])
        && !$this->isHostedByApplication($url);

    if ($this->is_external) {
        $this->css_class = 'external';
        $this->rel = 'nofollow noreferrer noopener';

        try {
            $partner = PartnerFactory::getPartner($url);
        } catch (PartnerCannotBeFound $ex) {
            $partner = null;
        }

        $this->partner = $partner ? get_class($partner) : null;

        if (!empty($partner)) {
            $this->css_class .= ' affiliate';

            $this->rel = 'nofollow';
            $this->partner_url = $partner->getAffiliateLink();

            // The partner url has precedence over the initial one
            if (!empty($this->partner_url)) {
                $this->final_url = $this->partner_url;
            }
        }
    }

    if (empty($this->final_url)) {
        $this->final_url = $this->initial_url;
    }
}

/**
 * Tell whether the host of an absolute URL is `localhost` or the host of
 * the application URL.
 *
 * Hosts are compared instead of searching for "//" followed by the
 * application URL: that needle never matches when the application URL
 * includes its scheme, and matches every URL when the value is empty.
 */
private function isHostedByApplication(string $url): bool
{
    $host = parse_url($url, PHP_URL_HOST);

    if (!is_string($host) || $host === '') {
        return false;
    }

    $host = strtolower($host);

    if ($host === 'localhost') {
        return true;
    }

    // Read from config rather than env(): env() returns null outside of
    // configuration files once the configuration is cached
    $appUrl = rtrim((string) config('app.url'), '/');

    // The application URL may be configured as a bare host, in which case
    // parse_url() finds no host component
    $appHost = parse_url($appUrl, PHP_URL_HOST) ?: $appUrl;

    return $appHost !== '' && $host === strtolower($appHost);
}
|
|
}
|