richard
/
cyca
Archived
1
0
Fork 0
This repository has been archived on 2024-05-04. You can view files and clone it, but cannot push or open issues or pull requests.
cyca/app/Models/Traits/Document/AnalysesDocument.php

227 lines
6.3 KiB
PHP
Executable File

<?php
namespace App\Models\Traits\Document;
use App\Models\Bookmark;
use App\Models\Feed;
use Illuminate\Support\Facades\Http;
use SimplePie;
use Storage;
trait AnalysesDocument
{
    // -------------------------------------------------------------------------
    // ----| Properties |-------------------------------------------------------
    // -------------------------------------------------------------------------

    /**
     * Provides temporary access to response to analyzers.
     *
     * Stays null when the last fetch attempt threw an exception.
     *
     * @var \Illuminate\Http\Client\Response|null
     */
    protected $response;

    /**
     * Provides temporary access to document's body to analyzers.
     *
     * @var string|null
     */
    private $body;

    // -------------------------------------------------------------------------
    // ----| Methods |----------------------------------------------------------
    // -------------------------------------------------------------------------

    /**
     * Begin document analysis.
     *
     * Steps, in order:
     *  1. delete the document and stop when both isOrphan() and
     *     wasOrphanFor(configured max age) report true;
     *  2. fetch the remote content; when no response could be obtained, only
     *     record the check date;
     *  3. when the content is a feed, convert this document to a feed and stop;
     *  4. when another document already owns the effective URL, merge into it
     *     and stop;
     *  5. otherwise run the analyzers, record the check date and save.
     */
    public function analyze()
    {
        // Don't bother if document isn't bookmarked anymore
        if ($this->isOrphan() && $this->wasOrphanFor(config('cyca.maxOrphanAge.document'))) {
            $this->delete();

            return;
        }

        $this->fetchContent();

        if (empty($this->response)) {
            $this->checked_at = now();
            $this->save();

            return;
        }

        if ($homepage = $this->isFeed()) {
            $this->convertToFeed($homepage);

            return;
        }

        if ($this->existingDocumentsMerged()) {
            return;
        }

        $this->runAnalyzers();

        $this->checked_at = now();
        $this->save();
    }

    /**
     * Copy content of resource at document's URL in "local" storage.
     *
     * Writes, below the document's storage path:
     *  - "debug": created empty (nothing else is written to it here);
     *  - "response.json": headers, protocol version and response object;
     *  - "body": the response body, only when the response status is OK.
     *
     * Also fills the mimetype (OK responses only), http_status_code and
     * http_status_text attributes. The model is not saved here.
     *
     * Exceptions raised by the HTTP request are reported and swallowed, in
     * which case $this->response is left null.
     */
    protected function fetchContent()
    {
        // Forget whatever a previous run left behind, so that a failed request
        // cannot be mistaken for a successful one.
        $this->response = null;
        $this->body = null;

        $storageRoot = $this->getStoragePath();
        $bodyFilename = $storageRoot.'/body';
        $responseFilename = $storageRoot.'/response.json';
        $debugFilename = $storageRoot.'/debug';

        // Empty string rather than null: string-typed filesystem writers
        // reject null contents.
        Storage::put($debugFilename, '');

        // A missing "http_client" configuration means "no extra options".
        $options = (array) config('http_client');

        try {
            $this->response = Http::withOptions($options)->timeout(30)->get($this->url);
            $this->body = $this->response->body();
        } catch (\Exception $ex) {
            report($ex);
        }

        if (!$this->response) {
            return;
        }

        $psrResponse = $this->response->toPsrResponse();
        $responseData = [
            'headers' => $this->response->headers(),
            'protocol_version' => $psrResponse->getProtocolVersion(),
            'response' => $this->response,
        ];

        Storage::put($responseFilename, json_encode($responseData));

        if ($this->response->ok()) {
            Storage::put($bodyFilename, $this->body);

            // Mime type is read back from the stored copy of the body
            $this->mimetype = Storage::mimetype($bodyFilename);
        }

        $this->http_status_code = $this->response->status();
        $this->http_status_text = $this->response->getReasonPhrase();
    }

    /**
     * Quickly determine if document is, in fact, a feed, and return
     * corresponding home page URL.
     *
     * The fetched body is handed to SimplePie; the document is considered a
     * feed when SimplePie initializes successfully and exposes a permalink.
     *
     * @return bool|string Return feed's home page (URL-decoded) if really a
     *                     feed, false otherwise
     */
    protected function isFeed()
    {
        $client = new SimplePie();

        $client->enable_cache(false);
        $client->set_raw_data($this->body);

        if (!$client->init()) {
            return false;
        }

        $permalink = $client->get_permalink();

        // No permalink means no home page to convert to: the caller treats
        // any falsy value as "not a feed", so say so explicitly instead of
        // passing null to urldecode().
        if (empty($permalink)) {
            return false;
        }

        return urldecode($permalink);
    }

    /**
     * Transform this document into a feed, by creating or using an existing
     * document with provided homepage URL, creating or using an existing feed
     * with current document's URL, linking both, updating any references to
     * this document to point to the new document, and finally deleting this
     * document.
     *
     * When the home page document turns out to be this very document, the
     * feed is still linked to it but bookmarks are left untouched and nothing
     * is deleted.
     *
     * @param string $homepage
     */
    protected function convertToFeed($homepage)
    {
        $document = self::firstOrCreate(['url' => $homepage]);
        $feed = Feed::firstOrCreate(['url' => $this->url]);

        if (!$document->feeds()->find($feed->id)) {
            $document->feeds()->attach($feed);
        }

        // firstOrCreate() may hand back this very record when the feed's home
        // page is its own URL. Deleting it then would leave the bookmarks
        // pointing to a removed document.
        if ((string) $document->id === (string) $this->id) {
            return;
        }

        Bookmark::where('document_id', $this->id)->update(['document_id' => $document->id, 'initial_url' => $homepage]);

        $this->delete();
    }

    /**
     * Find another document having the same real URL (the effective URI of
     * the response, URL-decoded). If one is found, all bookmarks of this
     * document are moved to it, then bookmarks in excess (more than one per
     * folder for that document) are deleted.
     *
     * This document itself is not deleted here.
     *
     * When no other document has the real URL, this document's url attribute
     * is replaced with it (without saving) and false is returned.
     *
     * Returns true if documents have been merged.
     *
     * @return bool
     */
    protected function existingDocumentsMerged()
    {
        $realUrl = urldecode((string) $this->response->effectiveUri());

        // Nothing to do when the URL did not change. An empty effective URI
        // is ignored so it can never overwrite the document's URL.
        if ($realUrl === '' || $realUrl === $this->url) {
            return false;
        }

        $document = self::where('url', $realUrl)->first();

        if (!$document) {
            $this->url = $realUrl;

            return false;
        }

        Bookmark::where('document_id', $this->id)->update(['document_id' => $document->id]);

        // Bookmarks have just been moved, so duplicates must be looked for on
        // the target document, not on this one.
        $allBookmarks = Bookmark::where('document_id', $document->id)->get()->groupBy('folder_id');

        foreach ($allBookmarks as $bookmarks) {
            // Keep the first bookmark of each folder, drop the others
            foreach ($bookmarks->slice(1) as $bookmark) {
                $bookmark->delete();
            }
        }

        return true;
    }

    /**
     * Select analyzers for this particular document then run them.
     *
     * The analyzer registered for the document's mime type in the "analyzers"
     * configuration is used when there is one, the "text/html" analyzer
     * otherwise.
     */
    protected function runAnalyzers()
    {
        $analyzers = (array) config('analyzers');

        // Mime type may be unset (it is only detected for OK responses)
        if (is_string($this->mimetype) && array_key_exists($this->mimetype, $analyzers)) {
            $this->launchAnalyzerFor($this->mimetype);

            return;
        }

        // In doubt, launch HtmlAnalyzer
        $this->launchAnalyzerFor('text/html');
    }

    /**
     * Instantiate the analyzer class configured for specified mime type, give
     * it this document, the fetched body and the response, then run it.
     *
     * @param string $mimetype Key of the analyzer in the "analyzers" configuration
     */
    protected function launchAnalyzerFor($mimetype)
    {
        // Index the array directly: config()'s dot notation would split mime
        // types containing a dot (e.g. "application/vnd.ms-excel").
        $analyzers = (array) config('analyzers');
        $className = $analyzers[$mimetype] ?? null;

        $instance = new $className();

        $instance->setDocument($this)->setBody($this->body)->setResponse($this->response)->analyze();
    }
}