2024-04-19 11:21:33 +02:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace App\Services\Wikidata;
|
|
|
|
|
|
|
|
use App\Models\WikidataProperty;
|
|
|
|
|
|
|
|
class WikidataExtractor
|
|
|
|
{
|
|
|
|
/** Claims of included properties, as stored by extract(). */
protected array $included = [];

/** Claims of explicitly excluded properties, as stored by extract(). */
protected array $excluded = [];

/** Claims of properties that are neither included nor excluded. */
protected array $unused = [];

/** Entity data left over once the claims have been removed. */
protected array $everythingElse = [];

/** Presumably the ID of the entity being processed (verify against callers). */
protected string $entityId = '';

/**
 * Property ID => label map built by extract() and read by replaceValue().
 * Defaults to an empty array so lookups are safe before extract() has run.
 */
protected $properties = [];

/**
 * Labels of the entities referenced by the extracted entity, looked up by
 * entity ID in replaceValue(). Defaults to an empty array so lookups are
 * safe before extract() has run.
 */
protected $entities = [];
|
|
|
|
|
2024-04-20 23:27:47 +02:00
|
|
|
/**
 * @param array $exclusions Property IDs whose claims go to the "excluded" group
 * @param array $inclusions Property IDs to include, grouped by category
 *                          (category => list of property IDs)
 */
public function __construct(
    protected array $exclusions,
    protected array $inclusions,
) {
}
|
|
|
|
|
2024-04-19 11:21:33 +02:00
|
|
|
/**
 * Claims of the included properties, as populated by the last call to
 * extract().
 */
public function included()
{
    return $this->included;
}
|
|
|
|
|
|
|
|
/**
 * Claims of the explicitly excluded properties, as populated by the last
 * call to extract().
 */
public function excluded()
{
    return $this->excluded;
}
|
|
|
|
|
|
|
|
/**
 * Claims of the properties that are neither included nor excluded, as
 * populated by the last call to extract().
 */
public function unused()
{
    return $this->unused;
}
|
|
|
|
|
|
|
|
/**
 * Entity data that remains once the claims have been removed, as populated
 * by the last call to extract().
 */
public function everythingElse()
{
    return $this->everythingElse;
}
|
|
|
|
|
|
|
|
/**
 * Split the claims of the specified entity data into three arrays holding
 * the explicitly included properties, the explicitly excluded properties
 * and the unused properties (neither included nor excluded). Whatever is
 * left in the entity data besides its claims is stored separately.
 *
 * Nothing is returned: results are read through included(), excluded(),
 * unused() and everythingElse().
 *
 * @param array  $entityData Decoded entity data; claims are read from its 'claims' key
 * @param string $entityId   ID of the entity being extracted
 */
public function extract(array $entityData, string $entityId)
{
    $this->entityId = $entityId;

    $json = json_encode($entityData, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);

    if ($json === false) {
        // Encoding failed: no ID can be discovered, so values keep their raw codes
        $json = '';
    }

    // Label maps must be ready before browsing, since replaceValue() reads them
    $this->properties = $this->getDeclaredPropertiesInEntity($json);
    $this->entities = $this->getDeclaredEntitiesInEntity($json, $entityId);

    // Entity data without a usable 'claims' key is treated as having no claims
    $claims = $entityData['claims'] ?? [];

    if (!is_array($claims)) {
        $claims = [];
    }

    $result = $this->browse($claims);

    $this->included = $result['included'];
    $this->excluded = $result['excluded'];
    $this->unused = $result['unused'];

    unset($entityData['claims']);

    $this->everythingElse = $entityData;
}
|
|
|
|
|
|
|
|
/**
 * Return an array containing the Wikidata property IDs found in the
 * specified JSON as keys and the corresponding stored labels as values.
 *
 * IDs without a matching WikidataProperty row are left out, so callers fall
 * back to the raw ID for them.
 *
 * @param string $data JSON-encoded entity data
 *
 * @return array Property ID => label, in natural ID order
 */
private function getDeclaredPropertiesInEntity(string $data)
{
    preg_match_all('/P\d{1,}/', $data, $matches);

    $ids = array_unique($matches[0]);
    natsort($ids);
    $ids = array_values($ids);

    if ($ids === []) {
        return [];
    }

    // Key the labels by property ID: the query result order is not guaranteed
    // to follow $ids, and IDs missing from the table would shift a purely
    // positional pairing
    $labels = WikidataProperty::whereIn('property_id', $ids)->pluck('label', 'property_id');

    $result = [];

    foreach ($ids as $id) {
        if ($labels->has($id)) {
            $result[$id] = $labels->get($id);
        }
    }

    return $result;
}
|
|
|
|
|
|
|
|
/**
 * Fetch the labels of the Wikidata entities referenced in the specified
 * JSON, leaving out the entity being extracted.
 *
 * @param string $data     JSON-encoded entity data
 * @param string $entityId ID of the entity being extracted, removed from the lookup
 *
 * @return mixed Result of WikidataClient::getLabelsForEntities(); presumably
 *               an entity ID => label array, since replaceValue() looks
 *               labels up by ID (confirm against the client)
 */
private function getDeclaredEntitiesInEntity(string $data, string $entityId)
{
    preg_match_all('/Q\d{1,}/', $data, $matches);

    $ids = array_unique($matches[0]);
    natsort($ids);

    // Filter by value: Collection::except() works on keys, so it never
    // removed the entity's own ID from this list
    $ids = array_values(array_filter(
        $ids,
        static fn ($id) => $id !== $entityId,
    ));

    return app()->make(WikidataClient::class)->getLabelsForEntities($ids);
}
|
|
|
|
|
|
|
|
/**
 * Go through the claims of an entity and sort them into the excluded,
 * included and unused groups.
 *
 * Excluded and unused claims are keyed by "[code] label"; included claims
 * are reorganized by category through reorganizeIncluded().
 *
 * @param array $claims Claims keyed by property ID
 *
 * @return array Array with the 'excluded', 'included' and 'unused' keys
 */
private function browse(array $claims)
{
    $included = [];
    $excluded = [];
    $unused = [];

    // Same list for every claim, so it is flattened only once
    $inclusionIds = collect($this->inclusions)->flatten()->values()->toArray();

    foreach ($claims as $key => $data) {
        $isExcluded = in_array($key, $this->exclusions);
        $isIncluded = in_array($key, $inclusionIds);

        $claim = $this->parseClaims($data, $isIncluded);

        if ($isExcluded) {
            $excluded[$this->replaceValue($key, true, true)] = $claim;
        } elseif ($isIncluded) {
            // Kept under the raw property ID: reorganizeIncluded() looks
            // properties up by ID and applies the labels itself
            $included[$key] = $claim;
        } else {
            $unused[$this->replaceValue($key, true, true)] = $claim;
        }
    }

    return [
        'excluded' => $excluded,
        'included' => $this->reorganizeIncluded($included),
        'unused' => $unused,
    ];
}
|
|
|
|
|
|
|
|
/**
 * Parse every claim of a specific property.
 *
 * @param array $data           Claims of one property
 * @param bool  $parentIncluded Whether the property is an included one
 *
 * @return array Parsed claims, re-indexed from zero
 */
private function parseClaims(array $data, bool $parentIncluded)
{
    $parsed = array_map(
        fn ($claim) => $this->parseClaim($claim, $parentIncluded),
        $data,
    );

    // array_map() keeps the original keys; the result is a plain list
    return array_values($parsed);
}
|
|
|
|
|
|
|
|
/**
 * Parse a specific claim.
 *
 * Without qualifiers, the parsed main value is returned as is. With
 * qualifiers, a single-entry array is returned: the main value as key and
 * the parsed qualifier values, grouped by qualifier property, as value.
 *
 * @param array $data           Claim with a 'mainsnak' and optional 'qualifiers'
 * @param bool  $parentIncluded Whether the claim belongs to an included property
 */
private function parseClaim(array $data, bool $parentIncluded)
{
    $mainValue = $this->parseSnak($data['mainsnak'], $parentIncluded);

    if (empty($data['qualifiers'])) {
        return $mainValue;
    }

    $groupedQualifiers = [];

    foreach ($data['qualifiers'] as $propertyId => $snaks) {
        $groupKey = $this->replaceValue($propertyId, true, !$parentIncluded);

        foreach ($snaks as $snak) {
            $groupedQualifiers[$groupKey][] = $this->parseSnak($snak, $parentIncluded);
        }
    }

    return [$mainValue => $groupedQualifiers];
}
|
|
|
|
|
|
|
|
/**
 * Parse a specific snak and return its value in a displayable form.
 *
 * Entity IDs and plain strings go through replaceValue(); time, quantity
 * and monolingual text values are reduced to their main field; globe
 * coordinates are rendered as "latitude,longitude".
 *
 * A snak without a data value ("novalue" / "somevalue" snaks) yields its
 * snak type instead, or an empty string when no snak type is given.
 *
 * @param array $data           Snak, with its value under 'datavalue'
 * @param bool  $parentIncluded Whether the snak belongs to an included property
 *
 * @throws \UnexpectedValueException When the data value type is not supported
 */
private function parseSnak(array $data, bool $parentIncluded)
{
    // isset() rather than empty(): a value such as "0" is legitimate
    if (!isset($data['datavalue']['value'])) {
        return (string) ($data['snaktype'] ?? '');
    }

    $value = $data['datavalue']['value'];
    $valueType = $data['datavalue']['type'] ?? null;

    return match ($valueType) {
        'wikibase-entityid' => $this->replaceValue($value['id'], true, !$parentIncluded),
        'string' => $this->replaceValue($value, true, !$parentIncluded),
        'time' => $value['time'],
        'quantity' => $value['amount'],
        'monolingualtext' => $value['text'],
        'globecoordinate' => sprintf('%s,%s', $value['latitude'], $value['longitude']),
        // Fail with a catchable exception instead of dumping and exiting
        default => throw new \UnexpectedValueException(
            sprintf('Unsupported Wikidata data value type "%s"', (string) $valueType)
        ),
    };
}
|
|
|
|
|
|
|
|
/**
 * Replace a value with a more human-friendly version. Basically replaces
 * Wikidata entity and property IDs with the labels collected by extract(),
 * if it applies. Any other value is returned unchanged.
 *
 * @param string $value     Value to replace, typically a property or entity ID
 * @param bool   $showCode  Whether the raw value should appear in the result
 * @param bool   $showLabel Whether the label should appear in the result
 *
 * @return string "[code] label" when both are requested and a label is
 *                known, the raw value when only the code is requested, the
 *                label otherwise
 */
private function replaceValue(string $value, bool $showCode = true, bool $showLabel = true)
{
    $code = $value;

    // Falls back to the raw value when no label is known or the stored label
    // is null; also safe when the label maps have not been built yet
    $label = $this->properties[$value] ?? $this->entities[$value] ?? $value;

    if ($showCode && $showLabel) {
        return $code !== $label ? sprintf('[%s] %s', $code, $label) : $value;
    }

    return $showCode ? $code : $label;
}
|
|
|
|
|
|
|
|
/**
 * Regroup the "raw" included data by category, following the "inclusions"
 * Wikidata configuration. Categories without any data are dropped.
 *
 * @param array $includedData Parsed claims keyed by property ID
 *
 * @return array Category => included properties of that category
 */
private function reorganizeIncluded(array $includedData)
{
    $byCategory = array_map(
        fn ($properties) => $this->includeProperties($includedData, $properties),
        $this->inclusions,
    );

    // Without a callback, array_filter() removes the empty results while
    // keeping the category keys
    return array_filter($byCategory);
}
|
|
|
|
|
|
|
|
/**
 * Pick the specified properties out of the included data, keyed by their
 * label. Properties absent from the data are skipped.
 *
 * @param array $includedData Parsed claims keyed by property ID
 * @param array $properties   Property IDs to pick
 *
 * @return array Property label => values
 */
private function includeProperties($includedData, $properties)
{
    $picked = [];

    foreach ($properties as $propertyId) {
        if (array_key_exists($propertyId, $includedData)) {
            $label = $this->replaceValue($propertyId, false, true);

            $picked[$label] = $this->includeValues($includedData[$propertyId]);
        }
    }

    return $picked;
}
|
|
|
|
|
|
|
|
/**
 * Replace keys and values of the specified array with their labels,
 * descending into nested arrays.
 *
 * @param array $values Values of an included property
 *
 * @return array Same structure with labels in place of IDs
 */
private function includeValues(array $values)
{
    $labelled = [];

    foreach ($values as $key => $value) {
        $labelKey = $this->replaceValue($key, false, true);

        $labelled[$labelKey] = is_array($value)
            ? $this->includeValues($value)
            : $this->replaceValue($value, false, true);
    }

    return $labelled;
}
|
|
|
|
}
|