/ Gists / Parser - PsiDetektiv
On gists

Parser - PsiDetektiv

PHP-PHPDOM

parser.php Raw #

<?php

/*
 * Downloads the lost-animals listing page, extracts one record per catalog
 * item found inside the element with id "category-list", and prints the
 * collected records as an HTML-escaped <pre> dump.
 */

// Collect libxml parse errors internally instead of emitting PHP warnings.
libxml_use_internal_errors(true);

$sourceUrl = 'https://www.psidetektiv.cz/ztracena-zvirata/';

// Initialised up front so the final dump is valid even when nothing was parsed.
$savedItems = [];

// Maps each output key to [class fragment of the wrapping <div>, label text].
// These values are interpolated into an XPath string literal, so they must
// not contain a single quote.
$labelledFields = [
    'jmeno'        => ['name', 'Jméno:'],
    'plemeno'      => ['line', 'Plemeno:'],
    'misto_ztraty' => ['line', 'Místo ztráty:'],
    'kraj'         => ['line', 'Kraj:'],
    'pohlavi'      => ['line', 'Pohlaví:'],
    'barva'        => ['line', 'Barva:'],
    'velikost'     => ['line', 'Velikost:'],
];

/**
 * Returns the trimmed first text node that follows a labelled <span> inside
 * a catalog item, or an empty string when the label is not present.
 *
 * @param DOMXPath $xpath          XPath evaluator bound to the parsed document.
 * @param DOMNode  $context        Catalog item node the query is relative to.
 * @param string   $containerClass Class fragment of the <div> wrapping the label.
 * @param string   $label          Label text to look for inside the <span>.
 *
 * @return string Trimmed value, or '' if no matching node exists.
 */
$readLabelledValue = static function (
    DOMXPath $xpath,
    DOMNode $context,
    string $containerClass,
    string $label
): string {
    $expression = sprintf(
        ".//div[contains(@class, '%s')]/span[contains(@class, 'label') and contains(text(), '%s')]/following::text()[1]",
        $containerClass,
        $label
    );

    $nodes = $xpath->query($expression, $context);

    // A missing field must not abort the whole record; fall back to ''.
    if ($nodes === false || $nodes->length === 0) {
        return '';
    }

    return trim((string) $nodes->item(0)->nodeValue);
};

$data = file_get_contents($sourceUrl);

if ($data === false || $data === '') {
    // loadHTML() rejects empty input, so stop before parsing.
    echo "Failed to download {$sourceUrl}\n";
} else {
    // Load HTML into DOMDocument
    $dom = new DOMDocument();
    $dom->loadHTML($data, LIBXML_NOERROR | LIBXML_NOWARNING);
    $finder = new DOMXPath($dom);

    $wrapper = $finder->query("//div[@id='category-list']");

    if ($wrapper !== false && $wrapper->length > 0) {
        $catalogItems = $finder->query(".//div[contains(@class, 'catalog-item')]", $wrapper->item(0));

        foreach ($catalogItems as $item) {
            // Detail link: first anchor whose href contains "/zvire/".
            $linkElement = $finder->query(".//a[contains(@href, '/zvire/')]", $item)->item(0);
            $link = $linkElement instanceof DOMElement ? $linkElement->getAttribute('href') : '';

            // The picture is carried as a CSS background-image on a <span class="bg-image">.
            $bgImageElement = $finder->query(".//span[@class='bg-image']", $item)->item(0);
            $bgImageStyle = $bgImageElement instanceof DOMElement ? $bgImageElement->getAttribute('style') : '';

            // Accept optional whitespace and optional quotes around the URL.
            $imageUrl = '';
            if (preg_match('/background-image:\s*url\(\s*[\'"]?(.*?)[\'"]?\s*\)/', $bgImageStyle, $matches) === 1) {
                $imageUrl = $matches[1];
            }

            // Key order matches the original output: link, labelled fields, image.
            $animalData = ['odkaz' => $link];
            foreach ($labelledFields as $key => $field) {
                $animalData[$key] = $readLabelledValue($finder, $item, $field[0], $field[1]);
            }
            $animalData['obrazek'] = $imageUrl;

            $savedItems[] = $animalData;
        }
    } else {
        echo "No elements found\n";
        foreach (libxml_get_errors() as $error) {
            // The message text comes from parsing remote HTML, so escape it too.
            $message = htmlspecialchars($error->message, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo "Line {$error->line}: {$message}\n";
        }
    }
}

// The records hold text scraped from a third-party page; escape before
// embedding the dump in HTML so that page cannot inject markup here.
echo "<pre>";
echo htmlspecialchars(print_r($savedItems, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
echo "</pre>";