| Current Path : /home/wirbesti/public_html/php/ |
| Current File : /home/wirbesti/public_html/php/BkDataextractor.php |
<?php
/**
 * Scrapes popular-initiative data from the www.bk.admin.ch "pore/vi" pages.
 *
 * All HTTP access goes through getCurled(), so a subclass can replace the
 * transport by overriding that single method.
 *
 * NOTE(review): the class name is spelled "Extrator" (sic); it is kept as-is
 * because callers reference it by this name.
 */
class BkDataExtrator
{
    /** URL template of an initiative's German detail page; %d receives the initiative id. */
    const DETAIL_PAGE_URL = "https://www.bk.admin.ch/ch/d/pore/vi/vis%d.html";

    /** URL template of the listing page read by getInitiativesInCollectionState(); %s receives the site language letter. */
    const COLLECTION_PAGE = "https://www.bk.admin.ch/ch/%s/pore/vi/vis_1_3_1_1.html";

    /**
     * Downloads a URL with a plain GET request and returns the response body.
     *
     * @param string $url Absolute URL to fetch.
     *
     * @return string|false Response body, or false when the handle could not
     *                      be created or the transfer failed.
     */
    public function getCurled($url)
    {
        $handle = curl_init();
        if ($handle === false) {
            return false;
        }

        curl_setopt_array($handle, [
            CURLOPT_URL            => $url,
            CURLOPT_HEADER         => false,
            CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.0.3705; .NET CLR 1.1.4322; Media Center PC 4.0)',
            CURLOPT_REFERER        => 'http://www.google.com', // just a fake referer
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST           => false,
            // FOLLOWLOCATION is an on/off switch; the redirect cap is a separate option.
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS      => 20,
        ]);

        return curl_exec($handle);
    }

    /**
     * Reads the pre-check, collection-start and collection-expiry rows from an
     * initiative's detail page.
     *
     * @param int|string $bkid Initiative id; formatted into DETAIL_PAGE_URL with %d.
     *
     * @return array Nine entries, in this order: 'pre-check-date', 'pre-check-url',
     *               'pre-check-bbl', 'collection-start-date', 'collection-start-url',
     *               'collection-start-bbl', 'collection-expiry-date',
     *               'collection-expiry-url', 'collection-expiry-bbl'.
     *               A '*-date' entry is null when its row was not found (or the
     *               page could not be downloaded); '*-url' and '*-bbl' entries
     *               are '' when absent.
     */
    public function getCampaignDates($bkid)
    {
        $page = $this->getCurled(sprintf(self::DETAIL_PAGE_URL, $bkid));

        $preCheck = $this->extractDateRow($page, 'Vorprüfung vom');
        $start    = $this->extractDateRow($page, 'Sammelbeginn');
        $expiry   = $this->extractDateRow($page, 'Ablauf Sammelfrist');

        return [
            'pre-check-date'         => $preCheck['date'],
            'pre-check-url'          => $preCheck['url'],
            'pre-check-bbl'          => $preCheck['bbl'],
            'collection-start-date'  => $start['date'],
            'collection-start-url'   => $start['url'],
            'collection-start-bbl'   => $start['bbl'],
            'collection-expiry-date' => $expiry['date'],
            'collection-expiry-url'  => $expiry['url'],
            'collection-expiry-bbl'  => $expiry['bbl'],
        ];
    }

    /**
     * Extracts one labelled three-cell table row (label, date, reference) from a detail page.
     *
     * The pattern has no "u" modifier, so the label is matched byte-wise: the
     * encoding of this source file has to agree with the encoding of the page.
     *
     * NOTE(review): the greedy ".*" sub-patterns are kept exactly as they were;
     * the 'bbl' capture can therefore include trailing markup such as "</a>".
     * Verify against the live page before tightening them.
     *
     * @param string|false $page  Page HTML as returned by getCurled().
     * @param string       $label Text of the row's first cell.
     *
     * @return array ['date' => string|null, 'url' => string, 'bbl' => string]
     */
    private function extractDateRow($page, string $label): array
    {
        $row = ['date' => null, 'url' => '', 'bbl' => ''];

        if (!is_string($page) || $page === '') {
            return $row;
        }

        $pattern = '/.*' . preg_quote($label, '/')
            . '<\/td>\s*<td>(?<date>.*)<\/td>\s*<td>\s*'
            . '(<a href=\"(?<fundstelle>.*)\">)?(?<bbl>.*\s.*)?(<\/a>)?.*<\/td>/';

        // Anything but exactly one match (0 = row absent, false = PCRE error) means "no data".
        if (preg_match($pattern, $page, $found) !== 1) {
            return $row;
        }

        $row['date'] = $found['date'];
        // Optional groups are '' or missing altogether when they did not take part in the match.
        $row['url'] = $found['fundstelle'] ?? '';
        $row['bbl'] = $found['bbl'] ?? '';

        return $row;
    }

    /**
     * Collects the initiatives linked from the listing page in each site language.
     *
     * Every link whose target looks like vis<key>.html contributes one entry.
     *
     * @return array Map of initiative key => ['key' => string, 'slogan-de' => string,
     *               'slogan-fr' => string, 'slogan-it' => string]; a slogan entry
     *               is only present when the key was found on that language's page.
     */
    public function getInitiativesInCollectionState()
    {
        // Site language letter => language code used in the result keys.
        $langcodes = ['d' => 'de', 'f' => 'fr', 'i' => 'it'];

        // The pattern below and the str_replace() list further down each contain
        // one raw line-break character inside the string literal. The continuation
        // lines must stay at column 0, otherwise indentation would become part of
        // the string. NOTE(review): which exact character this is supposed to be
        // is not evident from the file - confirm before normalising it.
        $re = '/.*a\shref="vis(?<key>[^"]*)\.html.*>(?<slogan>(?:[^<\n\r]|[\n\r]*(?<!
))*)<\/a>/m';

        $result = [];

        foreach ($langcodes as $lang => $langcode) {
            $page = $this->getCurled(sprintf(self::COLLECTION_PAGE, $lang));

            // A failed download or a page without links adds nothing for this language.
            if (!is_string($page) || !preg_match_all($re, $page, $matches, PREG_SET_ORDER)) {
                continue;
            }

            foreach ($matches as $match) {
                $bkid = $match['key'];

                if (!isset($result[$bkid])) {
                    $result[$bkid] = ['key' => $bkid];
                }

                $slogan = str_replace(["\r", "\n", "
"], "", $match['slogan']);

                // Array union keeps an existing entry: the first slogan found per language wins.
                $result[$bkid] += ['slogan-' . $langcode => $slogan];
            }
        }

        return $result;
    }
}