Benutzer:Syntron/Skript PR-Glossar alphabetisch

Ich bin hier, weil ich eine alphabetische Liste für das Glossar wollte. Also habe ich sie mit Hilfe des folgenden PHP-Skripts angelegt. Wobei ich nun an die Grenzen meines Wikipedia-Wissens komme. Wie erstelle ich eine Vorlage (zum einen Vorlage:Spalten aus der Wikipedia; zum anderen eine Vorlage, um die lange alphabetische Liste aufzusplitten)? Naja, für heute reicht es ...

--Syntron 15:38, 1. Mai. 2007 (CEST)

Überarbeitung des Skriptes, um doppelte Einträge zu berücksichtigen

--Syntron 16:03, 1. Mai. 2007 (CEST)

Heft-Liste bei mehrfach auftretenden Begriffen

--Syntron 16:15, 1. Mai. 2007 (CEST)

#!/usr/bin/php
<?php

// Number of columns the entries of each letter section are spread over.
$cols = 3;
// Name of the file the generated wiki text is written to.
$file = "glossar_alphabetical.txt";

/* === do not change below this line if you don't know what you are doing === */

/*

Version 0.3: List of issues for terms that occur more than once
Version 0.2: Support for terms that occur more than once
Version 0.1: First version

*/


// ==================================================================
// part 1
// Load the edit view of the chronological glossary index and pull out
// every "*[[target|label]]" list item; each one names a glossary sub page.
$index_wiki = getWikiText(getURL(perryPediaURL("Perry_Rhodan-Glossar_chronologisch", true)));

/*
	Shape of one element of $glossar_pages (PREG_SET_ORDER):
	[0] = *[[Perry_Rhodan-Glossar_2300_-_2399|Perry Rhodan-Glossar 2300 - 2399]]
	[1] = [[Perry_Rhodan-Glossar_2300_-_2399|Perry Rhodan-Glossar 2300 - 2399]]
	[2] = Perry_Rhodan-Glossar_2300_-_2399
	[3] = Perry Rhodan-Glossar 2300 - 2399
 */
$glossar_pages = array();
preg_match_all("!\*(\[\[(.*?)\|(.*?)\]\])!", $index_wiki, $glossar_pages, PREG_SET_ORDER);

// ==================================================================
// part 2
// Collect the glossary entries of every sub page found in part 1.
//
// $glossar maps the normalised sort key of a term (see wiki2str()) to
//   "roman"  => list of the four-digit labels of the source links
//   "quelle" => list of the matching "Quelle:..." link targets
//   "wiki"   => list of the complete [[Quelle:...|NNNN]] links
//   "entry"  => wiki text of the term as it was first encountered
$glossar = array();
foreach ($glossar_pages as $page) {
	$page_wiki = getWikiText(getURL(perryPediaURL($page[2], true)));
	if ($page_wiki === false) {
		// the response contained no textarea, so there is nothing to parse
		printMsg("W", sprintf("No wiki text found for page: %s", $page[2]));
		continue;
	}

	// one match per source: from its [[Quelle:...]] cell up to the next
	// "|- valign=" table row separator
	preg_match_all("!\|\s+(\[\[Quelle:.*?)\|-\s+valign=!s", $page_wiki, $matchPR, PREG_SET_ORDER);
	foreach ($matchPR as $onePR) {
		// Without a [[Quelle:...|NNNN]] link the entries cannot be attributed;
		// skip the block instead of reading undefined match offsets.
		if (!preg_match("!(\[\[(Quelle:.*?)\|(\d{4})\]\])!", $onePR[1], $source)) {
			printMsg("W", sprintf("Skip block without source link on page: %s", $page[2]));
			continue;
		}
		$link   = $source[1];
		$quelle = $source[2];
		$roman  = $source[3];

		// every "| text" cell inside the block is one glossary term
		preg_match_all("!\| (.+)!", $onePR[1], $cells, PREG_SET_ORDER);
		foreach ($cells as $entry) {
			$sort_key = wiki2str($entry[1]);

			if (!isset($glossar[$sort_key])) {
				$glossar[$sort_key] = array(
					"roman" =>	array($roman),
					"quelle" =>	array($quelle),
					"wiki" =>	array($link),
					"entry" =>	$entry[1]);
			} else {
				// term seen before: only remember the additional source
				$glossar[$sort_key]['roman'][] = $roman;
				$glossar[$sort_key]['quelle'][] = $quelle;
				$glossar[$sort_key]['wiki'][] = $link;
			}
		}
	}
}

// ==================================================================
// part 3
// Order the terms by their sort key ...
ksort($glossar);

// ... then bucket them by initial: keys starting with a-z go into the
// bucket of that letter, everything else is collected under '0'.
$glossar_sorted = array();
foreach ($glossar as $sort_key => $data) {
	$bucket = preg_match("!^([a-z]{1})!", $sort_key, $initial) ? $initial[1] : '0';
	$glossar_sorted[$bucket][] = $data;
}

// ==================================================================
// part 4
// Render one heading plus one wiki table per bucket; the entries of a
// bucket are spread over $cols columns, separated by zero-width cells.
$output = "";
foreach ($glossar_sorted as $bucket => $items) {
	$width = floor(100/$cols);
	$column_cell = "| width=\"". $width ."%\" style=\"background:#FFFFFF; border: 0px solid #000000;\" valign=\"top\" |\n";
	$spacer_cell = "| width=\"0%\" style=\"background:#FFFFFF; border: 0px solid #000000;\" valign=\"top\" |\n";

	// the catch-all bucket gets a fixed title, letters are shown upper-case
	$heading = ($bucket == "0")
		? "== Glossar: 0-9 ==\n\n"
		: sprintf("== Glossar: %s ==\n\n", strtoupper($bucket));

	$output .= "\n" . $heading;
	$output .= "{| valign=\"top\" border=\"0\" cellpadding=\"4\" cellspacing=\"2\" width=\"100%\"\n";
	$output .= $column_cell;

	$per_column = ceil(count($items)/$cols);
	$current_column = 1;
	foreach ($items as $position => $item) {
		// open the next table cell as soon as the running index crosses a column boundary
		$target_column = ceil(($position+1)/$per_column);
		if ($target_column != $current_column) {
			$output .= $spacer_cell . $column_cell;
			$current_column = $target_column;
		}

		// undo the double escaping of non-breaking spaces in the entry text
		$label = str_replace("&"."amp;nbsp;", "&"."nbsp;", $item['entry']);
		$sources = join(", ", $item['wiki']);
		$output .= sprintf("* %s (%s)\n", $label, $sources);
	}

	$output .= "|}\n\n";
}

// ==================================================================
// part 5
// Write the generated wiki text to $file. A failure to open or write the
// file is reported and ends the script with a non-zero exit status.
printMsg("I", sprintf("Write data to file: %s", $file));
$fh = fopen($file, 'w');
if ($fh === false) {
	printMsg("E", sprintf("Cannot open file for writing: %s", $file));
	exit(1);
}
if (fwrite($fh, $output) === false) {
	printMsg("E", sprintf("Cannot write data to file: %s", $file));
	fclose($fh);
	exit(1);
}
fclose($fh);

/*******************************************************************
 * subfunctions                                                    *
 *******************************************************************/

/**
 * Fetch a URL with cURL and return the response body.
 *
 * Failures are reported through printMsg() instead of being passed on
 * silently; the return value stays the same as before (false on failure).
 *
 * @param string $url address to load
 * @return string|false response body, or false if the transfer failed
 */
function getURL($url) {
	printMsg("I", sprintf("Load URL: %s", $url));

	$ch = curl_init();
	if ($ch === false) {
		printMsg("E", "Could not initialise cURL");
		return false;
	}

	curl_setopt($ch, CURLOPT_URL, $url);
	// body only, no response headers in the result
	curl_setopt($ch, CURLOPT_HEADER, 0);
	// hand the body back as a string instead of printing it
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
	$str = curl_exec($ch);
	if ($str === false) {
		printMsg("E", sprintf("Request failed: %s", curl_error($ch)));
	}
	curl_close($ch);

	return $str;
}

/**
 * Build the Perrypedia URL of a wiki page.
 *
 * The title is percent-encoded so that characters such as "&", "#", "?" or
 * blanks cannot break the URL. ":" and "/" are kept readable because they
 * are regular parts of page titles such as "Quelle:PR2300".
 * The title is expected in raw form; an already percent-encoded title would
 * be encoded a second time.
 *
 * @param string $title page title as it appears in a wiki link target
 * @param bool   $edit  true for the edit view (index.php?title=...&action=edit),
 *                      false for the plain page URL
 * @return string absolute URL
 */
function perryPediaURL($title, $edit = false) {
	$encoded = str_replace(array("%3A", "%2F"), array(":", "/"), rawurlencode($title));

	if ($edit) {
		return sprintf("http://www.perrypedia.proc.org/index.php?title=%s&action=edit", $encoded);
	}

	return sprintf("http://www.perrypedia.proc.org/%s", $encoded);
}

/**
 * Extract the content of the first <textarea> element of an HTML document.
 *
 * The content is returned exactly as it appears in the markup; HTML
 * entities are not decoded.
 *
 * @param string $str HTML source
 * @return string|false text between the textarea tags, false if there is none
 */
function getWikiText($str) {
	$found = preg_match("!<textarea.*?>(.*?)</textarea>!s", $str, $hit);

	return $found ? $hit[1] : false;
}

/**
 * Reduce a piece of wiki markup to a plain lower-case string that is used
 * as the sort key of a glossary term.
 *
 * The text is cut at every pair of square brackets. A piece that contains
 * a pipe contributes only its second field (the label of a
 * [[target|label]] link); any other piece is taken over unchanged.
 * Afterwards German umlauts and "ß" are folded to a single ASCII letter
 * and quote entities are removed.
 *
 * @param string $wiki wiki text of one entry
 * @return string sort key
 */
function wiki2str($wiki) {
	$plain = "";
	foreach (preg_split("!([\[\]]{2})!", $wiki) as $chunk) {
		$fields = explode("|", $chunk);
		$plain .= (count($fields) > 1) ? $fields[1] : $chunk;
	}

	// fold the characters that would otherwise disturb a byte-wise sort
	$fold = array(
		"ä" => "a",
		"ö" => "o",
		"ü" => "u",
		"ß" => "s",
		"Ä" => "A",
		"Ö" => "O",
		"Ü" => "U",
		"&"."quot;" => "",
	);

	return strtolower(strtr($plain, $fold));
}

/**
 * Print one status line of the form "[<status>] <text>" to standard output.
 *
 * @param string $status short status tag placed inside the brackets
 * @param string $str    message text
 */
function printMsg($status, $str) {
	printf("[%s] %s\n", $status, $str);
}

Benutzer:Syntron/Skript PR-Glossar alphabetisch

Navigationsmenü

Suche