Gospel Translations:Technology/Multi-wiki project/trans bot.php

From Gospel Translations

Jump to:navigation, search
  1. <?php
  2.  
  3. // define these BEFORE including basic_bot.php if you want to override the settings in basic_bot.php:
  4. // define('USERID','2');
  5. // define('USERNAME','RCBot');
  6. // define('PASSWORD','RCBotPassword');
  7.  
  8. require_once('basic_bot.php'); // the bot framework we use when making "new BasicBot()"
  9. require_once('Snoopy.class.php'); // the bot framework is based on Snoopy
  10.  
  11. // get a list of languages supported by MediaWiki from the MediaWiki files
  12. require_once('/home/gospeltr/public_html/w/languages/Names.php'); 
  13. global $wgLanguageNames;	
  14.  
  15. // get a timestamp
  16. $timestamp = date("Ymd");
  17.  
  18. // this serialize()'d php array contains information about the translations
  19. // and languages that we have on the wiki
  20. $dbFile = '/home/gospeltr/public_html/w/extensions/dabble/cache/db.php';
  21.  
  22. // ------------------------------------------------------
  23. // Start the action by reading the query string and
  24. // determining what function to execute
  25. // ------------------------------------------------------
  26.  
  27. // Print a form if no instructions are passed through a query value 'whattodo'
  28. if(!isset($_GET['whattodo'])) {
  29. 	print <<<EOT
  30. 	<h2>What language should we process?</h2>
  31. 	<form action="trans_bot.php" method="GET" name="bot">
  32. 	<input type="hidden" name="lang" value="Spanish"> <!--//right now "es" is the only valid value, so this is hidden -->
  33. 	<input type="hidden" name="lc" size="2" value="es"> <!--// the language code, same as above -->
  34. 	<input type="text" name="page" value="page to scrape"> <!--//what page should this script pull data from? -->
  35. 	<select name="whattodo">
  36. 		<option selected value="">Select...</OPTION>
  37. 		<option>run</option>
  38. 		<option>print</option>
  39. 		<option>titles</option>
  40. 		<option>see db</option>
  41. 	</select>
  42. 	<input type="submit" value="submit">
  43. 	</form>
  44. EOT;
  45. 	exit; 
  46. } 
  47.  
  48. // get the library of translations loaded up
  49. $ldb = unserialize(file_get_contents($dbFile)); 
  50. $LANGS = $ldb['l'];
  51. $TERMS = $ldb['g'];
  52. $db = $ldb['t'];
  53. $RL = $_GET['lang'];
  54. $allTranslations;
  55.  
  56. // This is a debugging option to see if this script is building its array correctly
  57. // Use it to find out what titles are getting paired with what translations
  58. if($_GET['whattodo'] === 'print') {
  59.  
  60. 	// get the library of translations loaded up
  61. 	$ldb = unserialize(file_get_contents($dbFile)); 
  62.  
  63. 	// for simplicity, assign unique arrays to different parts of $ldb
  64. 	// $LANGS for all language info
  65. 	$LANGS = $ldb['l'];
  66. 	// $TERMS for some glossary words/translations used in the website interface
  67. 	$TERMS = $ldb['g'];
  68. 	// $db for translated resources, in the form $db['Language']['Resource title']['Attribute'] = > 'Value'
  69. 	// e.g., $db['Spanish']['Gospel Implications']['Publisher'] => 'Ligonier Ministries'
  70. 	$db = $ldb['t'];
  71. 	// $RL, short for resource language, as determined by the form input
  72. 	$RL = $_GET['lang'];
  73. 	// a blank array that we'll use later to load up title=>translated_title key/value pairs
  74. 	$allTranslations;
  75.  
  76. 	// go grab the list of articles and titles
  77. 	// this instance of BasicBot uses the function ScrapePage (see basic_bot.php for details)
  78. 	$fetcher = new BasicBot();
  79. 	$fetcher->wikiFilter($_GET['page'],'ScrapePage',$summary='');
  80.  
  81. 	// show the results of $fetcher
  82. 	print_r($allTranslations);
  83.  
  84. 	exit; 
  85. } 
  86.  
  87.  
  88. if($_GET['whattodo'] === 'see db')
  89. 		print_r($db);
  90.  
  91. // go grab the list of articles and titles
  92. $fetcher = new BasicBot();
  93. $fetcher->wikiFilter($_GET['page'],'ScrapePage',$summary='');
  94.  
  95. $myl = array_keys($allTranslations);
  96. $flip = array_flip($allTranslations);  
  97.  
  98. $copyBot = new BasicBot();
  99. $copyBot->ArrayFilterAll($myl,'PostCode',$summary='Automated: Added transclusion reference and categories to article source',$allTranslations);
  100.  
  101. function PostCode( $content, $params ){
  102.  
  103. 	global $wgLanguageNames;
  104. 	global $db;
  105. 	global $TERMS;
  106. 	global $RL;
  107. 	$resLang = $RL;
  108. 	global $passv;
  109. 	$res = $params[$passv];
  110.  
  111. 	/////////////////////
  112. 	// break the language code off the title
  113. 	$titleParts = explode('/', $res);
  114. 	$lastPart = end($titleParts);
  115. 	// test for any 2-3 letter sequence ending the title after a /
  116. 	if(isset($wgLanguageNames[$lastPart])) {
  117. 			$old_res = $res;
  118. 			$res = str_ireplace('/'.$lastPart, '', $res);
  119. 	}
  120. 	else {
  121. 		print "Error: $res is not a foreign resource"; 
  122. 		exit;
  123. 	}
  124. 	// Decide whether this belongs to a book or a series
  125. 	if ($db[$resLang][$res]['Media Type'] == 'Chapter' || $db[$resLang][$res]['Media Type'] == 'Book')
  126. 			$stext = 'Book';
  127. 	else
  128. 			$stext = 'Series'; 
  129.  
  130. 	$flip = array_flip($params);
  131. 	if ($db[$resLang][$res]['Media Type'] == 'Book'){
  132. 			$book = $db[$resLang][$res]['Book/series'];
  133. 			$toc = "===".$TERMS['Table of Contents'][$resLang] . "=== \n";
  134. 			$chaps = count($db['TOC'][$book]);
  135. 			for ($i=1; $i<=$chaps; $i++) {
  136. 					$chapter = $db['TOC'][$book][$i];
  137. 					//$chapter = str_ireplace($book.'/', '', $chapter);
  138. 					$chapter = $chapter . '/' . $lastPart;
  139. 					$tch = $flip[$chapter];
  140. 					$tch = str_replace('_', ' ', $tch);
  141. 					$short_tch = explode('/', $tch);
  142. 					$toc = $toc . "[[$tch|{$short_tch[1]}]] \n\n";
  143. 			}
  144.  
  145. //			print $allTranslations;
  146. 	//		print "\n\n";
  147. 	//		print $toc;
  148. 	//		exit;
  149.  
  150. 			return <<<TOC
  151. 	<div id="tran-mh">
  152. <div style="float:right">
  153. <table style="border: 1px solid rgb(148, 148, 148); margin: 0 0pt 10px 10px; padding: 3px 4px 5px 4px; background-color: rgb(233, 242, 254); color: rgb(51, 51, 51); font-family: Verdana,Arial,Helvetica,sans-serif; font-size: 10px; line-height: 13px; width: 200px;" cellspacing="0" cellpadding="0">
  154.  <tr>
  155.  <td colspan="2" style="padding: 10px 0; text-align:center"><font style="font-size: 12px; font-weight: bold;">{$TERMS['About This'][$resLang]} {$TERMS[$db[$resLang][$res]['Media Type']][$resLang]}</font></td>
  156.  </tr>
  157.  <tr>
  158.  <td valign="top" width="85"><strong>{$TERMS['Publisher'][$resLang]}:</strong></td>
  159.  <td valign="top">[[:Category:{$db[$resLang][$res]['Publisher']}|{$db[$resLang][$res]['Publisher']}]]</td>
  160.  </tr>
  161.  <tr>
  162.  <td valign="top"><strong>{$TERMS['Author'][$resLang]}:</strong></td>
  163.  <td valign="top">[[:Category:{$db[$resLang][$res]['Author']}|{$db[$resLang][$res]['Author']}]] $satext</td>
  164.  </tr> 
  165.  <tr> 
  166.  <td valign="top"><strong>{$TERMS[$stext][$resLang]}:</strong></td>
  167.  <td valign="top">[[:Category:{$db[$resLang][$res]['Book/series']}|{$db[$resLang][$res]['Book/series']}]]</td>
  168.  </tr>
  169.  <tr>       
  170.  <td valign="top"><strong>{$TERMS['Topic'][$resLang]}:</strong></td>
  171.  <td valign="top">[[:Category:{$TERMS[$db[$resLang][$res]['Topic']][$resLang]}|{$TERMS[$db[$resLang][$res]['Topic']][$resLang]}]]</td>
  172.  </tr>
  173.  <tr>
  174.  <td valign="top"><strong>{$TERMS['Subtopic'][$resLang]}:</strong></td>
  175.  <td valign="top">[[:Category:{$TERMS[$db[$resLang][$res]['Subtopic']][$resLang]}|{$TERMS[$db[$resLang][$res]['Subtopic']][$resLang]}]]</td>
  176.  </tr>
  177.  <tr>
  178.  <td valign="top" style="padding-bottom: 4px;"><strong>{$TERMS['Date'][$resLang]}:</strong></td>
  179.  <td valign="top" style="padding-bottom: 4px;">[[:Category:{$db[$resLang][$res]['Date']}|{$db[$resLang][$res]['Date']}]]</td>
  180.  </tr>
  181.  <tr>
  182.  <td valign="top" style="border-top: 1px solid #999; padding-top: 4px; margin-top: 4px;"><strong>{$TERMS['Status'][$resLang]}:</strong></td>
  183.  <td valign="top" style="border-top: 1px solid #999; padding-top: 4px; margin-top: 4px;">[[:Category:{$db[$resLang][$res]['Review Status']}|{$db[$resLang][$res]['Review Status']}]]</td>
  184.  </tr>
  185.  <tr>
  186.  <td valign="top"><strong>{$TERMS['Translator'][$resLang]}:</strong></td>
  187.  <td valign="top">[[:Category:{$db[$resLang][$res]['Translator']}|{$db[$resLang][$res]['Translator']}]]</td>
  188.  </tr>
  189.  <tr>
  190.  <td style="padding-bottom: 4px;"><strong>{$TERMS['Reviewer'][$resLang]}:</strong></td>
  191.  <td style="padding-bottom: 4px;">[[:Category:{$db[$resLang][$res]['Reviewer']}|{$db[$resLang][$res]['Reviewer']}]]</td>
  192.  </tr>
  193. </table>
  194. </div>
  195. [[Category:{$db[$resLang][$res]['Review Status']}]]
  196. [[Category:{$TERMS[$db[$resLang][$res]['Media Type']][$resLang]}]]
  197. [[Category:{$TERMS[$db[$resLang][$res]['Topic']][$resLang]}]]
  198. [[Category:{$TERMS[$db[$resLang][$res]['Subtopic']][$resLang]}]]
  199. [[Category:{$db[$resLang][$res]['Author']}]] 
  200. [[Category:{$db[$resLang][$res]['Publisher']}]]
  201. [[Category:{$db[$resLang][$res]['Book/series']}]]
  202. [[Category:$timestamp]]<!--
  203. ###### Needs Review template
  204. {{ #ifeq: {{#language:{{#titleparts:{{PAGENAME}}|{{{levels}}}|{{#expr: {{{levels}}} +1 }}}}}} | English | |{{ #ifeq: {{{reviewed}}} | Final Version | | {{ #ifeq: {{{reviewed}}} | Peer Reviewed | {{PeerReviewed}} | {{NeedsReview}}}}}}}}
  205. --></div>
  206. $toc
  207.  
  208. TOC;
  209. 	}
  210. 	else {
  211. 		return <<<TRB
  212. 	<div id="tran-mh">
  213. <div style="float:right">
  214. <table style="border: 1px solid rgb(148, 148, 148); margin: 0 0pt 10px 10px; padding: 3px 4px 5px 4px; background-color: rgb(233, 242, 254); color: rgb(51, 51, 51); font-family: Verdana,Arial,Helvetica,sans-serif; font-size: 10px; line-height: 13px; width: 200px;" cellspacing="0" cellpadding="0">
  215.  <tr>
  216.  <td colspan="2" style="padding: 10px 0; text-align:center"><font style="font-size: 12px; font-weight: bold;">{$TERMS['About This'][$resLang]} {$TERMS[$db[$resLang][$res]['Media Type']][$resLang]}</font></td>
  217.  </tr>
  218.  <tr>
  219.  <td valign="top" width="85"><strong>{$TERMS['Publisher'][$resLang]}:</strong></td>
  220.  <td valign="top">[[:Category:{$db[$resLang][$res]['Publisher']}|{$db[$resLang][$res]['Publisher']}]]</td>
  221.  </tr>
  222.  <tr>
  223.  <td valign="top"><strong>{$TERMS['Author'][$resLang]}:</strong></td>
  224.  <td valign="top">[[:Category:{$db[$resLang][$res]['Author']}|{$db[$resLang][$res]['Author']}]] $satext</td>
  225.  </tr> 
  226.  <tr> 
  227.  <td valign="top"><strong>{$TERMS[$stext][$resLang]}:</strong></td>
  228.  <td valign="top">[[:Category:{$db[$resLang][$res]['Book/series']}|{$db[$resLang][$res]['Book/series']}]]</td>
  229.  </tr>
  230.  <tr>       
  231.  <td valign="top"><strong>{$TERMS['Topic'][$resLang]}:</strong></td>
  232.  <td valign="top">[[:Category:{$TERMS[$db[$resLang][$res]['Topic']][$resLang]}|{$TERMS[$db[$resLang][$res]['Topic']][$resLang]}]]</td>
  233.  </tr>
  234.  <tr>
  235.  <td valign="top"><strong>{$TERMS['Subtopic'][$resLang]}:</strong></td>
  236.  <td valign="top">[[:Category:{$TERMS[$db[$resLang][$res]['Subtopic']][$resLang]}|{$TERMS[$db[$resLang][$res]['Subtopic']][$resLang]}]]</td>
  237.  </tr>
  238.  <tr>
  239.  <td valign="top" style="padding-bottom: 4px;"><strong>{$TERMS['Date'][$resLang]}:</strong></td>
  240.  <td valign="top" style="padding-bottom: 4px;">[[:Category:{$db[$resLang][$res]['Date']}|{$db[$resLang][$res]['Date']}]]</td>
  241.  </tr>
  242.  <tr>
  243.  <td valign="top" style="border-top: 1px solid #999; padding-top: 4px; margin-top: 4px;"><strong>{$TERMS['Status'][$resLang]}:</strong></td>
  244.  <td valign="top" style="border-top: 1px solid #999; padding-top: 4px; margin-top: 4px;">[[:Category:{$db[$resLang][$res]['Review Status']}|{$db[$resLang][$res]['Review Status']}]]</td>
  245.  </tr>
  246.  <tr>
  247.  <td valign="top"><strong>{$TERMS['Translator'][$resLang]}:</strong></td>
  248.  <td valign="top">[[:Category:{$db[$resLang][$res]['Translator']}|{$db[$resLang][$res]['Translator']}]]</td>
  249.  </tr>
  250.  <tr>
  251.  <td style="padding-bottom: 4px;"><strong>{$TERMS['Reviewer'][$resLang]}:</strong></td>
  252.  <td style="padding-bottom: 4px;">[[:Category:{$db[$resLang][$res]['Reviewer']}|{$db[$resLang][$res]['Reviewer']}]]</td>
  253.  </tr>
  254. </table>
  255. </div>
  256. [[Category:{$db[$resLang][$res]['Review Status']}]]
  257. [[Category:{$TERMS[$db[$resLang][$res]['Media Type']][$resLang]}]]
  258. [[Category:{$TERMS[$db[$resLang][$res]['Topic']][$resLang]}]]
  259. [[Category:{$TERMS[$db[$resLang][$res]['Subtopic']][$resLang]}]]
  260. [[Category:{$db[$resLang][$res]['Author']}]] 
  261. [[Category:{$db[$resLang][$res]['Publisher']}]]
  262. [[Category:{$db[$resLang][$res]['Book/series']}]]
  263. [[Category:$timestamp]]<!--
  264. ###### Needs Review template
  265. {{ #ifeq: {{#language:{{#titleparts:{{PAGENAME}}|{{{levels}}}|{{#expr: {{{levels}}} +1 }}}}}} | English | |{{ #ifeq: {{{reviewed}}} | Final Version | | {{ #ifeq: {{{reviewed}}} | Peer Reviewed | {{PeerReviewed}} | {{NeedsReview}}}}}}}}
  266. --></div>
  267. {{:gt_en:{$params[$passv]}}}
  268.  
  269. TRB;
  270. 	}
  271. }
  272.  
  273. function ScrapePage($content){
  274. 	global $allTranslations;
  275. 	global $wgLanguageNames;
  276. 	global $db;
  277. 	global $RL;
  278. 	$data_lines = explode(']]]', $content);
  279. 	array_pop($data_lines);
  280.  
  281. 	foreach ($data_lines as $tvalue){
  282. 		// $data_line is english_title:::trans_title
  283. 		$translation = explode(':::', $tvalue);
  284.  
  285. 		// load up $allTranslations[foreign] = english
  286.  
  287. 		// replace spaces with underscores in foreign title (for wiki compatibility)
  288. 		$foreign = str_ireplace(" ","_", $translation[1]);
  289. 		// make $titleParts by splitting up the english title by / marks
  290. 		$titleParts = explode('/', $translation[0]);
  291.  
  292. 		$lastPart = end($titleParts);
  293. 		$res = str_ireplace('/'.$lastPart, '', $translation[0]);
  294. 		// set aside the resource temporarily if it is a book part
  295. 		$short_res = $titleParts[0]."/".$titleParts[1];
  296. 		if(isset($db[$RL][$short_res]['Media Type'])){
  297. 			if ($db[$RL][$short_res]['Media Type'] == 'Chapter') {
  298. 				$temp_array[$translation[0]] = $foreign;
  299. 			}
  300. 		}
  301. 		// ........otherwise just load it up
  302. 		else
  303. 			$allTranslations[$foreign] = $translation[0];
  304.   	}	
  305.  
  306. 	if($_GET['whattodo'] === 'print')
  307. 		print_r($temp_array);
  308.  
  309. 	// $flip will be $..[eng] => foreign
  310. 	$flip = array_flip($allTranslations);
  311. 	foreach ($temp_array as $eng=>$for){
  312. 		$temp_res_array = explode('/', $eng);
  313. 		$temp_book = $temp_res_array[0].'/'.$temp_res_array[2];
  314. 		$for_book = $flip[$temp_book];
  315. 		$for_full_title = $for_book.'/'.$for;
  316. 		$allTranslations[$for_full_title] = $eng;
  317. 	}
  318.  
  319. 		//print_r($temp_array);
  320.  
  321. 	return $content;
  322. }
  323.  
  324. echo "Done --" . $passv;
  325.  
  326. ?>
Navigation
Volunteer Tools
Other Wikis
Toolbox