<?php
/**
 * Indexing spider of the search engine.
 *
 * Fetches the pages that are not indexed yet (or whose index has been
 * invalidated) from the page table, hands them to a Rech_Indexeur, refreshes
 * missing or expired specs, and exposes maintenance helpers and counters on
 * the index tables.
 */
class Rech_Spider {
	/** Rech_Indexeur instance every indexing call is delegated to. */
	private $indexeur ;
	/**
	 * Reporting mode: false = silent, true = RealTimeProgression output with
	 * CStat timings, "cron" = plain echo of every processed item.
	 */
	public static $use_stats = false;
	/** Minimum delay, in seconds, between two progress refreshes in indexe_all(). */
	private $timer_affichage_progression = 2;

	/**
	 * Creates the spider together with its indexer.
	 */
	public function __construct() {
		$this->indexeur = new Rech_Indexeur(true);
	}

	/**
	 * PHP 4 style constructor name, kept so that explicit calls such as
	 * parent::Rech_Spider() keep working. PHP 8 no longer treats a method
	 * named after its class as a constructor, hence __construct() above.
	 */
	public function Rech_Spider() {
		$this->__construct();
	}

	/**
	 * Indexes the next page waiting in the queue and refreshes the total weights.
	 *
	 * @return mixed result of Rech_Indexeur::indexe_page(), or false when the
	 *               queue is empty
	 */
	public function indexe_page_suivante() {
		$page_suivante = $this->get_page_suivante();
		if(!$page_suivante) {
			return false;
		}
		$retour = $this->indexeur->indexe_page($page_suivante["id_type_page"],$page_suivante["id_page_type"]);
		$this->indexeur->maj_poids_total();
		return $retour;
	}

	/**
	 * Tells whether a lock whose date is still in the future exists.
	 *
	 * @return mixed result of testReq2() on the lock table
	 */
	public static function est_bloque() {
		return testReq2("SELECT * FROM ".RECH_BASE.RECH_TABLE_SPIDER_LOCK." WHERE date_lock>now()");
	}

	/**
	 * Inserts a lock that expires $duree minutes from now.
	 *
	 * The duration is converted to a whole number of seconds before it reaches
	 * the SQL text: fractional minutes are honoured and nothing but an integer
	 * is ever interpolated into the query.
	 *
	 * @param int|float $duree lock duration in minutes
	 * @return mixed result of ifReq()
	 */
	public static function bloque_spider($duree) {
		$secondes = (int) round($duree*60);
		return ifReq("INSERT INTO ".RECH_BASE.RECH_TABLE_SPIDER_LOCK." SET date_lock = now() + INTERVAL $secondes SECOND");
	}

	/**
	 * Removes every lock row.
	 *
	 * @return mixed result of ifReq()
	 */
	public static function debloque_spider() {
		return ifReq("DELETE FROM ".RECH_BASE.RECH_TABLE_SPIDER_LOCK);
	}

	/**
	 * Indexes every pending page, then the missing specs, within a time budget.
	 *
	 * Does nothing when a lock is active. Otherwise a lock covering the whole
	 * budget is taken and released at the end of a complete run. When the
	 * budget runs out the method returns immediately: specs and total weights
	 * are skipped and the lock is left to expire by itself (less than three
	 * seconds later).
	 *
	 * @param int  $duree_max       time budget in minutes
	 * @param bool $maj_poids_total whether to refresh the total weights at the end
	 * @return void
	 */
	public function indexe_all($duree_max = 30,$maj_poids_total = true) {
		$mode_cron = (self::$use_stats === "cron");
		$mode_stats = (self::$use_stats === true);

		if(self::est_bloque()) {
			if($mode_cron) {
				echo "le spider est locké<br>";
			} else if($mode_stats) {
				RealTimeProgression::print_div("spider is locked");
			}
			return;
		}
		self::bloque_spider($duree_max);
		ini_set("max_execution_time", $duree_max*60+60);

		// Initialised up front so that the final report never reads an
		// undefined variable, even when the page query fails.
		$stats_spider = null;
		$i = 0;
		$nb_total = 0;
		if($mode_cron) {
			$stats_spider = new CStat();
		} else if($mode_stats) {
			$stats_spider = new CStat();
			// Pages left to process = all pages minus those already indexed and valid.
			$nb_deja_valides = $this->get_nb_pages_indexees_valides();
			$nb_total = $this->get_nb_pages()-$nb_deja_valides;
		}

		$timer_init = $timer_aff = $timer = microtime(true);
		$temps_execution_restant = $duree_max*60;

		// Pages never indexed come first, then the invalidated ones.
		$query = ifReq("SELECT id_page,id_type_page, id_page_type FROM ".RECH_BASE.RECH_TABLE_PAGE." WHERE indexee=0 OR index_valide = 0 ORDER BY indexee ASC, index_valide ASC");
		if($query) {
			while($page_suivante = mysql_fetch_assoc($query)) {
				if($mode_stats) $stats_spider->debutStat("indexe page");
				$this->indexeur->indexe_page($page_suivante["id_type_page"],$page_suivante["id_page_type"]);
				$timer2 = microtime(true);

				if($mode_cron) {
					echo "indexe page $page_suivante[id_type_page] : $page_suivante[id_page_type] (".round(($timer2-$timer)*1000,2)."ms)<br>";
					$timer = $timer2;
				} else if($mode_stats) {
					$i++;
					if($timer2-$timer_aff>$this->timer_affichage_progression) {
						$timer_aff = $timer2;
						// Linear estimate: mean time per page so far times the pages left.
						$temps_restant = max(0, round(($timer2-$timer_init)/$i*($nb_total-$i),2));
						$ms = $this->formate_duree($temps_restant);
						$ms2 = $this->formate_duree($temps_execution_restant);
						if($temps_execution_restant<$temps_restant) $ms2="<span style='color:red'>$ms2</span>";
						RealTimeProgression::print_div("indexee $i/$nb_total<br>temps restant $ms <br>temps execution restant $ms2 <br>".$stats_spider->out(true));
						// $nb_total can be 0 when the counting queries failed.
						RealTimeProgression::setProgression($nb_total>0 ? $i/$nb_total : 1);
					}
				}

				$temps_execution_restant = ($duree_max*60)-($timer2-$timer_init);
				if($temps_execution_restant<3) return;
			}
		}

		$this->indexe_all_specs();

		if($maj_poids_total) {
			if($mode_stats) $stats_spider->debutStat("RECH_maj_poids_total");
			$this->indexeur->maj_poids_total();
		}
		if($mode_stats) {
			RealTimeProgression::print_div("indexee $i/$nb_total<br>".$stats_spider->out(true)
				."Rech_Parseur : ".print_rr(Rech_Parseur::get_stats(),true)
				."Rech_Indexeur : ".print_rr($this->indexeur->get_stats(),true)
				.CStat::SOut(true)
			);
			$stats_spider->finStat();
		}
		self::debloque_spider();
	}

	/**
	 * Formats a number of seconds as "<minutes>m <seconds>s".
	 * Negative values are shown as zero; the fractional part is dropped.
	 *
	 * @param int|float $secondes
	 * @return string
	 */
	private function formate_duree($secondes) {
		$secondes = max(0, (int) floor($secondes));
		return floor($secondes/60)."m ".($secondes%60)."s";
	}

	/**
	 * Deletes root keywords (no parent) whose total weight is 0 and that are
	 * not attached to any page. In "cron" mode the number of deleted rows is
	 * echoed.
	 *
	 * @return void
	 */
	public function efface_orphelins() {
		$req="DELETE mc FROM ".RECH_BASE.RECH_TABLE_MOT_CLE." mc LEFT JOIN ".RECH_BASE.RECH_TABLE_MOT_CLE_PAGE." mcp ON mc.id_mot_cle=mcp.id_mot_cle
WHERE `id_mot_parent` IS NULL AND `poids_total` = 0
AND mcp.id_page IS NULL";
		if(ifReq($req) && self::$use_stats === "cron") {
			echo (mysql_affected_rows2()." mots supprimés<br>");
		}
	}

	/**
	 * Computes and stores every spec value that is missing or whose validity
	 * date has passed.
	 *
	 * @return void
	 */
	public function indexe_all_specs() {
		// One row per (page, spec type) pair lacking a value or holding an expired one.
		$req="SELECT p.id_page,p.id_type_page,p.id_page_type, ts.id_type_spec, ts.identifiant
			FROM ".RECH_BASE.RECH_TABLE_PAGE." p INNER JOIN ".RECH_BASE.RECH_TABLE_TYPE_SPEC." ts ON ts.id_type_page = p.id_type_page
			LEFT JOIN ".RECH_BASE.RECH_TABLE_SPEC_VAL." sv ON sv.id_type_spec = ts.id_type_spec AND sv.id_page = p.id_page
			WHERE sv.id_page IS NULL OR sv.validite < NOW()
			ORDER BY p.id_page;";
		$query = ifReq($req);
		if(!$query) {
			return;
		}

		$mode_cron = (self::$use_stats === "cron");
		$mode_stats = (self::$use_stats === true);

		$i = 0;
		$nb_total = 0;
		$pas_affichage = 1;
		if($mode_stats) {
			$nb_total = mysql_num_rows($query);
			// Refresh the display about once per percent. The step is clamped
			// to 1 because round($nb_total/100) is 0 for small result sets and
			// a modulo by zero is an error.
			$pas_affichage = max(1, (int) round($nb_total/100));
		}

		$old_id_page = -1;
		$classe_page = null;
		while($res_reindexe = mysql_fetch_assoc($query)) {
			$id_page = $res_reindexe["id_page"];
			$id_page_type = $res_reindexe["id_page_type"];
			$id_type_spec = $res_reindexe["id_type_spec"];

			// Rows are ordered by page: the page object is fetched once and
			// reused for all the specs of that page.
			if($id_page != $old_id_page) {
				$old_id_page = $id_page;
				$classe_page = $this->indexeur->get_classe_page($res_reindexe["id_type_page"],$id_page_type);
			}
			$val_spec = $classe_page->get_spec($res_reindexe["identifiant"]);
			$this->indexeur->set_spec_val($id_type_spec,$id_page,$val_spec);

			if($mode_cron) {
				echo ("specs ($id_page_type - $id_type_spec )<br>");
			} else if($mode_stats) {
				$i++;
				if($pas_affichage == 1 || ($i%$pas_affichage) == 1) {
					RealTimeProgression::print_div("indexation specs manquantes ($id_page_type - $id_type_spec ) : $i/$nb_total<br>");
					RealTimeProgression::setProgression($i/$nb_total);
				}
			}
		}
	}

	/**
	 * Adds a page to the queue of pages to analyse by delegating to
	 * Rech_Indexeur::ajoute_page_type().
	 *
	 * @param mixed $type_page    page type, forwarded to the indexer
	 * @param mixed $id_page_type identifier of the page within its type
	 * @param mixed $set_valide   forwarded as is to the indexer
	 * @param int   $priorite     currently unused
	 * @return void nothing is returned
	 */
	public function ajoute_page($type_page,$id_page_type,$set_valide = NULL,$priorite = 0) {
		$this->indexeur->ajoute_page_type($type_page,$id_page_type,$set_valide);
	}

	/**
	 * Removes a page from the index.
	 *
	 * @param mixed $id_type_page
	 * @param mixed $id_page_type
	 * @return void
	 */
	public function supprime_page_index($id_type_page,$id_page_type) {
		$this->indexeur->efface_page($id_type_page,$id_page_type);
	}

	/**
	 * Fetches one page that is not indexed or whose index is invalid.
	 *
	 * @return mixed result of testReq3()
	 */
	private function get_page_suivante() {
		return testReq3("SELECT id_page,id_type_page, id_page_type FROM ".RECH_BASE.RECH_TABLE_PAGE." WHERE indexee=0 OR index_valide = 0 LIMIT 0,1");
	}


	/*
	 *
	 * maintenance
	 *
	 */

	/**
	 * Sets the validity of every stored spec value to 0 and traces the outcome.
	 *
	 * @return void
	 */
	public static function mtn_invalide_specs() {
		SQL_admin::trace("INVALIDATION DE TOUTES LES SPECS");
		if(ifReq("UPDATE ".RECH_BASE.RECH_TABLE_SPEC_VAL." SET validite=0")) {
			SQL_admin::trace("OK");
		} else {
			SQL_admin::trace("ERREUR : ".MysqlManager::mysql_error());
		}
	}

	/**
	 * Invalidates the whole index and, when that succeeded, re-indexes
	 * everything right away.
	 *
	 * @return void
	 */
	public function mtn_reindex_all_now() {
		if($this->mtn_invalide_index()) {
			$this->indexe_all();
		}
	}

	/**
	 * Flags pages of the index as invalid. Index data is kept, but the spider
	 * will index those pages again.
	 *
	 * An unknown page type is reported and leaves the table untouched; it no
	 * longer falls through to an unfiltered UPDATE that would invalidate
	 * every page.
	 *
	 * @param string $filtre_type identifier of the page type to invalidate;
	 *                            "" or "*" invalidates every page
	 * @return bool true when the UPDATE ran, false on unknown type or query failure
	 */
	public function mtn_invalide_index($filtre_type="") {
		$where = "";
		if($filtre_type !== NULL && $filtre_type !== "" && $filtre_type !== "*") {
			$identifiant = mysql_real_escape_string((string) $filtre_type);
			$res_type = testReq3("SELECT id_type_page FROM ".RECH_BASE.RECH_TABLE_TYPE_PAGE." WHERE identifiant='$identifiant'");
			if(!$res_type) {
				SQL_admin::trace("ERREUR : type de page inconnu");
				return false;
			}
			$where = " WHERE id_type_page=".(int) $res_type["id_type_page"];
		}
		if(!ifReq("UPDATE ".RECH_BASE.RECH_TABLE_PAGE." SET index_valide = 0 $where")) {
			return false;
		}
		SQL_admin::trace(mysql_affected_rows()." entrées réinitialisées (mais pas effacées)");
		return true;
	}


	/*
	 *
	 * statistics
	 *
	 */

	/*
	 * page counts
	 */

	/**
	 * Number of pages still to index (never indexed, or index invalid).
	 *
	 * @return mixed count, or -1 when the query gives no result
	 */
	public function get_nb_pages_restantes() {
		return $this->get_nb_pages(0,0,"OR");
	}

	/**
	 * Total number of pages.
	 *
	 * @return mixed count, or -1 when the query gives no result
	 */
	public function get_nb_pages_total() {
		return $this->get_nb_pages();
	}

	/**
	 * Number of indexed pages, whatever the validity of their index.
	 *
	 * @return mixed count, or -1 when the query gives no result
	 */
	public function get_nb_pages_indexees() {
		return $this->get_nb_pages(1);
	}

	/**
	 * Number of indexed pages that have to be indexed again.
	 *
	 * @return mixed count, or -1 when the query gives no result
	 */
	public function get_nb_page_invalides() {
		return $this->get_nb_pages(1,0);
	}

	/**
	 * Number of indexed pages whose index is valid.
	 *
	 * @return mixed count, or -1 when the query gives no result
	 */
	public function get_nb_pages_indexees_valides() {
		return $this->get_nb_pages(1,1);
	}

	/**
	 * Generic page counter (indexed or not, valid or not).
	 *
	 * Filter values are cast to integers and the operator is restricted to
	 * AND / OR, so no caller-supplied text reaches the query.
	 *
	 * @param int|null $indexee      NULL (no filter) | 0 | 1
	 * @param int|null $index_valide NULL (no filter) | 0 | 1
	 * @param string   $AND_OR       "AND" | "OR"; anything else is treated as "AND"
	 * @return mixed first column of the COUNT row, or -1 when the query gives no result
	 */
	public function get_nb_pages($indexee = NULL,$index_valide=NULL,$AND_OR = "AND") {
		$wheres = array();
		if($indexee !== NULL) $wheres[] = "indexee=".(int) $indexee;
		if($index_valide !== NULL) $wheres[] = "index_valide=".(int) $index_valide;
		$operateur = (strtoupper(trim((string) $AND_OR)) === "OR") ? "OR" : "AND";
		$where = calcule_where($wheres,$operateur);
		return $this->compte_lignes("SELECT count(*) FROM ".RECH_BASE.RECH_TABLE_PAGE." $where");
	}

	/**
	 * Runs a "SELECT count(*)" query through testReq2().
	 *
	 * @param string $requete complete SQL query
	 * @return mixed first column of the returned row, or -1 when testReq2()
	 *               gives a falsy result
	 */
	private function compte_lignes($requete) {
		$res = testReq2($requete);
		if($res) {
			return $res[0];
		}
		return -1;
	}


	/*
	 * keyword counts
	 */

	/**
	 * Number of keywords without a parent.
	 *
	 * @return mixed count, or -1 when the query gives no result
	 */
	public function get_nb_mots_cles() {
		return $this->compte_lignes("SELECT count(*) FROM ".RECH_BASE.RECH_TABLE_MOT_CLE." WHERE id_mot_parent IS NULL");
	}

	/**
	 * Number of keywords that have a parent.
	 *
	 * @return mixed count, or -1 when the query gives no result
	 */
	public function get_nb_mots_cles_dictionnaire() {
		return $this->compte_lignes("SELECT count(*) FROM ".RECH_BASE.RECH_TABLE_MOT_CLE." WHERE id_mot_parent IS NOT NULL");
	}

	/**
	 * Number of rows of the keyword/page association table.
	 *
	 * @return mixed count, or -1 when the query gives no result
	 */
	public function get_nb_mots_cles_pages() {
		return $this->compte_lignes("SELECT count(*) FROM ".RECH_BASE.RECH_TABLE_MOT_CLE_PAGE);
	}


	/**
	 * Gathers all the counters describing the current index.
	 *
	 * @return array counters keyed by name
	 */
	public function stats_index() {
		return array(
			"nb_page_total" => $this->get_nb_pages_total(),
			"nb_page_indexees" => $this->get_nb_pages_indexees(),
			"nb_page_invalides" => $this->get_nb_page_invalides(),
			"nb_page_restantes" => $this->get_nb_pages_restantes(),
			"nb_mots_cles" => $this->get_nb_mots_cles(),
			"nb_mots_cles_dictionnaire" => $this->get_nb_mots_cles_dictionnaire(),
			"nb_mots_cles_pages" => $this->get_nb_mots_cles_pages(),
		);
	}
}
/**
 * Builds an SQL condition clause from a list of conditions.
 *
 * @param array  $wheres  conditions to join
 * @param string $AND_OR  logical operator placed between the conditions
 * @param string $premier keyword placed in front of the clause
 * @return string "" when $wheres is not an array or is empty, otherwise
 *                "$premier cond1 $AND_OR cond2 ..."
 */
function calcule_where($wheres,$AND_OR,$premier="WHERE") {
	if(is_array($wheres) && !empty($wheres)) {
		$separateur = " ".$AND_OR." ";
		return $premier." ".implode($separateur,$wheres);
	}
	return "";
}

?>
