1. 论坛系统升级为Xenforo,欢迎大家测试!
    排除公告

国外PHP代码的中文搜索

本帖由 xinli_zhao2006-11-12 发布。版面名称:源码讨论

  1. xinli_zhao

    xinli_zhao Member

    注册:
    2006-08-31
    帖子:
    201
    赞:
    1
    大家好,我用了askpert这个问吧程序,但是不支持中文搜索?如何修改才能支持中文搜索啊?默认源码是UTF8
    感谢

    下面为它的search代码:

    <?
    //
    // Askpert
    //
    // Basic Search engine class
    // search.php
    //
    // (C) 2004 W3matter LLC
    // This is commercial software!
    // Please read the license at:
    // http://www.w3matter.com/license
    //

    // Handles indexing and searching
    // Stores results in the database
    //

    class search
    {
    // The document being searched for
    var $query;

    // The document itself
    var $document;

    // query suff
    var $keywords;
    var $limit;
    var $page;
    var $pagesize;
    var $results;

    // Internal stuff
    var $db;
    var $words;
    var $stopwords;

    function search()
    {
    global $DEFAULT;

    // Connect to database
    $this->db = new database();
    $this->db->connect();

    // Loadup stopwords
    $this->stopwords = array_flip(explode("|", str_replace(array("\n", "\r"), "", implode("|", file("lang/stopwords.txt")))));

    // Create a document object
    $this->document = new document();

    return(TRUE);
    }

    // Conduct a query
    function query()
    {
    global $DEFAULT;

    // Create an empty set of results
    $this->results = array();

    // Parse the query string and remove stopwords
    $text = @str_replace(array("\n", "\r"), " ", @strtolower(@trim($this->keywords)));
    $words = @preg_split('/\W+/', $text, -1, PREG_SPLIT_NO_EMPTY);
    if(@count($words) == 0)
    return(FALSE);

    $words = array_count_values($words);
    $w = array();
    reset($words);
    while(list($word,$n) = each($words))
    {
    if(!$this->stopwords[$word])
    {
    $id = $this->db->get_keyword($word, FALSE);
    if($id)
    $out[] = $id;
    }
    }

    if(count($out) == 0)
    return(FALSE);

    // Get ranked list of documents that match
    $inlist = implode(",", array_values($out));
    $docs = $this->db->getsql("SELECT docid, sum(score) AS rank
    FROM ax_search
    WHERE keywordid IN ($inlist)
    GROUP BY docid
    ORDER BY rank DESC
    LIMIT 100");

    if(count($docs) == 0)
    return(FALSE);

    // Loadup the list of documents
    foreach($docs as $rec)
    {
    $d = $this->db->getsql("SELECT id,subject,content FROM ax_questions WHERE id='$rec[docid]'");
    if($d[0][id])
    {
    $x = new document();
    $x->id = $d[0][id];
    $x->title = stripslashes($d[0][subject]);
    $x->body = substr(stripslashes(str_replace(array("\n","\r")," ", $d[0][content])),0,128) . "...";
    $this->results[] = $x;
    }
    }

    return(TRUE);
    }


    // Index something existing in the document class
    function index()
    {
    if(!$this->document->id)
    return(FALSE);

    // Delete if this document was previously indexed
    $id = $this->document->id;
    $exists = $this->db->getsql("SELECT docid FROM ax_search WHERE docid='$id'");
    if($exists[0][docid] > 0)
    $this->db->getsql("DELETE FROM ax_search WHERE docid='$id'");

    // Parse the title and the body
    $this->words = array();
    $this->parse($this->document->title, $id, 1.5);
    $this->parse($this->document->body, $id, 1);

    if(count($this->words) > 0)
    {
    reset($this->words);
    while(list($keywordid,$score) = each($this->words))
    $this->db->getsql("INSERT INTO ax_search (keywordid, docid, score) VALUES ('$keywordid', '$id', '$score')");
    }

    return(TRUE);
    }


    // Parse a document and return its keyword properties
    // also kill stopwords, and store the document
    function parse($text="", $id=0, $punch=1)
    {
    $text = str_replace(array("\n", "\r"), " ", strtolower(trim($text)));
    $words = preg_split('/\W+/', $text, -1, PREG_SPLIT_NO_EMPTY);
    if(count($words) == 0)
    return(FALSE);

    // Grab the word count
    $words = array_count_values($words);

    // Store the document
    reset($words);
    while(list($word,$cnt) = each($words))
    {
    if($this->stopwords[$word])
    continue;

    $keywordid = $this->db->get_keyword($word, TRUE);
    $c = $cnt * $punch;
    $this->words[$keywordid] += $c;
    }

    return(TRUE);
    }

    }

    // The search document
    class document
    {
    var $id;
    var $title;
    var $body;
    }

    ?>