找回密码
 注册
搜索
热搜: 回贴
微赢网络技术论坛 门户 网络编程 PHP 查看内容

简单站内HTML文件搜索程序

2009-12-21 17:51| 发布者: admin| 查看: 43| 评论: 0|原作者: 云天河

  这是我自己写的一个简单程序, 用来搜索自己电脑上的技术资料. 大概的思路是遍历指定目录下的文本文件, 如果文件内容与关键字相匹配就返回该文件名, 最后把所有搜索结果显示出来. 显示结果类似于 baidu/google, 呵呵, 这个纯粹是为了好玩.
  程序代码:
<?php
/**
* 文件: search.php
* 功能: 搜索指定目录下的HTML文件
* 创建: 2005-9-23
* 作者: heiyeluren
*/

/* 基本函数 */
/**
 * Recursively collect the paths of every file below a directory.
 *
 * Sub-directories are descended into; only non-directory entries are
 * returned. Each returned path is built as "$dir/<entry>".
 *
 * @param string $dir Directory to scan.
 * @return array List of file paths; empty when $dir is not a readable directory.
 */
function getFile($dir)
{
    $fileArr = array();
    if (!is_dir($dir)) {
        return $fileArr;
    }
    $dp = opendir($dir);
    if ($dp === false) {
        return $fileArr;
    }
    // Compare strictly against false: an entry named "0" is falsy and would
    // otherwise terminate the scan early.
    while (($curFile = readdir($dp)) !== false) {
        if ($curFile === "." || $curFile === "..") {
            continue;
        }
        // readdir() yields bare names, so the directory test must use the
        // full path rather than a name resolved against the working directory.
        $path = $dir . "/" . $curFile;
        if (is_dir($path)) {
            // Merge the nested result; assigning it would discard everything
            // collected so far.
            $fileArr = array_merge($fileArr, getFile($path));
        } else {
            $fileArr[] = $path;
        }
    }
    closedir($dp);
    return $fileArr;
}
/**
 * Read an entire file into a string.
 *
 * Terminates the script with die() when the file cannot be opened.
 *
 * @param string $file Path of the file to read.
 * @return string The file contents ("" for an empty file).
 */
function getFileContent($file)
{
    if (!$fp = fopen($file, "r")) {
        die("Cannot open file $file");
    }
    // Start from an empty string so an empty file yields "" instead of an
    // undefined variable.
    $fileContent = "";
    // Test the chunk explicitly: a chunk equal to "0" is falsy and must not
    // end the read.
    while (($text = fread($fp, 4096)) !== false && $text !== "") {
        $fileContent .= $text;
    }
    fclose($fp);
    return $fileContent;
}
/**
 * Report whether a file contains the keyword (case-insensitive).
 *
 * The keyword is matched literally: regex metacharacters and the "/"
 * delimiter are escaped before the pattern is built.
 *
 * @param string $file    Path of the file to search.
 * @param string $keyword Text to look for.
 * @return bool True when the keyword occurs in the file, false otherwise.
 */
function searchText($file, $keyword)
{
    $text = getFileContent($file);
    $pattern = "/" . preg_quote($keyword, "/") . "/i";
    // preg_match() returns 1 on a match, 0 on none and false on error.
    return preg_match($pattern, $text) === 1;
}
/**
 * Extract the text of the first <title> element of an HTML file.
 *
 * @param string $file    Path of the HTML file to read.
 * @param string $default Value returned when no non-empty title is found.
 * @return string The trimmed title text, or $default.
 */
function getFileTitle($file, $default = "None subject")
{
    $fileContent = getFileContent($file);
    // Capture only what sits between the tags; the "s" flag lets a title
    // span several lines.
    if (!preg_match("/<title[^>]*>(.*?)<\/title>/is", $fileContent, $matchResult)) {
        return $default;
    }
    $title = trim($matchResult[1]);
    return $title === "" ? $default : $title;
}

/**
 * Build a short plain-text description of an HTML file.
 *
 * Prefers the <meta name="description"> value; otherwise falls back to the
 * text of the <body> (or of the whole file when no body is found) with
 * scripts, styles, tags and entities removed.
 *
 * @param string $file    Path of the HTML file.
 * @param int    $length  Maximum number of characters taken from the body text
 *                        (counted by mb_substr() in its default encoding).
 * @param string $default Value returned when no description text is available.
 * @return string The meta description, the body excerpt followed by " ...",
 *                or $default.
 */
function getFileDescribe($file, $length = 200, $default = "None describe")
{
    $metas = get_meta_tags($file);
    if (isset($metas['description']) && $metas['description'] != "") {
        return $metas['description'];
    }

    $fileContent = getFileContent($file);
    if (preg_match("/<body.*<\/body>/is", $fileContent, $matchResult)) {
        $fileContent = $matchResult[0];
    }

    // Drop script/style blocks including their content before stripping tags,
    // otherwise their source code would leak into the description.
    $pattern = array("/<script\b.*?<\/script>/is", "/<style\b.*?<\/style>/is");
    $description = preg_replace($pattern, "", $fileContent);
    if (!is_string($description)) {
        // preg_replace() returns null on a PCRE error; fall back to the raw text.
        $description = $fileContent;
    }
    $description = strip_tags($description);
    // Remove HTML entities such as &nbsp; &quot; &#039;.
    $description = preg_replace("/&#?[a-z0-9]+;/i", "", $description);
    $description = trim(preg_replace("/\s+/", " ", (string) $description));

    if ($description === "") {
        return $default;
    }
    return mb_substr($description, 0, $length) . " ...";
}

/**
 * Wrap every occurrence of the keyword in a coloured <font> tag.
 *
 * Matching is literal and case-insensitive, in line with searchText(); the
 * matched text keeps its original letter case.
 *
 * @param string $text    Text to decorate.
 * @param string $keyword Keyword to highlight; an empty keyword leaves $text unchanged.
 * @param string $color   Colour value placed in the font tag.
 * @return string The text with highlighted keywords.
 */
function highLightKeyword($text, $keyword, $color = "#C60A00")
{
    if ((string) $keyword === "") {
        return $text;
    }
    // Escape characters that are special inside a preg replacement string.
    $safeColor = strtr($color, array('\\' => '\\\\', '$' => '\\$'));
    $pattern = "/" . preg_quote($keyword, "/") . "/i";
    $marked = preg_replace($pattern, "<font color=" . $safeColor . '>$0</font>', $text);
    return $marked === null ? $text : $marked;
}

/**
 * Return a file's size in whole kilobytes, formatted like "12K".
 *
 * @param string $file Path of the file.
 * @return string Size rounded down to whole KB, followed by "K".
 */
function getFileSize($file)
{
    return intval(filesize($file) / 1024) . "K";
}

/**
 * Return a file's last modification date formatted as "Y-m-d".
 *
 * @param string $file Path of the file.
 * @return string Modification date.
 */
function getFileTime($file)
{
    return date("Y-m-d", filemtime($file));
}

/**
 * Search every file below a directory for a keyword.
 *
 * @param string $dir     Directory to search.
 * @param string $keyword Keyword to look for.
 * @return array|false List of matching file paths, or false when the
 *                     directory holds no files or nothing matches.
 */
function searchFile($dir, $keyword)
{
    $sFile = getFile($dir);
    if (!is_array($sFile) || count($sFile) <= 0) {
        return false;
    }
    $sResult = array();
    foreach ($sFile as $file) {
        if (searchText($file, $keyword)) {
            $sResult[] = $file;
        }
    }
    return count($sResult) > 0 ? $sResult : false;
}

/* Demo driver */
// Directory to search
$dir = "./php_Linux";
// Keyword to search for
$keyword = "sendmail";
$fileArr = searchFile($dir, $keyword);
// searchFile() returns false when nothing matched, so guard before counting.
$searchSum = is_array($fileArr) ? count($fileArr) : 0;
echo "搜索关键字: <b>$keyword</b>   搜索目录: <b>$dir</b>   搜索结果: <b>$searchSum</b><br><hr size=1><br>";
if ($searchSum <= 0) {
    echo "没有搜索到任何结果";
} else {
    foreach ($fileArr as $file) {
        echo "<a href='$file' target='_blank'>" . highLightKeyword(getFileTitle($file), $keyword) .
            "</a> - " . getFileSize($file) . " " . getFileTime($file) .
            "<br>\n<font size=2>" . highLightKeyword(getFileDescribe($file), $keyword) .
            "</font><br><br>";
    }
}
?>
  完全可以用在自己已经生成静态内容的的搜索, 但是程序效率不高. 如果能够适当的加上 索引/缓存 等机制的话, 我想程序会有趣很多. <br></td></tr></table> <!--[diy=diycontentbottom]--><div id="diycontentbottom" class="area"></div><!--[/diy]--> <script src="data/cache/home.js?R1y" type="text/javascript"></script> <div id="click_div"><table cellpadding="0" cellspacing="0" class="atd"> <tr></tr> </table> <script type="text/javascript"> function errorhandle_clickhandle(message, values) { if(values['id']) { showCreditPrompt(); show_click(values['idtype'], values['id'], values['clickid']); } } </script> </div> <!--[diy=diycontentclickbottom]--><div id="diycontentclickbottom" class="area"></div><!--[/diy]--> </div> <div class="o cl ptm pbm"> <a href="https://bbs.weiying.cn/home.php?mod=spacecp&ac=favorite&type=article&id=23542&handlekey=favoritearticlehk_23542" id="a_favorite" onclick="showWindow(this.id, this.href, 'get', 0);" class="oshr ofav">收藏</a> <a href="https://bbs.weiying.cn/home.php?mod=spacecp&ac=share&type=article&id=23542&handlekey=sharearticlehk_23542" id="a_share" onclick="showWindow(this.id, this.href, 'get', 0);" class="oshr">分享</a> <a href="misc.php?mod=invite&action=article&id=23542" id="a_invite" onclick="showWindow('invite', this.href, 'get', 0);" class="oshr oivt">邀请</a> </div> </div> <!--[diy=diycontentrelatetop]--><div id="diycontentrelatetop" class="area"></div><!--[/diy]--> <!--[diy=diycontentrelate]--><div id="diycontentrelate" class="area"></div><!--[/diy]--> <div id="comment" class="bm"> <div class="bm_h cl"> <h3>最新评论</h3> </div> <div id="comment_ul" class="bm_c"><form id="cform" name="cform" action="portal.php?mod=portalcp&ac=comment" 
method="post" autocomplete="off"> <div class="tedt"> <div class="area"> <textarea name="message" rows="3" class="pt" id="message" onkeydown="ctrlEnter(event, 'commentsubmit_btn');"></textarea> </div> </div> <input type="hidden" name="portal_referer" value="portal.php?mod=view&aid=23542#comment"> <input type="hidden" name="referer" value="portal.php?mod=view&aid=23542#comment" /> <input type="hidden" name="id" value="0" /> <input type="hidden" name="idtype" value="" /> <input type="hidden" name="aid" value="23542"> <input type="hidden" name="formhash" value="9552aab3"> <input type="hidden" name="replysubmit" value="true"> <input type="hidden" name="commentsubmit" value="true" /> <p class="ptn"><button type="submit" name="commentsubmit_btn" id="commentsubmit_btn" value="true" class="pn"><strong>评论</strong></button></p> </form> </div> </div> <!--[diy=diycontentcomment]--><div id="diycontentcomment" class="area"></div><!--[/diy]--> </div> <div class="sd pph"> <div class="drag"> <!--[diy=diyrighttop]--><div id="diyrighttop" class="area"></div><!--[/diy]--> </div> <div class="bm"> <div class="bm_h cl"> <h2>相关分类</h2> </div> <div class="bm_c"> <ul class="xl xl2 cl"><li><a href="https://bbs.weiying.cn/portal.php?mod=list&catid=38">ASP/.NET</a></li> <li><a href="https://bbs.weiying.cn/portal.php?mod=list&catid=59">PHP</a></li> <li><a href="https://bbs.weiying.cn/portal.php?mod=list&catid=60">JSP/Java</a></li> <li><a href="https://bbs.weiying.cn/portal.php?mod=list&catid=61">CGI/perl</a></li> <li><a href="https://bbs.weiying.cn/portal.php?mod=list&catid=62">VB/.NET</a></li> <li><a href="https://bbs.weiying.cn/portal.php?mod=list&catid=63">C/C++</a></li> <li><a href="https://bbs.weiying.cn/portal.php?mod=list&catid=64">Delphi</a></li> <li><a href="https://bbs.weiying.cn/portal.php?mod=list&catid=65">Xml</a></li> </ul> </div> </div> <div class="drag"> <!--[diy=diy2]--><div id="diy2" class="area"></div><!--[/diy]--> </div> </div> </div> <div class="wp mtn"> <!--[diy=diy3]--><div 
id="diy3" class="area"></div><!--[/diy]--> </div> <input type="hidden" id="portalview" value="1"> </div> <div id="ft" class="wp cl"> <div id="flk" class="y"> <p> <a href="//wpa.qq.com/msgrd?v=3&uin=8828254&site=微赢网络技术论坛&menu=yes&from=discuz" target="_blank" title="QQ"><img src="static/image/common/site_qq.jpg" alt="QQ" /></a><span class="pipe">|</span><a href="https://bbs.weiying.cn/forum.php?mod=misc&action=showdarkroom" >小黑屋</a><span class="pipe">|</span><a href="topic-sitemap.html" target="_blank" >最新主题</a><span class="pipe">|</span><a href="https://bbs.weiying.cn/forum.php?showmobile=yes" >手机版</a><span class="pipe">|</span><strong><a href="http://bbs.weiying.cn" target="_blank">微赢网络技术论坛</a></strong> ( <a href="https://beian.miit.gov.cn/" target="_blank">苏ICP备08020429号</a> )<script> var _hmt = _hmt || []; (function() { var hm = document.createElement("script"); hm.src = "https://hm.baidu.com/hm.js?a66c064273122db3a05de72f6cd924eb"; var s = document.getElementsByTagName("script")[0]; s.parentNode.insertBefore(hm, s); })(); </script></p> <p class="xs0"> GMT+8, 2024-9-29 23:37<span id="debuginfo"> , Processed in 0.156589 second(s), 12 queries , Gzip On, MemCache On. </span> </p> </div> <div id="frt"> <p>Powered by <strong><a href="https://www.discuz.vip/" target="_blank">Discuz!</a></strong> <em>X3.5</em></p> <p class="xs0">© 2001-2023 <a href="https://code.dismall.com/" target="_blank">Discuz! Team</a>.</p> </div></div> <script src="home.php?mod=misc&ac=sendmail&rand=1727624232" type="text/javascript"></script> <div id="scrolltop"> <span hidefocus="true"><a title="返回顶部" onclick="window.scrollTo('0','0')" class="scrolltopa" ><b>返回顶部</b></a></span> </div> <script type="text/javascript">_attachEvent(window, 'scroll', function () { showTopLink(); });checkBlind();</script> </body> </html>