news 2026/6/1 4:23:02

PHP全文搜索引擎与索引优化

作者头像

张小明

前端开发工程师

1.2k 24
文章封面图
PHP全文搜索引擎与索引优化

PHP全文搜索引擎与索引优化

全文搜索是应用的核心功能。从数据库全文索引到Elasticsearch,PHP有多种搜索方案。今天说说PHP中全文搜索引擎的构建和索引优化。

先看MySQL全文索引的使用和优化。

```php
/**
 * Article search backed by a MySQL FULLTEXT index on `articles(title, content)`.
 *
 * Offers paginated boolean-mode search, a highlighted top-N search, title
 * prefix suggestions and a maintenance hook that rebuilds the table.
 */
class FulltextSearch
{
    /** Maximum number of rows returned by searchWithHighlight(). */
    private const HIGHLIGHT_LIMIT = 20;

    /** Number of characters of `content` kept for the highlighted excerpt. */
    private const EXCERPT_LENGTH = 300;

    private PDO $pdo;

    public function __construct(PDO $pdo)
    {
        $this->pdo = $pdo;
    }

    /**
     * Runs a boolean-mode full-text search and returns one page of results.
     *
     * @param string $keyword Search expression passed to AGAINST(... IN BOOLEAN MODE).
     * @param int    $page    1-based page number; values below 1 are treated as 1.
     * @param int    $perPage Page size; values below 1 are treated as 1.
     *
     * @return array{data: array, total: int, page: int, per_page: int, total_pages: int}
     */
    public function search(string $keyword, int $page = 1, int $perPage = 20): array
    {
        // Clamp so the offset can never be negative and the page count never divides by zero.
        $page = max(1, $page);
        $perPage = max(1, $perPage);
        $offset = ($page - 1) * $perPage;

        // Each marker gets its own name: PDO only permits re-using a named
        // marker within one statement when prepare emulation is enabled.
        $stmt = $this->pdo->prepare("
            SELECT *, MATCH(title, content) AGAINST(:kw_score IN BOOLEAN MODE) AS relevance
            FROM articles
            WHERE MATCH(title, content) AGAINST(:kw_filter IN BOOLEAN MODE)
            ORDER BY relevance DESC
            LIMIT :limit OFFSET :offset
        ");
        $stmt->bindValue(':kw_score', $keyword, PDO::PARAM_STR);
        $stmt->bindValue(':kw_filter', $keyword, PDO::PARAM_STR);
        // LIMIT/OFFSET must be bound as integers; values passed through
        // execute([...]) are treated as strings and end up quoted.
        $stmt->bindValue(':limit', $perPage, PDO::PARAM_INT);
        $stmt->bindValue(':offset', $offset, PDO::PARAM_INT);
        $stmt->execute();

        $results = $stmt->fetchAll();

        $countStmt = $this->pdo->prepare("
            SELECT COUNT(*) FROM articles
            WHERE MATCH(title, content) AGAINST(:keyword IN BOOLEAN MODE)
        ");
        $countStmt->execute(['keyword' => $keyword]);
        $total = (int) $countStmt->fetchColumn();

        return [
            'data' => $results,
            'total' => $total,
            'page' => $page,
            'per_page' => $perPage,
            'total_pages' => (int) ceil($total / $perPage),
        ];
    }

    /**
     * Returns the most relevant matches with two extra HTML-safe fields:
     * `title_highlight` and `content_highlight` (an excerpt of the content),
     * in which every matched search term is wrapped in `<mark>`.
     *
     * @param string $keyword Search expression passed to AGAINST(... IN BOOLEAN MODE).
     *
     * @return array Rows as fetched, each extended with the two highlight fields.
     */
    public function searchWithHighlight(string $keyword): array
    {
        $stmt = $this->pdo->prepare("
            SELECT *,
            MATCH(title, content) AGAINST(:kw_score IN BOOLEAN MODE) AS relevance
            FROM articles
            WHERE MATCH(title, content) AGAINST(:kw_filter IN BOOLEAN MODE)
            ORDER BY relevance DESC
            LIMIT " . self::HIGHLIGHT_LIMIT . "
        ");
        $stmt->execute(['kw_score' => $keyword, 'kw_filter' => $keyword]);
        $results = $stmt->fetchAll();

        // Index-based loop instead of a by-reference foreach, which would leave
        // a dangling reference to the last row behind.
        foreach ($results as $i => $row) {
            $title = (string) ($row['title'] ?? '');
            // mb_substr cuts on character boundaries; a byte-wise cut could split
            // a multi-byte character and make the UTF-8 regex below fail.
            $excerpt = mb_substr((string) ($row['content'] ?? ''), 0, self::EXCERPT_LENGTH, 'UTF-8');

            $results[$i]['title_highlight'] = $this->highlight($title, $keyword);
            $results[$i]['content_highlight'] = $this->highlight($excerpt, $keyword);
        }

        return $results;
    }

    /**
     * HTML-escapes $text and wraps each occurrence of a search term in `<mark>`.
     *
     * Terms are taken from $keyword by splitting on whitespace and stripping
     * boolean-mode operators from both ends. Matching is case-insensitive and
     * done in a single pass over the raw text, so inserted markup and HTML
     * entities are never matched themselves.
     *
     * @param string $text    Raw (unescaped) text.
     * @param string $keyword Raw search expression.
     *
     * @return string HTML-safe text with matches wrapped in `<mark>`.
     */
    private function highlight(string $text, string $keyword): string
    {
        $escape = static fn (string $s): string => htmlspecialchars($s, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        $terms = [];
        foreach (preg_split('/\s+/u', $keyword, -1, PREG_SPLIT_NO_EMPTY) ?: [] as $term) {
            // Drop leading/trailing boolean-mode operators such as +word, -word, word*, "word".
            $term = rtrim(ltrim($term, '+-~<>("'), '*)"');
            if ($term !== '') {
                // preg_quote keeps user input from being interpreted as regex syntax.
                $terms[$term] = preg_quote($term, '/');
            }
        }

        if ($terms === []) {
            return $escape($text);
        }

        // Longest alternative first so a longer term wins over one of its prefixes.
        $alternatives = array_values($terms);
        usort($alternatives, static fn (string $a, string $b): int => strlen($b) <=> strlen($a));

        $parts = preg_split(
            '/(' . implode('|', $alternatives) . ')/iu',
            $text,
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );
        if ($parts === false) {
            // Regex failure (e.g. invalid UTF-8 in $text): fall back to plain escaped text.
            return $escape($text);
        }

        // With a single capture group the pieces alternate: even = plain text, odd = match.
        $html = '';
        foreach ($parts as $i => $part) {
            $safe = $escape($part);
            $html .= ($i % 2 === 1) ? "<mark>{$safe}</mark>" : $safe;
        }

        return $html;
    }

    /**
     * Suggests distinct article titles that start with $prefix.
     *
     * @param string $prefix Literal title prefix; LIKE wildcards in it are escaped.
     * @param int    $limit  Maximum number of suggestions; negative values are treated as 0.
     *
     * @return array List of titles.
     */
    public function suggest(string $prefix, int $limit = 10): array
    {
        // Escape LIKE metacharacters so "%" or "_" typed by the user match literally.
        $literal = strtr($prefix, ['\\' => '\\\\', '%' => '\\%', '_' => '\\_']);

        $stmt = $this->pdo->prepare("
            SELECT DISTINCT title FROM articles
            WHERE title LIKE :prefix
            LIMIT :limit
        ");
        $stmt->bindValue(':prefix', "{$literal}%", PDO::PARAM_STR);
        $stmt->bindValue(':limit', max(0, $limit), PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll(PDO::FETCH_COLUMN);
    }

    /**
     * Runs REPAIR TABLE and OPTIMIZE TABLE on `articles`.
     *
     * NOTE(review): REPAIR TABLE is presumably only effective for engines that
     * support it (e.g. MyISAM) — confirm against the table's storage engine.
     */
    public function rebuildIndex(): void
    {
        // Both statements return a status result set; it has to be consumed via
        // query() so the connection is free for the next statement.
        foreach (['REPAIR TABLE articles', 'OPTIMIZE TABLE articles'] as $sql) {
            $status = $this->pdo->query($sql);
            if ($status !== false) {
                $status->fetchAll();
                $status->closeCursor();
            }
        }
    }
}
```

Elasticsearch PHP客户端的实现:

```php
/**
 * Minimal Elasticsearch HTTP client (cURL based) bound to a single index.
 *
 * Supports creating the index, indexing single documents or bulks, deleting
 * documents and a title-boosted match search.
 */
class ElasticsearchClient
{
    private string $host;
    private string $index;

    /**
     * @param string $host  Base URL of the cluster; a trailing slash is removed.
     * @param string $index Name of the index all operations target.
     */
    public function __construct(string $host = 'http://localhost:9200', string $index = 'app')
    {
        $this->host = rtrim($host, '/');
        $this->index = $index;
    }

    /**
     * Creates or replaces the document stored under $id.
     *
     * @throws RuntimeException When the request fails or the server answers with an error status.
     */
    public function indexDocument(string $id, array $document): void
    {
        // The id is a URL path segment, so it must be percent-encoded.
        $this->request('PUT', "/{$this->index}/_doc/" . rawurlencode($id), $document);
    }

    /**
     * Indexes several documents in one `_bulk` request.
     *
     * @param array $documents Map of document id => document body. Nothing is sent when empty.
     *
     * @throws JsonException    When a document cannot be JSON-encoded.
     * @throws RuntimeException When the request fails or the response reports item errors.
     */
    public function bulkIndex(array $documents): void
    {
        if ($documents === []) {
            // An empty bulk body is not a valid request; there is nothing to do.
            return;
        }

        $lines = [];
        foreach ($documents as $id => $doc) {
            $lines[] = json_encode(['index' => ['_id' => (string) $id]], JSON_THROW_ON_ERROR);
            // An empty PHP array would encode as "[]"; a document must be a JSON object.
            $lines[] = $doc === [] ? '{}' : json_encode($doc, JSON_THROW_ON_ERROR);
        }

        // NDJSON: one JSON value per line, and the body must end with a newline.
        $result = $this->request(
            'POST',
            "/{$this->index}/_bulk",
            implode("\n", $lines) . "\n",
            'application/x-ndjson'
        );

        // The bulk endpoint reports per-item failures through the "errors" flag of the response body.
        if (is_array($result) && ($result['errors'] ?? false) === true) {
            throw new RuntimeException("Elasticsearch bulk request to index '{$this->index}' reported item errors");
        }
    }

    /**
     * Searches title and content for $keyword, weighting title matches three times higher.
     *
     * @param string $keyword Text to match.
     * @param int    $page    1-based page number; values below 1 are treated as 1.
     * @param int    $perPage Page size; negative values are treated as 0.
     *
     * @return array{data: array, total: int, page: int, per_page: int}
     *
     * @throws RuntimeException When the request fails or the server answers with an error status.
     */
    public function search(string $keyword, int $page = 1, int $perPage = 20): array
    {
        // Clamp so "from" and "size" can never be negative.
        $page = max(1, $page);
        $perPage = max(0, $perPage);

        $query = [
            'query' => [
                'bool' => [
                    'should' => [
                        ['match' => ['title' => ['query' => $keyword, 'boost' => 3]]],
                        ['match' => ['content' => ['query' => $keyword, 'boost' => 1]]],
                    ],
                ],
            ],
            'from' => ($page - 1) * $perPage,
            'size' => $perPage,
            'sort' => ['_score' => 'desc'],
        ];

        // POST instead of GET: the query travels in the request body, which
        // some HTTP intermediaries drop on GET requests.
        $result = $this->request('POST', "/{$this->index}/_search", $query);

        $hits = $result['hits']['hits'] ?? [];
        // hits.total is either an object with a "value" key or a plain integer.
        $rawTotal = $result['hits']['total'] ?? 0;
        $total = is_array($rawTotal) ? (int) ($rawTotal['value'] ?? 0) : (int) $rawTotal;

        return [
            'data' => $hits,
            'total' => $total,
            'page' => $page,
            'per_page' => $perPage,
        ];
    }

    /**
     * Deletes the document stored under $id. A missing document is not treated as an error.
     *
     * @throws RuntimeException When the request fails or the server answers with another error status.
     */
    public function deleteDocument(string $id): void
    {
        $this->request('DELETE', "/{$this->index}/_doc/" . rawurlencode($id));
    }

    /**
     * Creates the index, using default settings/mappings overridden by $mapping.
     *
     * @param array $mapping Overrides merged key by key over the defaults.
     *
     * @throws RuntimeException When the request fails or the server answers with an error status.
     */
    public function createIndex(array $mapping = []): void
    {
        $this->request('PUT', "/{$this->index}", $this->buildIndexBody($mapping));
    }

    /**
     * Builds the index creation body: defaults with caller overrides applied.
     */
    private function buildIndexBody(array $mapping): array
    {
        $defaultMapping = [
            'settings' => [
                'number_of_shards' => 1,
                'number_of_replicas' => 1,
            ],
            'mappings' => [
                'properties' => [
                    'title' => ['type' => 'text', 'analyzer' => 'standard'],
                    'content' => ['type' => 'text', 'analyzer' => 'standard'],
                ],
            ],
        ];

        // array_replace_recursive overrides scalar values; array_merge_recursive
        // would combine them into arrays (e.g. number_of_shards => [1, 3]).
        return array_replace_recursive($defaultMapping, $mapping);
    }

    /**
     * Sends one HTTP request and returns the decoded JSON response.
     *
     * @param string $method      HTTP method.
     * @param string $path        Path appended to the host, starting with "/".
     * @param mixed  $body        Raw string body, or a value to JSON-encode; null sends no body.
     * @param string $contentType Content-Type header sent along with a body.
     *
     * @return mixed Decoded response, or an empty array when the body is empty or not JSON.
     *
     * @throws RuntimeException On transport failure, or on an HTTP status >= 400
     *                          (except 404 for DELETE, which means "already gone").
     */
    private function request(string $method, string $path, mixed $body = null, string $contentType = 'application/json'): mixed
    {
        $ch = curl_init($this->host . $path);
        if ($ch === false) {
            throw new RuntimeException("Unable to initialise cURL for {$method} {$path}");
        }

        $options = [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_CUSTOMREQUEST => $method,
            CURLOPT_TIMEOUT => 10,
        ];

        if ($body !== null) {
            if (is_string($body)) {
                $payload = $body;
            } else {
                // An empty PHP array would encode as "[]"; send an empty JSON object instead.
                $payload = $body === [] ? '{}' : json_encode($body, JSON_THROW_ON_ERROR);
            }
            $options[CURLOPT_POSTFIELDS] = $payload;
            $options[CURLOPT_HTTPHEADER] = ['Content-Type: ' . $contentType];
        }

        curl_setopt_array($ch, $options);
        $response = curl_exec($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $transportError = curl_error($ch);

        if ($response === false) {
            throw new RuntimeException("Elasticsearch request {$method} {$path} failed: {$transportError}");
        }

        $missingOnDelete = $method === 'DELETE' && $httpCode === 404;
        if ($httpCode >= 400 && !$missingOnDelete) {
            throw new RuntimeException("Elasticsearch request {$method} {$path} returned HTTP {$httpCode}");
        }

        return json_decode((string) $response, true) ?: [];
    }
}
```

搜索是用户体验的关键环节。MySQL全文索引适合中小规模数据,Elasticsearch适合海量数据的搜索。索引优化可以提高搜索速度,分词器影响搜索的准确性。搜索结果的相关性排序比简单的LIKE查询好得多,高亮显示匹配词可以提示用户为什么看到这个结果。

版权声明: 本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若内容造成侵权/违法违规/事实不符,请联系邮箱:809451989@qq.com进行投诉反馈,一经查实,立即删除!
网站建设 2026/6/1 4:17:31

从切角到枝节:用HFSS一步步优化圆极化微带天线的轴比与匹配

从切角到枝节:用HFSS一步步优化圆极化微带天线的轴比与匹配在无线通信系统中,圆极化微带天线因其结构紧凑、易于集成和良好的极化特性而备受青睐。然而,设计一个同时满足轴比和阻抗匹配要求的天线并非易事。本文将带您深入探索从初始设计到最…

作者头像 李华
网站建设 2026/6/1 4:15:57

远程开发实战:在AutoDL云服务器上通过VNC运行COLMAP GUI图形界面

云端三维重建实战:AutoDL服务器VNC可视化COLMAP全流程指南当你在深夜赶论文时,实验室的台式机却因为连续72小时运行三维重建任务而发烫死机——这可能是每个计算机视觉研究者都经历过的噩梦。如今,云端GPU服务器让这一切成为历史,…

作者头像 李华