344 lines
12 KiB
PHP
344 lines
12 KiB
PHP
<?php
|
||
namespace app\admin\addresmart;
|
||
use app\admin\model\Item;
|
||
use function fast\e;
|
||
|
||
class Address
|
||
{
|
||
/**
 * Parse a free-form text snippet into structured address fields.
 *
 * The first service title found in the text is cut out, contact details are
 * optionally split off via decompose(), and the remaining address text is run
 * through fuzz() and parse() to resolve province / city / region.
 *
 * @param string $string Raw text to analyse.
 * @param bool   $user   When true, decompose() is used to separate name, phone,
 *                       ID number and postcode from the address first.
 * @return array Keys: addr, province, city, region, item (['id' => ..., 'item' => ...])
 *               and street, plus whatever decompose() returned when $user is true.
 */
public static function smart($string, $user = true)
{
    // Title => id lookup used to resolve the detected service type further down.
    $itemIdsByTitle = [];
    foreach ((new Item())->getAll() as $item) {
        $itemIdsByTitle[$item->title] = $item->id;
    }

    // Candidate titles are the items whose status column equals 1.
    $titles = Item::where('status', 1)->column('title');
    $type = self::extractServiceTypes($string, $titles)[0] ?? '';

    // Remove the detected title so it does not end up inside the address.
    $string = str_replace($type, '', $string);

    $re = $user ? self::decompose($string) : ['addr' => $string];

    $fuzz = self::fuzz($re['addr']);
    $parse = self::parse($fuzz['a1'], $fuzz['a2'], $fuzz['a3']);

    $re['province'] = $parse['province'] ?? '';
    $re['city'] = $parse['city'] ?? '';
    $re['region'] = $parse['region'] ?? '';
    $re['item'] = [
        'id' => $itemIdsByTitle[$type] ?? 0,
        'item' => $type,
    ];

    // Strip the resolved administrative names from the street part.
    $street = $fuzz['street'] ?: '';
    $re['street'] = str_replace([$re['region'], $re['city'], $re['province']], '', $street);

    return $re;
}
|
||
/**
 * Collect every run of consecutive Han characters found in the given text.
 *
 * @param string $str Text to scan.
 * @return array Han-only substrings in order of appearance; empty when none are found.
 */
public static function extractChineseWords($str)
{
    $found = [];
    preg_match_all('/\p{Han}+/u', $str, $found);

    // Index 0 holds the full-pattern matches.
    return $found[0] ?? [];
}
|
||
/**
 * Find the candidate that best resembles any Han word contained in the input.
 *
 * Each Han run of $input is scored against every candidate with similar_text();
 * the best candidate per word is kept and the overall best pair is returned.
 * Note that similar_text() works on bytes, so percentages for multi-byte text
 * are approximate.
 *
 * @param string $input Text to search for Han words.
 * @param array  $array Candidate strings.
 * @return string "<candidate>__<word>" for the best pair when its similarity is
 *                above 51 percent, otherwise an empty string.
 */
public static function findMostSimilar($input, $array)
{
    $words = self::extractChineseWords($input);

    // Nothing to compare. Without this guard array_key_first() yields null below
    // and indexing with it raises "Undefined array key" warnings.
    if (empty($words) || empty($array)) {
        return '';
    }

    $finalScores = [];
    foreach ($words as $word) {
        $similarities = [];
        foreach ($array as $item) {
            similar_text($word, $item, $percent);
            $similarities[$item] = max($percent, $similarities[$item] ?? 0);
        }

        // Highest similarity first; the leading key is the best candidate for this word.
        arsort($similarities);
        $bestMatch = array_key_first($similarities);
        $finalScores[$bestMatch . '__' . $word] = $similarities[$bestMatch];
    }

    // Pick the best (candidate, word) pair overall.
    arsort($finalScores);
    $bestKey = array_key_first($finalScores);

    return $finalScores[$bestKey] > 51 ? $bestKey : '';
}
|
||
|
||
|
||
/**
 * Separate user details (mobile/landline number, ID card number, postcode and
 * name) from a raw address string.
 *
 * @param string $string Raw text, e.g. a pasted shipping label.
 * @return array Always contains 'addr' (the text that is left over); contains
 *               'idn', 'mobile', 'postcode' and 'name' only when they were found.
 */
public static function decompose($string)
{
    $compose = [];

    // Field labels and punctuation only separate values, so they become blanks.
    // Each separator is listed in its full-width and its ASCII form.
    $separators = [
        '收货地址', '详细地址', '地址', '收货人', '收件人', '收货', '所在地区', '邮编', '电话', '手机号码',
        '身份证号码', '身份证号', '身份证', '：', ':', '；', ';', '，', ',', '。',
    ];
    $string = str_replace($separators, ' ', $string);

    // Collapse whitespace runs. With /u the pattern works on UTF-8 characters and
    // \s also covers Unicode blanks such as the full-width space; preg_replace()
    // returns null on invalid UTF-8, in which case the byte-wise pattern is used.
    $string = preg_replace('/\s+/u', ' ', $string) ?? preg_replace('/\s+/', ' ', $string);

    // Dashed numbers that carry an extension (e.g. 138-1234-5678-01) are joined
    // so that the phone pattern below matches them in one piece.
    $string = preg_replace('/0?(\d{3})-(\d{4})-(\d{4})([-_]\d{1,})/', '$1$2$3$4', $string);

    // ID card number: 18 digits, or 17 digits followed by X.
    if (preg_match('/\d{18}|\d{17}X/i', $string, $match) === 1) {
        $compose['idn'] = strtoupper($match[0]);
        $string = str_replace($match[0], '', $string);
    }

    // Phone: number with extension, plain 7-11 digit number, or area code + number.
    if (preg_match('/\d{7,11}[\-_]\d{2,6}|\d{7,11}|\d{3,4}-\d{6,8}/', $string, $match) === 1) {
        $compose['mobile'] = $match[0];
        $string = str_replace($match[0], '', $string);
    }

    // Postcode: the first 6-digit run that is still left.
    if (preg_match('/\d{6}/', $string, $match) === 1) {
        $compose['postcode'] = $match[0];
        $string = str_replace($match[0], '', $string);
    }

    $string = trim(preg_replace('/ {2,}/', ' ', $string));

    $tokens = explode(' ', $string);
    if (count($tokens) > 1) {
        // Heuristic: the shortest token is taken as the name. Length is measured in
        // characters, not bytes, so a CJK name is not penalised against ASCII text.
        $name = $tokens[0];
        foreach ($tokens as $token) {
            if (mb_strlen($token) < mb_strlen($name)) {
                $name = $token;
            }
        }
        $compose['name'] = $name;

        // Drop the name as a whole token only; a substring replace would also cut
        // it out of the middle of the address.
        $rest = array_filter($tokens, static fn ($token) => $token !== $name);
        $string = implode(' ', $rest);
    }

    $compose['addr'] = $string;

    return $compose;
}
|
||
|
||
/**
 * Guess the second-level (a2) and third-level (a3) administrative fragments of
 * an address from where the suffixes 市/盟/州 and 区/县/旗 occur, and split off
 * the street part that follows.
 *
 * @param string $addr Address text.
 * @return array ['a1' => '' (never filled here), 'a2' => string, 'a3' => string,
 *               'street' => string]
 */
public static function fuzz($addr)
{
    $addr_origin = $addr;

    // Normalised copy used for keyword lookup: blanks/commas removed, the long
    // "autonomous" suffixes shortened, and 小区/校区 dropped so that their 区
    // is not mistaken for a district suffix.
    $addr = str_replace([' ', ','], '', $addr);
    $addr = str_replace(['自治区', '自治州'], ['省', '州'], $addr);
    $addr = str_replace(['小区', '校区'], '', $addr);

    $a1 = '';
    $a2 = '';
    $a3 = '';
    $street = '';

    // Up to $width characters ending at (and including) position $pos. The start
    // is clamped to 0: a negative offset makes mb_substr() count from the end of
    // the string, which returned unrelated trailing characters.
    $tail = static function (int $pos, int $width) use ($addr): string {
        $start = max(0, $pos - $width + 1);

        return mb_substr($addr, $start, $pos - $start + 1);
    };

    // Street = text after the district fragment. Offsets found in the normalised
    // string do not line up with the original once blanks or tokens were removed,
    // so the fragment is located in the original first; the raw offset is only
    // the fallback when the fragment cannot be found there verbatim.
    $streetAfter = static function (string $fragment, int $keywordPos, bool $fromEnd) use ($addr_origin): string {
        if ($fragment !== '') {
            $at = $fromEnd ? mb_strrpos($addr_origin, $fragment) : mb_strpos($addr_origin, $fragment);
            if ($at !== false) {
                return mb_substr($addr_origin, $at + mb_strlen($fragment));
            }
        }

        return mb_substr($addr_origin, $keywordPos + 1);
    };

    $countyPos = mb_strpos($addr, '县');
    $zonePos = mb_strpos($addr, '区');
    $bannerPos = mb_strpos($addr, '旗');
    $cityPos = mb_strpos($addr, '市');

    // A district suffix only counts when it sits within the first two thirds of the text.
    $limit = floor((mb_strlen($addr) / 3) * 2);
    $inFront = static fn ($pos): bool => $pos !== false && $pos <= $limit;

    if ($inFront($countyPos) || $inFront($zonePos) || $inFront($bannerPos)) {
        // Later suffixes override earlier ones: 旗, then 区, then 县.
        $keywordPos = 0;

        if ($bannerPos !== false) {
            $keywordPos = $bannerPos;
            $a3 = $tail($bannerPos, 2);
        }

        if ($zonePos !== false) {
            $keywordPos = $zonePos;
            // Everything between 市 and 区 when the city precedes the district;
            // otherwise (no 市, or 市 only later in the text) the three characters ending at 区.
            $a3 = ($cityPos !== false && $cityPos < $zonePos)
                ? mb_substr($addr, $cityPos + 1, $zonePos - $cityPos)
                : $tail($zonePos, 3);
        }

        if ($countyPos !== false) {
            $keywordPos = $countyPos;
            if ($cityPos !== false && $cityPos < $countyPos) {
                $a3 = mb_substr($addr, $cityPos + 1, $countyPos - $cityPos);
            } elseif (mb_strpos($addr, '自治县') !== false) {
                $a3 = $tail($countyPos, 7);
                // Drop a leading suffix character that belongs to the parent region.
                if (in_array(mb_substr($a3, 0, 1), ['省', '市', '州'], true)) {
                    $a3 = mb_substr($a3, 1);
                }
            } else {
                $a3 = $tail($countyPos, 3);
            }
        }

        $street = $streetAfter($a3, $keywordPos, false);
    } else {
        // No usable district suffix: fall back to the last 市. Position 0 is
        // treated like "not found", as before.
        $lastCityPos = mb_strrpos($addr, '市');
        if ($lastCityPos) {
            $a3 = $tail($lastCityPos, 3);
            $street = $streetAfter($a3, $lastCityPos, true);
        } else {
            $street = $addr;
        }
    }

    // Second level: the three characters ending at the first 市, else 盟, else 州.
    // (自治州 was already shortened to 州 above.) A suffix at position 0 is skipped.
    foreach (['市', '盟', '州'] as $suffix) {
        $pos = mb_strpos($addr, $suffix);
        if ($pos) {
            $a2 = $tail($pos, 3);
            break;
        }
    }

    return [
        'a1' => $a1,
        'a2' => $a2,
        'a3' => $a3,
        'street' => $street,
    ];
}
|
||
|
||
/**
 * Resolve province / city / region names from the fragments produced by fuzz().
 *
 * The three data files are expected to define $a1_data, $a2_data and $a3_data;
 * as used below, each is an id-keyed list of rows with a 'name' and a 'pid'
 * (parent id) entry, for provinces, cities and regions respectively.
 *
 * @param string $a1 First-level fragment (not used by the lookup).
 * @param string $a2 Second-level (city) fragment; only used to disambiguate.
 * @param string $a3 Third-level (region) fragment.
 * @return array May contain 'province', 'city' and 'region'; empty when $a3 is
 *               empty or nothing could be resolved.
 */
public static function parse($a1, $a2, $a3)
{
    require 'data/a3.php';
    require 'data/a2.php';
    require 'data/a1.php';

    $r = [];

    if ((string) $a3 === '') {
        return $r;
    }

    // Rows whose name contains the fragment, keyed by id.
    $matchByName = static function ($rows, $needle): array {
        $hits = [];
        foreach ($rows as $id => $row) {
            if (mb_strpos($row['name'], $needle) !== false) {
                $hits[$id] = $row;
            }
        }

        return $hits;
    };

    $area3_matches = $matchByName($a3_data, $a3);
    $matchCount = count($area3_matches);

    if ($matchCount > 1) {
        if ($a2) {
            // Several regions share the name: keep the one whose parent city matches $a2.
            // When more than one qualifies, the last one wins.
            $area2_matches = $matchByName($a2_data, $a2);
            foreach ($area3_matches as $v) {
                if (isset($area2_matches[$v['pid']])) {
                    $city = $area2_matches[$v['pid']];
                    $r['city'] = $city['name'];
                    $r['region'] = $v['name'];
                    $r['province'] = $a1_data[$city['pid']]['name'] ?? '';
                }
            }
        } else {
            // Ambiguous and no city hint: report the raw fragment only.
            $r['province'] = '';
            $r['city'] = '';
            $r['region'] = $a3;
        }
    } elseif ($matchCount === 1) {
        $region = reset($area3_matches);
        $r['region'] = $region['name'];

        // Walk up the parent chain; a missing parent yields '' instead of a warning.
        $city = $a2_data[$region['pid']] ?? null;
        $provinceId = $city['pid'] ?? null;
        $province = $provinceId !== null ? ($a1_data[$provinceId] ?? null) : null;

        $r['province'] = $province['name'] ?? '';
        $r['city'] = $city['name'] ?? '';
    } elseif ((string) $a2 === (string) $a3) {
        // No region matched and fuzz() returned the same fragment for both levels,
        // so the fragment is looked up as a city. The last matching city wins.
        $provinceId = null;
        foreach ($a2_data as $v) {
            if (mb_strpos($v['name'], $a2) !== false) {
                $provinceId = $v['pid'];
                $r['city'] = $v['name'];
            }
        }

        $r['province'] = $provinceId !== null ? ($a1_data[$provinceId]['name'] ?? '') : '';
        $r['region'] = '';
    }

    return $r;
}
|
||
|
||
|
||
/**
 * Extract the service types that occur in a chat message.
 *
 * Candidates are trimmed, emptied entries and duplicates are removed, and longer
 * titles are tried first so that a specific title is reported before a shorter
 * one it contains. Matching is case-insensitive.
 *
 * @param string $chatText     Chat message to search.
 * @param array  $serviceTypes Candidate service type titles.
 * @param bool   $returnAll    True to return every match; false to return only the first one.
 * @return array|string|null List of matched titles when $returnAll is true (possibly
 *                           empty); otherwise the first matched title, or null when
 *                           nothing matches.
 */
public static function extractServiceTypes(string $chatText, array $serviceTypes, bool $returnAll = true): array|string|null
{
    // Trim, drop empty strings (a literal '0' title is kept) and de-duplicate.
    $cleaned = array_unique(array_filter(
        array_map('trim', $serviceTypes),
        static fn ($title) => $title !== ''
    ));

    // Longest title first.
    usort($cleaned, static fn ($a, $b) => mb_strlen($b, 'UTF-8') <=> mb_strlen($a, 'UTF-8'));

    $matched = [];

    foreach ($cleaned as $service) {
        if (mb_stripos($chatText, $service) === false) {
            continue;
        }
        if (!$returnAll) {
            return $service;
        }
        $matched[] = $service;
    }

    return $returnAll ? $matched : null;
}
|
||
|
||
|
||
}
|