$this->extractNickname($chatText),
            'city' => $this->extractCity($chatText),
            'district' => $this->extractDistrict($chatText),
            'address' => $this->extractAddress($chatText),
            'phone' => $this->extractPhone($chatText),
            'remark' => $this->extractRemark($chatText),
            'services' => $this->extractServices($chatText, $serviceTypes),
        ];
    }

    /**
     * Extract the nickname that precedes the first "-->" marker at the very
     * start of the text.
     *
     * @param string $text Raw chat text.
     *
     * @return string|null Trimmed nickname, or null when the marker is absent
     *                     or nothing but whitespace precedes it.
     */
    protected function extractNickname(string $text): ?string
    {
        if (preg_match('/^(.*?)\s*-->/u', $text, $match) !== 1) {
            return null;
        }

        $nickname = trim($match[1]);

        // An empty capture (text starting with "-->") carries no nickname;
        // report it the same way as "not found".
        return $nickname !== '' ? $nickname : null;
    }

    /**
     * Find the first occurrence in the text of one of the known city names.
     *
     * @param string $text Raw chat text.
     *
     * @return string|null The matched city name, or null when none of the
     *                     listed cities appears.
     */
    protected function extractCity(string $text): ?string
    {
        $pattern = '/(北京|上海|广州|深圳|武汉|成都|重庆|杭州|南京|天津|西安|苏州|郑州|长沙|青岛|合肥|福州|厦门|南昌|昆明|大连|宁波|无锡|哈尔滨|长春|石家庄|南宁|贵阳|兰州|呼和浩特|乌鲁木齐)/u';

        if (preg_match($pattern, $text, $match) === 1) {
            return $match[1];
        }

        return null;
    }

    /**
     * Extract a district name: a run of 1-10 Han characters ending in "区".
     *
     * The run is not allowed to contain "省" or "市", so a province/city
     * prefix written directly before the district (e.g. "湖北省武汉市洪山区")
     * is not swallowed into the result.
     *
     * @param string $text Raw chat text.
     *
     * @return string|null The district including the trailing "区", or null
     *                     when no such run exists.
     */
    protected function extractDistrict(string $text): ?string
    {
        // The negative lookahead stops the name at province/city boundaries.
        if (preg_match('/((?:(?![省市])\p{Han}){1,10}区)/u', $text, $match) === 1) {
            return $match[1];
        }

        return null;
    }

    /**
     * Extract a full street address.
     *
     * The pattern requires, in order: an optional province, a Han run ending
     * in "市", a Han run ending in "区", and a trailing "<digits>栋 ... 室"
     * building/room part on the same line.
     *
     * @param string $text Raw chat text.
     *
     * @return string|null The entire matched span, or null when the text
     *                     contains no address of that shape.
     */
    protected function extractAddress(string $text): ?string
    {
        $pattern = '/(湖北省|四川省|北京市|上海市|重庆市|[\p{Han}]+省)?[\p{Han}]+市\s*[\p{Han}]+区.*?(\d+栋.*?室)/u';

        if (preg_match($pattern, $text, $match) === 1) {
            return $match[0];
        }

        return null;
    }

    /**
     * Extract the first mainland-China mobile number (11 digits, 1[3-9]...).
     *
     * The number must not be embedded in a longer digit run, so fragments of
     * order or tracking numbers are not reported as phones. An optional
     * "+86" / "0086" / "86" country prefix (with an optional "-" or
     * whitespace separator) is accepted and stripped from the result.
     *
     * @param string $text Raw chat text.
     *
     * @return string|null The bare 11-digit number, or null when none found.
     */
    protected function extractPhone(string $text): ?string
    {
        $pattern = '/(?<!\d)(?:(?:\+|00)?86[-\s]?)?(1[3-9]\d{9})(?!\d)/';

        if (preg_match($pattern, $text, $match) === 1) {
            return $match[1];
        }

        return null;
    }

    /**
     * Build a remark from URLs or promotional keywords found in the text.
     *
     * @param string $text Raw chat text.
     *
     * @return string|null All http(s) URLs joined by ", " when any exist;
     *                     otherwise a fixed notice when a promotional keyword
     *                     is present; otherwise null.
     */
    protected function extractRemark(string $text): ?string
    {
        if (preg_match_all('/https?:\/\/[^\s]+/i', $text, $matches) > 0) {
            return implode(', ', $matches[0]);
        }

        // Other software / after-sales / promotional phrases can also be
        // detected by adding keywords here.
        if (str_contains($text, '软件下载') || str_contains($text, '自动发货')) {
            return '可能包含软件下载或推广信息';
        }

        return null;
    }

    /**
     * Return every configured service name that occurs in the text.
     *
     * Names are trimmed, empty entries and duplicates are dropped, and the
     * remaining names are checked case-insensitively. The result lists
     * longer names before shorter ones.
     *
     * @param string $text         Raw chat text.
     * @param array  $serviceTypes Candidate service names.
     *
     * @return array List of matched service names (possibly empty).
     */
    protected function extractServices(string $text, array $serviceTypes): array
    {
        // array_unique keeps a repeated configuration entry from being
        // reported more than once.
        $candidates = array_values(array_unique(array_filter(array_map('trim', $serviceTypes))));

        usort(
            $candidates,
            static fn (string $a, string $b): int => mb_strlen($b, 'UTF-8') <=> mb_strlen($a, 'UTF-8')
        );

        $matched = [];
        foreach ($candidates as $service) {
            if (mb_stripos($text, $service, 0, 'UTF-8') !== false) {
                $matched[] = $service;
            }
        }

        return $matched;
    }
}