在PostgreSQL:
-- Sample data: one sentence per row.
-- "id" is a surrogate key that identifies each sentence; it is generated
-- automatically, so the INSERT below only supplies the sentence text.
CREATE TABLE tbl (
    id integer GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    c  text NOT NULL
);

INSERT INTO tbl (c)
VALUES
    ('Max taps his pencil'),
    ('Max raps his pencil'),
    ('Max raps his drums'),
    ('Max raps his drums pencil');
-- For every word position, list the distinct words found at that position
-- across all sentences.
-- WITH ORDINALITY numbers the pieces of each sentence in the order the split
-- produces them, so "nr" is the real word position (1-based) and does not
-- depend on an unordered row_number() or on a per-row key column.
SELECT
    w.nr,
    ARRAY_AGG(DISTINCT w.elem) AS words
FROM tbl AS t
CROSS JOIN LATERAL regexp_split_to_table(t.c, ' ') WITH ORDINALITY AS w (elem, nr)
GROUP BY w.nr
ORDER BY w.nr;  -- make the row order match the listing below
返回:
1,{Max}
2,{raps,taps}
3,{his}
4,{drums,pencil}
5,{pencil}
將各位置的結果拼接起來,得到最終答案:
-- Build one pattern string from all sentences: a position with a single
-- distinct word is emitted as-is, a position with several is emitted as
-- {word1|word2|...}.
WITH words_by_position AS (
    -- Distinct words per word position; WITH ORDINALITY supplies the
    -- 1-based position of each word inside its sentence.
    SELECT
        s.nr,
        ARRAY_AGG(DISTINCT s.elem) AS w
    FROM tbl AS t
    CROSS JOIN LATERAL regexp_split_to_table(t.c, ' ') WITH ORDINALITY AS s (elem, nr)
    GROUP BY s.nr
)

SELECT
    string_agg(
        CASE
            WHEN array_length(p.w, 1) < 2 THEN p.w[1]
            ELSE '{' || array_to_string(p.w, '|') || '}'
        END,
        ' '
        ORDER BY p.nr  -- ordering inside the aggregate is the guaranteed form
    ) AS pattern
FROM words_by_position AS p;
返回:Max {raps|taps} his {drums|pencil} pencil
PHP的解決方案:
/* Sample sentences; in a real application read these from the database. */
$in = [
    'Max taps his pencil',
    'Max raps his pencil',
    'Max raps his drums',
    'Max raps his drums pencil',
];

/* Distinct words seen at each word position (0-based), kept in order of
   first appearance. */
$statWordsAtIndex = [];

/* Record $word at position $index unless it is already known there. */
$addWordAtIndex = function ($word, $index) use (&$statWordsAtIndex) {
    if (!array_key_exists($index, $statWordsAtIndex)) {
        $statWordsAtIndex[$index] = [$word];
    } elseif (!in_array($word, $statWordsAtIndex[$index], true)) {
        /* Strict comparison: with loose comparison numeric-looking words
           such as '10' and '1e1' would be treated as the same word. */
        $statWordsAtIndex[$index][] = $word;
    }
};

foreach ($in as $sentence) {
    foreach (explode(' ', $sentence) as $pos => $word) {
        $addWordAtIndex($word, $pos);
    }
}

/* A position with one distinct word is printed as-is, a position with
   several as {a|b|...}. Joining with implode() avoids a trailing space. */
$parts = [];
foreach ($statWordsAtIndex as $words) {
    $parts[] = (2 > count($words))
        ? $words[0]
        : '{' . implode('|', $words) . '}';
}
$pattern = implode(' ', $parts);

echo $pattern, "\n";
另外——第一步是生成*全部* 10368 種結果(這很簡單),第二步則是把它們輸入一個複雜的模式識別算法(這遠遠超出了 SO 的範圍) –