我解決這個問題的思路是:提取所有單詞,然後逐一計數!
因此,先建立一個與你的資料結構類似的表來存放資料:
-- Source table holding the rows whose `Text` is split into words.
-- The `sentence_words` procedure reads `Text` filtered by `Category`.
-- The PRIMARY KEY already enforces uniqueness of `Rev_id`, so no additional
-- UNIQUE index on the same column is declared (it would only duplicate the
-- primary-key index and add write overhead).
CREATE TABLE `tbltest` (
    `Rev_id` int(11) NOT NULL AUTO_INCREMENT,
    `place_id` int(11) DEFAULT NULL,
    `Stars` int(11) DEFAULT NULL,
    `Category` varchar(45) DEFAULT NULL,
    `Text` varchar(255) DEFAULT NULL,
    PRIMARY KEY (`Rev_id`)
-- AUTO_INCREMENT=4 makes the first generated `Rev_id` 4.
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8;
接著建立用來存放單詞及其計數的表:
-- Result table: one row per distinct word with its number of occurrences.
-- `word` is the PRIMARY KEY, which is what lets
-- INSERT ... ON DUPLICATE KEY UPDATE increment `counts` for a repeated word.
-- A separate UNIQUE index on `word` would duplicate the primary-key index,
-- so none is declared.
CREATE TABLE `counting` (
    `word` varchar(45) NOT NULL,
    `counts` int(11) DEFAULT NULL,
    PRIMARY KEY (`word`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
現在,建立一個 MySQL 預存程序(Stored Procedure),用來拆分句子並統計單詞:
drop procedure if exists sentence_words;
delimiter #
-- sentence_words(Cat)
--   Reads `Text` from every `tbltest` row whose `Category` equals Cat,
--   splits it into space-separated words and upserts each word into
--   `counting` (inserting it with counts = 1, or adding 1 to an existing row).
--
--   Parameters:
--     Cat  category value to match against `tbltest`.`Category`.
--
--   Side effects:
--     Writes to `counting`; each source row is processed in its own
--     transaction. Existing rows in `counting` are NOT cleared, so counts
--     accumulate across calls.
--
--   Rows whose text is NULL or empty after clean-up contribute nothing.
create procedure sentence_words(IN Cat VARCHAR(45))
begin
    declare w_max int unsigned default 1;      -- number of words in the current sentence
    declare w_counter int unsigned default 0;  -- zero-based index of the word being inserted
    declare done int unsigned default 0;       -- set to 1 by the handler when the cursor is exhausted
    declare sentence varchar(255) default null;
    declare cur cursor for select `text` from `tbltest` where `Category` = Cat;
    declare continue handler for not found set done=1;

    set done=0;
    open cur;
    myloop: loop
        fetch cur into sentence;
        if done = 1 then leave myloop; end if;

        -- Turn the handled punctuation (. ! , ;) into spaces, then trim.
        set sentence = trim(replace(replace(replace(replace(
            sentence
            ,'.',' '),'!',' '),',',' '),';',' '));

        -- Collapse every run of spaces to a single space. A single REPLACE is
        -- not enough for runs of three or more, hence the loop. Without this,
        -- text such as 'bad. Poor' would yield an empty-string "word".
        while instr(sentence, '  ') > 0 do
            set sentence = replace(sentence, '  ', ' ');
        end while;

        -- Skip NULL or empty text: there is nothing to count.
        if sentence is not null and sentence <> '' then
            -- Words = single spaces + 1 (valid because runs were collapsed above).
            set w_max = char_length(sentence) - char_length(replace(sentence, ' ', '')) + 1;
            -- Restart at the first word for every sentence; otherwise rows after
            -- the first would skip their leading words (or be skipped entirely).
            set w_counter = 0;

            start transaction;
            while w_counter < w_max do
                -- The inner SUBSTRING_INDEX keeps the first (w_counter + 1) words,
                -- the outer one keeps the last word of that prefix.
                -- NOTE(review): `counting`.`word` is varchar(45); a longer token
                -- may be rejected or truncated depending on sql_mode - confirm.
                insert into `counting`(counts, word) values
                    (1, substring_index(substring_index(
                            sentence, ' ', w_counter + 1), ' ', -1)
                    )
                ON DUPLICATE KEY UPDATE counts = counts + 1;
                set w_counter = w_counter + 1;
            end while;
            commit;
        end if;
    end loop;
    close cur;
end #
delimiter ;
最後,您可以呼叫該程序,然後在 `counting` 表中查看單詞及其計數。如果您需要將每個類別的單詞計數分開,請記得在每次針對某個類別呼叫程序之前,先 truncate 或備份 `counting` 表。
-- Example run for one category.
-- TRUNCATE intentionally removes ALL rows from `counting` so the result only
-- reflects the category processed below; back the table up first if needed.
truncate `counting`;
call sentence_words('Bar');
-- Most frequent words first; `word` breaks ties so the output order is stable.
-- Optionally add "where length(word) > 2" to leave out very short words.
select
    word,
    counts
from `counting`
order by counts desc, word;
-- words | counts --
'audience', '1'
'bad', '1'
'place', '1'
'Poor', '1'
可能與[使用SQL來確定文本字段的字數統計](https://stackoverflow.com/questions/748276/using-sql-to-determine-word-count-stats-of-a-text-field)重複 – pilsetnieks
這不一樣——我關心的不是每段文本的單詞數量,而是每個單詞出現的總次數。 – Dimgold
@MohaMad你能詳細說一下嗎? – Dimgold