2013-05-18 81 views
1

我想確認,我用 SQL 依貝葉斯規則計算下列貝葉斯網絡( http://spark-public.s3.amazonaws.com/bigdata/documents/HW6.pdf )中各疾病後驗概率的做法是否正確:

-- Joint probability of Dyspnoea.d together with the evidence
-- Asia.a = 1 and Smoking.s = 0: every joined row multiplies one p from each of
-- the seven tables, and SUM adds those products up per value of d.
-- The result is not normalised; divide each value by the total over all
-- returned rows to obtain a posterior.
SELECT
    Dyspnoea.d,
    SUM(
        Dyspnoea.p * Either.p * Tuberculosis.p * Asia.p
        -- The original had a bare "p" here, which is ambiguous because every
        -- joined table has a p column; LungCancer.p is the factor that was
        -- otherwise missing from the product.
        * LungCancer.p * Smoking.p * Bronchitis.p
    )
FROM Dyspnoea
INNER JOIN Either ON Dyspnoea.e = Either.e
INNER JOIN Tuberculosis ON Either.t = Tuberculosis.t
INNER JOIN Asia ON Tuberculosis.a = Asia.a
INNER JOIN LungCancer ON Either.l = LungCancer.l
INNER JOIN Bronchitis ON Dyspnoea.b = Bronchitis.b
-- LungCancer and Bronchitis both carry an s column; tie both to one Smoking row.
INNER JOIN Smoking ON LungCancer.s = Smoking.s AND Bronchitis.s = Smoking.s
WHERE Asia.a = 1
    AND Smoking.s = 0
GROUP BY Dyspnoea.d;

還是應該寫成下面這樣,因爲其中有些事件相互獨立?

-- Alternative formulation from the question: per joined row it adds three
-- partial products (one per branch leading into Dyspnoea) instead of taking a
-- single product of all seven p columns, then sums per value of d.
-- NOTE(review): a sum of partial products is presumably not the joint
-- probability the question is after -- confirm before relying on this form.
SELECT
    Dyspnoea.d,
    SUM(
        (Dyspnoea.p * Either.p * Tuberculosis.p * Asia.p)
        + (Dyspnoea.p * Either.p * LungCancer.p * Smoking.p)
        -- The original multiplied by Bronchitis.b (the state column); every
        -- other factor is a probability column, so .p is used here.
        + (Dyspnoea.p * Bronchitis.p * Smoking.p)
    )
FROM Dyspnoea
INNER JOIN Either ON Dyspnoea.e = Either.e
INNER JOIN Tuberculosis ON Either.t = Tuberculosis.t
INNER JOIN Asia ON Tuberculosis.a = Asia.a
INNER JOIN LungCancer ON Either.l = LungCancer.l
INNER JOIN Bronchitis ON Dyspnoea.b = Bronchitis.b
-- LungCancer and Bronchitis both carry an s column; tie both to one Smoking row.
INNER JOIN Smoking ON LungCancer.s = Smoking.s AND Bronchitis.s = Smoking.s
WHERE Asia.a = 1
    AND Smoking.s = 0
GROUP BY Dyspnoea.d;
+0

http://pastebin.com/dVLF1cT5 包含更好的語法高亮 – MrX

回答

1

它在完整查詢的基礎上工作。

我用的表是:

-- Probability table for variable A on its own (no conditioning column).
-- Presumably A is the "Asia" node of the linked network -- confirm.
-- One row per state of A; the P values sum to 1.
CREATE TABLE `taba` (
    `A` char(1) NOT NULL,       -- state of A: 'Y' or 'N'
    `P` decimal(4,2) NOT NULL,  -- P(A)
    -- One row per state: a duplicated row would be counted twice by any
    -- query that sums products over this table.
    PRIMARY KEY (`A`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

-- Numeric literals avoid relying on implicit string-to-DECIMAL conversion.
INSERT INTO `taba` (`A`, `P`)
VALUES
    ('Y', 0.01),
    ('N', 0.99);

-- Conditional probability table P(B | S): for each value of S the P values
-- over B sum to 1.
-- Presumably B = bronchitis and S = smoking in the linked network -- confirm.
CREATE TABLE `tabbs` (
    `B` char(1) NOT NULL,       -- state of B: 'Y' or 'N'
    `S` char(1) NOT NULL,       -- state of the conditioning variable S
    `P` decimal(4,2) NOT NULL,  -- P(B | S)
    -- One row per (B, S) combination: a duplicated row would be counted
    -- twice by any query that sums products over this table.
    PRIMARY KEY (`B`, `S`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

-- Numeric literals avoid relying on implicit string-to-DECIMAL conversion.
INSERT INTO `tabbs` (`B`, `S`, `P`)
VALUES
    ('Y', 'Y', 0.60),
    ('Y', 'N', 0.30),
    ('N', 'Y', 0.40),
    ('N', 'N', 0.70);

-- Conditional probability table P(D | E, B): for each (E, B) pair the P values
-- over D sum to 1.
-- Presumably D = dyspnoea, E = "either", B = bronchitis in the linked
-- network -- confirm.
CREATE TABLE `tabdeb` (
    `D` char(1) NOT NULL,       -- state of D: 'Y' or 'N'
    `E` char(1) NOT NULL,       -- state of conditioning variable E
    `B` char(1) NOT NULL,       -- state of conditioning variable B
    `P` decimal(4,2) NOT NULL,  -- P(D | E, B)
    -- One row per (D, E, B) combination: a duplicated row would be counted
    -- twice by any query that sums products over this table.
    PRIMARY KEY (`D`, `E`, `B`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

-- Numeric literals avoid relying on implicit string-to-DECIMAL conversion.
INSERT INTO `tabdeb` (`D`, `E`, `B`, `P`)
VALUES
    ('Y', 'Y', 'Y', 0.90),
    ('Y', 'Y', 'N', 0.70),
    ('Y', 'N', 'Y', 0.80),
    ('Y', 'N', 'N', 0.10),
    ('N', 'Y', 'Y', 0.10),
    ('N', 'Y', 'N', 0.30),
    ('N', 'N', 'Y', 0.20),
    ('N', 'N', 'N', 0.90);

-- Conditional probability table P(E | L, T). The table is deterministic:
-- P is 1 exactly when E equals (L OR T) and 0 for every other combination.
-- Presumably L = lung cancer and T = tuberculosis in the linked
-- network -- confirm.
CREATE TABLE `tabelt` (
    `E` char(1) NOT NULL,       -- state of E: 'Y' or 'N'
    `L` char(1) NOT NULL,       -- state of conditioning variable L
    `T` char(1) NOT NULL,       -- state of conditioning variable T
    `P` decimal(4,2) NOT NULL,  -- P(E | L, T), always 0 or 1 here
    -- One row per (E, L, T) combination: a duplicated row would be counted
    -- twice by any query that sums products over this table.
    PRIMARY KEY (`E`, `L`, `T`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

-- Numeric literals avoid relying on implicit string-to-DECIMAL conversion.
INSERT INTO `tabelt` (`E`, `L`, `T`, `P`)
VALUES
    ('Y', 'Y', 'Y', 1.00),
    ('Y', 'Y', 'N', 1.00),
    ('Y', 'N', 'Y', 1.00),
    ('Y', 'N', 'N', 0.00),
    ('N', 'Y', 'Y', 0.00),
    ('N', 'Y', 'N', 0.00),
    ('N', 'N', 'Y', 0.00),
    ('N', 'N', 'N', 1.00);

-- Conditional probability table P(L | S): for each value of S the P values
-- over L sum to 1.
-- Presumably L = lung cancer and S = smoking in the linked network -- confirm.
CREATE TABLE `tabls` (
    `L` char(1) NOT NULL,       -- state of L: 'Y' or 'N'
    `S` char(1) NOT NULL,       -- state of the conditioning variable S
    `P` decimal(4,2) NOT NULL,  -- P(L | S)
    -- One row per (L, S) combination: a duplicated row would be counted
    -- twice by any query that sums products over this table.
    PRIMARY KEY (`L`, `S`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

-- Numeric literals avoid relying on implicit string-to-DECIMAL conversion.
INSERT INTO `tabls` (`L`, `S`, `P`)
VALUES
    ('Y', 'Y', 0.10),
    ('Y', 'N', 0.01),
    ('N', 'Y', 0.90),
    ('N', 'N', 0.99);

-- Probability table for variable S on its own (no conditioning column).
-- Presumably S is the "smoking" node of the linked network -- confirm.
-- One row per state of S; the P values sum to 1.
CREATE TABLE `tabs` (
    `S` char(1) NOT NULL,       -- state of S: 'Y' or 'N'
    `P` decimal(4,2) NOT NULL,  -- P(S)
    -- One row per state: a duplicated row would be counted twice by any
    -- query that sums products over this table.
    PRIMARY KEY (`S`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

-- Numeric literals avoid relying on implicit string-to-DECIMAL conversion.
INSERT INTO `tabs` (`S`, `P`)
VALUES
    ('Y', 0.50),
    ('N', 0.50);


-- Conditional probability table P(T | A): for each value of A the P values
-- over T sum to 1.
-- Presumably T = tuberculosis and A = visit to Asia in the linked
-- network -- confirm.
CREATE TABLE `tabta` (
    `T` char(1) NOT NULL,       -- state of T: 'Y' or 'N'
    `A` char(1) NOT NULL,       -- state of the conditioning variable A
    `P` decimal(4,2) NOT NULL,  -- P(T | A)
    -- One row per (T, A) combination: a duplicated row would be counted
    -- twice by any query that sums products over this table.
    PRIMARY KEY (`T`, `A`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

-- Numeric literals avoid relying on implicit string-to-DECIMAL conversion.
INSERT INTO `tabta` (`T`, `A`, `P`)
VALUES
    ('Y', 'Y', 0.05),
    ('Y', 'N', 0.01),
    ('N', 'Y', 0.95),
    ('N', 'N', 0.99);

-- Conditional probability table P(X | E): for each value of E the P values
-- over X sum to 1.
-- Presumably X = X-ray result and E = "either" in the linked
-- network -- confirm.
CREATE TABLE `tabxe` (
    `X` char(1) NOT NULL,       -- state of X: 'Y' or 'N'
    `E` char(1) NOT NULL,       -- state of the conditioning variable E
    `P` decimal(4,2) NOT NULL,  -- P(X | E)
    -- One row per (X, E) combination: a duplicated row would be counted
    -- twice by any query that sums products over this table.
    PRIMARY KEY (`X`, `E`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;

-- Numeric literals avoid relying on implicit string-to-DECIMAL conversion.
INSERT INTO `tabxe` (`X`, `E`, `P`)
VALUES
    ('Y', 'Y', 0.98),
    ('Y', 'N', 0.05),
    ('N', 'Y', 0.02),
    ('N', 'N', 0.95);

和查詢:

-- Joint probability of E with the evidence A = 'N' and X = 'Y'.
-- The joins line up one row from each of the eight probability tables per
-- full assignment of the variables; the product of their P values is that
-- assignment's probability, and SUM adds up all assignments sharing a value
-- of E.
-- The result is not normalised: divide each prob by the total over all
-- returned rows to obtain the posterior P(E | A = 'N', X = 'Y').
SELECT
    tabdeb.e,
    SUM(
        -- Table qualifiers are written exactly as the tables were created
        -- (lowercase) so the query also runs where table names are
        -- case-sensitive; a stray "|" before tabbs.p has been removed.
        tabdeb.p * tabelt.p * tabta.p * taba.p
        * tabls.p * tabbs.p * tabs.p * tabxe.p
    ) AS prob
FROM tabdeb
INNER JOIN tabelt ON tabdeb.e = tabelt.e
INNER JOIN tabta ON tabelt.t = tabta.t
INNER JOIN taba ON tabta.a = taba.a
INNER JOIN tabls ON tabelt.l = tabls.l
INNER JOIN tabbs ON tabdeb.b = tabbs.b
-- tabls and tabbs both carry an S column; tie both to the same tabs row.
INNER JOIN tabs ON tabls.s = tabs.s AND tabbs.s = tabs.s
INNER JOIN tabxe ON tabxe.e = tabelt.e
WHERE tabta.a = 'N'
    AND tabxe.x = 'Y'
    -- Append "AND tabdeb.d = 'Y'" here to also condition on D.
GROUP BY tabdeb.e;

-- Joint probability of T with the evidence A = 'N' and X = 'Y'.
-- Each joined row is one full assignment of the variables; the product of the
-- eight P values is its probability, and SUM adds up all assignments sharing a
-- value of T.
-- The result is not normalised: divide each prob by the total over all
-- returned rows to obtain the posterior P(T | A = 'N', X = 'Y').
SELECT
    tabelt.t,
    SUM(
        -- Table qualifiers are written exactly as the tables were created
        -- (lowercase) so the query also runs where table names are
        -- case-sensitive.
        tabdeb.p * tabelt.p * tabta.p * taba.p
        * tabls.p * tabbs.p * tabs.p * tabxe.p
    ) AS prob
FROM tabdeb
INNER JOIN tabelt ON tabdeb.e = tabelt.e
INNER JOIN tabta ON tabelt.t = tabta.t
INNER JOIN taba ON tabta.a = taba.a
INNER JOIN tabls ON tabelt.l = tabls.l
INNER JOIN tabbs ON tabdeb.b = tabbs.b
-- tabls and tabbs both carry an S column; tie both to the same tabs row.
INNER JOIN tabs ON tabls.s = tabs.s AND tabbs.s = tabs.s
INNER JOIN tabxe ON tabxe.e = tabelt.e
WHERE tabta.a = 'N'
    AND tabxe.x = 'Y'
    -- Append "AND tabdeb.d = 'Y'" here to also condition on D.
GROUP BY tabelt.t;

===================================================== 
===================================================== 
-- Lung cancer (column L).
-- Joint probability of L with the evidence A = 'N' and X = 'Y'.
-- Each joined row is one full assignment of the variables; the product of the
-- eight P values is its probability, and SUM adds up all assignments sharing a
-- value of L.
-- The result is not normalised: divide each prob by the total over all
-- returned rows to obtain the posterior P(L | A = 'N', X = 'Y').
SELECT
    tabelt.l,
    SUM(
        -- Table qualifiers are written exactly as the tables were created
        -- (lowercase) so the query also runs where table names are
        -- case-sensitive.
        tabdeb.p * tabelt.p * tabta.p * taba.p
        * tabls.p * tabbs.p * tabs.p * tabxe.p
    ) AS prob
FROM tabdeb
INNER JOIN tabelt ON tabdeb.e = tabelt.e
INNER JOIN tabta ON tabelt.t = tabta.t
INNER JOIN taba ON tabta.a = taba.a
INNER JOIN tabls ON tabelt.l = tabls.l
INNER JOIN tabbs ON tabdeb.b = tabbs.b
-- tabls and tabbs both carry an S column; tie both to the same tabs row.
INNER JOIN tabs ON tabls.s = tabs.s AND tabbs.s = tabs.s
INNER JOIN tabxe ON tabxe.e = tabelt.e
WHERE tabta.a = 'N'
    AND tabxe.x = 'Y'
    -- Append "AND tabdeb.d = 'Y'" here to also condition on D.
GROUP BY tabelt.l;
===================================================== 
===================================================== 
-- Bronchitis (column B).
-- Joint probability of B with the evidence A = 'N' and X = 'Y'.
-- Each joined row is one full assignment of the variables; the product of the
-- eight P values is its probability, and SUM adds up all assignments sharing a
-- value of B.
-- The result is not normalised: divide each prob by the total over all
-- returned rows to obtain the posterior P(B | A = 'N', X = 'Y').
SELECT
    tabdeb.b,
    SUM(
        -- Table qualifiers are written exactly as the tables were created
        -- (lowercase) so the query also runs where table names are
        -- case-sensitive.
        tabdeb.p * tabelt.p * tabta.p * taba.p
        * tabls.p * tabbs.p * tabs.p * tabxe.p
    ) AS prob
FROM tabdeb
INNER JOIN tabelt ON tabdeb.e = tabelt.e
INNER JOIN tabta ON tabelt.t = tabta.t
INNER JOIN taba ON tabta.a = taba.a
INNER JOIN tabls ON tabelt.l = tabls.l
INNER JOIN tabbs ON tabdeb.b = tabbs.b
-- tabls and tabbs both carry an S column; tie both to the same tabs row.
INNER JOIN tabs ON tabls.s = tabs.s AND tabbs.s = tabs.s
INNER JOIN tabxe ON tabxe.e = tabelt.e
WHERE tabta.a = 'N'
    AND tabxe.x = 'Y'
    -- Append "AND tabdeb.d = 'Y'" here to also condition on D.
GROUP BY tabdeb.b;

======================================================= 
======================================================= 
-- Dyspnoea (column D).
-- Joint probability of D with the evidence A = 'N' and X = 'Y'.
-- Each joined row is one full assignment of the variables; the product of the
-- eight P values is its probability, and SUM adds up all assignments sharing a
-- value of D.
-- The result is not normalised: divide each prob by the total over all
-- returned rows to obtain the posterior P(D | A = 'N', X = 'Y').
SELECT
    tabdeb.d,
    SUM(
        -- Table qualifiers are written exactly as the tables were created
        -- (lowercase) so the query also runs where table names are
        -- case-sensitive.
        tabdeb.p * tabelt.p * tabta.p * taba.p
        * tabls.p * tabbs.p * tabs.p * tabxe.p
    ) AS prob
FROM tabdeb
INNER JOIN tabelt ON tabdeb.e = tabelt.e
INNER JOIN tabta ON tabelt.t = tabta.t
INNER JOIN taba ON tabta.a = taba.a
INNER JOIN tabls ON tabelt.l = tabls.l
INNER JOIN tabbs ON tabdeb.b = tabbs.b
-- tabls and tabbs both carry an S column; tie both to the same tabs row.
INNER JOIN tabs ON tabls.s = tabs.s AND tabbs.s = tabs.s
INNER JOIN tabxe ON tabxe.e = tabelt.e
WHERE tabta.a = 'N'
    AND tabxe.x = 'Y'
    -- Append "AND tabdeb.d = 'Y'" here to restrict the output to D = 'Y'.
GROUP BY tabdeb.d;
+0

如果使用 NATURAL JOIN,可以寫得更簡潔。 – michau