900字范文 > 机器学习实战_朴素贝叶斯分类器_预测疾病

机器学习实战_朴素贝叶斯分类器_预测疾病

时间：2021-03-21 13:09:55

"""Naive Bayes toy example: predict a disease from a symptom and an occupation.

Each training record is a ``[symptom, occupation]`` pair labelled with one of
three diseases.  Records are turned into 0/1 presence vectors over the
vocabulary, a three-class naive Bayes model is fitted in log space, and every
symptom/occupation combination is then classified.
"""
import numpy as np

# Display names for class ids 0, 1 and 2.  The order matches the order in
# which the diseases first appear in the training labels.
_DISEASE_NAMES = ("感冒", "过敏", "脑震荡")


def loadDataSet():
    """Create the toy training set and print it.

    Returns:
        postingList: list of records, each a ``[symptom, occupation]`` pair.
        classList: the disease label of each record, aligned with
            ``postingList``.
    """
    postingList = [
        ["打喷嚏", "护士"],
        ["打喷嚏", "农夫"],
        ["头痛", "建筑工人"],
        ["头痛", "建筑工人"],
        ["打喷嚏", "教师"],
        ["头痛", "教师"],
    ]
    classList = ["感冒", "过敏", "脑震荡", "感冒", "感冒", "脑震荡"]
    print("词条集合：\n", np.array(postingList))
    print("标签集合：\n", classList)
    return postingList, classList


def createVocabulary(postingList, classList):
    """Collect the distinct tokens and distinct labels in first-seen order.

    Args:
        postingList: list of token lists (the tokenised records).
        classList: label of each record.

    Returns:
        vocabulary: every distinct token, ordered by first appearance.
        labels: every distinct label, ordered by first appearance.
    """
    # dict.fromkeys de-duplicates while keeping insertion order, which avoids
    # the quadratic "if x not in list" scan.
    vocabulary = list(
        dict.fromkeys(word for words in postingList for word in words)
    )
    print("字典：\n", vocabulary)
    labels = list(dict.fromkeys(classList))
    print("标签表：\n", labels)
    return vocabulary, labels


def _first_index(items):
    """Map each item to the index of its first occurrence in ``items``."""
    positions = {}
    for position, item in enumerate(items):
        # setdefault keeps the first index, mirroring list.index().
        positions.setdefault(item, position)
    return positions


def vector(postingList, classList, vocabulary, labels):
    """Encode records as 0/1 presence vectors and labels as integer ids.

    Args:
        postingList: list of token lists (the tokenised records).
        classList: label of each record.
        vocabulary: token list defining the vector columns.
        labels: label list defining the integer class ids.

    Returns:
        postingVecSet: one list of 0/1 ints per record, ``len(vocabulary)``
            long; tokens missing from ``vocabulary`` are ignored.
        classVec: index into ``labels`` for every entry of ``classList``.

    Raises:
        ValueError: if a label in ``classList`` does not occur in ``labels``.
    """
    column_of = _first_index(vocabulary)
    postingVecSet = []
    for words in postingList:
        postingVec = [0] * len(vocabulary)
        for word in words:
            column = column_of.get(word)
            if column is not None:
                postingVec[column] = 1
        postingVecSet.append(postingVec)

    class_id_of = _first_index(labels)
    classVec = []
    for label in classList:
        if label not in class_id_of:
            # Same exception type that labels.index(label) would raise.
            raise ValueError("{!r} is not in labels".format(label))
        classVec.append(class_id_of[label])

    print("向量化词条；\n", np.array(postingVecSet))
    print("向量化标签：\n", classVec)
    return postingVecSet, classVec


def _log_likelihood(features, in_class):
    """Return smoothed log P(feature | class) for the rows picked by the mask.

    Every feature count starts at 1 and the class total starts at 2, so a
    feature never seen with a class still gets a finite log-probability.
    """
    member_rows = features[in_class]
    feature_counts = 1.0 + member_rows.sum(axis=0)
    total_count = 2.0 + member_rows.sum()
    return np.log(feature_counts / total_count)


def train(postingVecSet, classVec):
    """Fit the three-class naive Bayes model.

    Args:
        postingVecSet: list of equal-length 0/1 feature vectors.
        classVec: class id (0, 1 or 2) of each feature vector.

    Returns:
        A tuple ``(PA, PB, PC, P0Vector, P1Vector, P2Vector)`` where
        ``PA``/``PB``/``PC`` are the prior probabilities of classes 0/1/2
        (their share of the training records) and ``P0Vector``/``P1Vector``/
        ``P2Vector`` are arrays holding the smoothed log P(feature | class)
        for classes 0/1/2.

    Raises:
        ValueError: if the training set is empty, if the two arguments differ
            in length, or if ``classVec`` contains an id other than 0, 1, 2.
    """
    sample_count = len(classVec)
    if sample_count == 0 or len(postingVecSet) == 0:
        raise ValueError("training set is empty")
    if len(postingVecSet) != sample_count:
        raise ValueError(
            "got {} feature vectors but {} labels".format(
                len(postingVecSet), sample_count
            )
        )
    unexpected = sorted(set(classVec) - {0, 1, 2})
    if unexpected:
        # Previously any such id was silently counted as class 2.
        raise ValueError(
            "class ids must be 0, 1 or 2, got {}".format(unexpected)
        )

    features = np.asarray(postingVecSet, dtype=float)
    class_ids = np.asarray(classVec)
    masks = [class_ids == class_id for class_id in range(3)]

    # Priors: share of training records that belong to each class.
    PA, PB, PC = (
        int(np.count_nonzero(mask)) / float(sample_count) for mask in masks
    )
    print("感冒概率：", PA, "过敏概率：", PB, "脑震荡概率：", PC)

    P0Vector, P1Vector, P2Vector = (
        _log_likelihood(features, mask) for mask in masks
    )
    print("P0Vector:", P0Vector)
    print("P1Vector:", P1Vector)
    print("P2Vector:", P2Vector)
    return PA, PB, PC, P0Vector, P1Vector, P2Vector


def classification(test, P0Vector, P1Vector, P2Vector, PA, PB, PC):
    """Score one feature vector against the three classes and print the winner.

    Args:
        test: 0/1 feature vector over the training vocabulary.
        P0Vector, P1Vector, P2Vector: log P(feature | class) for classes
            0/1/2, as returned by ``train``.
        PA, PB, PC: prior probabilities of classes 0/1/2.

    Returns:
        The largest of the three unnormalised log-posterior scores, i.e. the
        score of the predicted class (not the class id).  On a tie the
        lower-numbered class is reported.
    """
    sample = np.asarray(test, dtype=float)
    # Log space: summing the selected log-likelihoods and the log-prior
    # replaces a product of small probabilities.
    scores = [
        np.dot(likelihood, sample) + np.log(prior)
        for likelihood, prior in (
            (P0Vector, PA),
            (P1Vector, PB),
            (P2Vector, PC),
        )
    ]
    print(
        "感冒的概率：", scores[0],
        "过敏的概率：", scores[1],
        "脑震荡的概率：", scores[2],
    )
    # argmax returns the first maximum, so ties favour the lower class id.
    best = int(np.argmax(scores))
    print("最可能的疾病：" + _DISEASE_NAMES[best])
    return scores[best]


def main():
    """Train on the toy data and classify every symptom/occupation pair."""
    postingList, classList = loadDataSet()
    vocabulary, labels = createVocabulary(postingList, classList)
    postingVecSet, classVec = vector(postingList, classList, vocabulary, labels)
    PA, PB, PC, P0Vector, P1Vector, P2Vector = train(postingVecSet, classVec)
    for zhengZhuang in ['打喷嚏', '头痛']:
        for zhiYe in ['护士', '农夫', '建筑工人', '教师']:
            test = np.zeros(len(vocabulary))
            test[vocabulary.index(zhengZhuang)] = 1
            test[vocabulary.index(zhiYe)] = 1
            print("症状：", zhengZhuang, "职业：", zhiYe)
            classification(test, P0Vector, P1Vector, P2Vector, PA, PB, PC)
            print("\n")


if __name__ == '__main__':
    main()

运行结果：

D:\PyCharm\Projects\MachineLearning\venv\Scripts\python.exe D:/PyCharm/Projects/MachineLearning/BeiYesi_YiYuan.py
词条集合：
 [['打喷嚏' '护士']
 ['打喷嚏' '农夫']
 ['头痛' '建筑工人']
 ['头痛' '建筑工人']
 ['打喷嚏' '教师']
 ['头痛' '教师']]
标签集合：
 ['感冒', '过敏', '脑震荡', '感冒', '感冒', '脑震荡']
字典：
 ['打喷嚏', '护士', '农夫', '头痛', '建筑工人', '教师']
标签表：
 ['感冒', '过敏', '脑震荡']
向量化词条；
 [[1 1 0 0 0 0]
 [1 0 1 0 0 0]
 [0 0 0 1 1 0]
 [0 0 0 1 1 0]
 [1 0 0 0 0 1]
 [0 0 0 1 0 1]]
向量化标签：
 [0, 1, 2, 0, 0, 2]
感冒概率： 0.5 过敏概率： 0.16666666666666666 脑震荡概率： 0.3333333333333333
P0Vector: [-0.98082925 -1.38629436 -2.07944154 -1.38629436 -1.38629436 -1.38629436]
P1Vector: [-0.69314718 -1.38629436 -0.69314718 -1.38629436 -1.38629436 -1.38629436]
P2Vector: [-1.79175947 -1.79175947 -1.79175947 -0.69314718 -1.09861229 -1.09861229]
症状： 打喷嚏 职业： 护士
感冒的概率： -3.0602707946915624 过敏的概率： -3.8712010109078907 脑震荡的概率： -4.68213122712422
最可能的疾病：感冒

症状： 打喷嚏 职业： 农夫
感冒的概率： -3.7534179752515073 过敏的概率： -3.1780538303479453 脑震荡的概率： -4.68213122712422
最可能的疾病：过敏

症状： 打喷嚏 职业： 建筑工人
感冒的概率： -3.0602707946915624 过敏的概率： -3.8712010109078907 脑震荡的概率： -3.9889840465642745
最可能的疾病：感冒

症状： 打喷嚏 职业： 教师
感冒的概率： -3.0602707946915624 过敏的概率： -3.8712010109078907 脑震荡的概率： -3.9889840465642745
最可能的疾病：感冒

症状： 头痛 职业： 护士
感冒的概率： -3.4657359027997265 过敏的概率： -4.564348191467836 脑震荡的概率： -3.58351893845611
最可能的疾病：感冒

症状： 头痛 职业： 农夫
感冒的概率： -4.1588830833596715 过敏的概率： -3.8712010109078907 脑震荡的概率： -3.58351893845611
最可能的疾病：脑震荡

症状： 头痛 职业： 建筑工人
感冒的概率： -3.4657359027997265 过敏的概率： -4.564348191467836 脑震荡的概率： -2.8903717578961645
最可能的疾病：脑震荡

症状： 头痛 职业： 教师
感冒的概率： -3.4657359027997265 过敏的概率： -4.564348191467836 脑震荡的概率： -2.8903717578961645
最可能的疾病：脑震荡

Process finished with exit code 0

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。