|
72 | 72 | "We are interested in finding the probability of a label given some observed features, $P(L~|~{\\rm features})$.\n", |
73 | 73 | "\n", |
74 | 74 | "Bayes's theorem:\n", |
75 | | - "$$P(L~|~{\\rm features}) = \\frac{P({\\rm features}~|~L)P(L)}{P({\\rm features})}$$\n", |
| 75 | + "$$P(L~|~{\\rm features}) = \\frac{P({\\rm features}~|~L)P(L)}{P({\\rm features})}=\\frac{P({\\rm features},~L)}{P({\\rm features})}$$\n", |
76 | 76 | "\n", |
77 | 77 | "$$\\mbox{posterior} = \\frac{\\mbox{likelihood}\\times \\mbox{prior}}{\\mbox{evidence}}$$\n", |
78 | 78 | "\n" |
|
149 | 149 | } |
150 | 150 | }, |
151 | 151 | "source": [ |
152 | | - "- **Discrimitave models** (判别式模型): \n", |
153 | | - " - 给定$x$,直接建模$P(L|x)$来预测$L$\n", |
154 | | - " - Decision Tree, SVM\n", |
| 152 | + "估计类条件概率(Likelihood)的常用策略:先假定其具有某种确定的概率分布形式,再基于训练样本对概率分布的参数进行估计。\n", |
| 153 | + "\n", |
155 | 154 | "- **Generative models** (生成式模型): \n", |
156 | 155 | " - 先对联合概率分布$P(L, x)$建模, 再由此获得$P(L|x)$\n", |
157 | 156 | " - Naive Bayes\n", |
158 | | - " \n", |
159 | | - "估计类条件概率(Likelihood)的常用策略:先假定其具有某种确定的概率分布形式,再基于训练样本对概率分布参数估计。" |
| 157 | + "- **Discriminative models** (判别式模型): \n", |
| 158 | + " - 给定$x$,直接建模$P(L|x)$来预测$L$\n", |
| 159 | + " - Decision Tree, SVM\n", |
| 160 | + " \n" |
160 | 161 | ] |
161 | 162 | }, |
162 | 163 | { |
|
448 | 449 | }, |
449 | 450 | { |
450 | 451 | "cell_type": "code", |
451 | | - "execution_count": 1, |
| 452 | + "execution_count": 3, |
452 | 453 | "metadata": { |
453 | 454 | "ExecuteTime": { |
454 | | - "end_time": "2020-06-07T08:35:58.024754Z", |
455 | | - "start_time": "2020-06-07T08:35:55.455356Z" |
| 455 | + "end_time": "2020-08-12T02:04:50.523048Z", |
| 456 | + "start_time": "2020-08-12T02:04:50.518097Z" |
456 | 457 | }, |
457 | 458 | "slideshow": { |
458 | 459 | "slide_type": "slide" |
|
463 | 464 | "%matplotlib inline\n", |
464 | 465 | "import numpy as np\n", |
465 | 466 | "import matplotlib.pyplot as plt\n", |
466 | | - "import seaborn as sns; sns.set()" |
| 467 | + "import seaborn as sns\n", |
| 468 | + "sns.set()" |
467 | 469 | ] |
468 | 470 | }, |
469 | 471 | { |
470 | 472 | "cell_type": "code", |
471 | | - "execution_count": 2, |
| 473 | + "execution_count": 5, |
472 | 474 | "metadata": { |
473 | 475 | "ExecuteTime": { |
474 | | - "end_time": "2020-06-07T08:36:02.397317Z", |
475 | | - "start_time": "2020-06-07T08:36:01.631419Z" |
| 476 | + "end_time": "2020-08-12T02:08:39.040229Z", |
| 477 | + "start_time": "2020-08-12T02:08:38.730153Z" |
476 | 478 | }, |
477 | 479 | "slideshow": { |
478 | 480 | "slide_type": "subslide" |
|
545 | 547 | }, |
546 | 548 | { |
547 | 549 | "cell_type": "code", |
548 | | - "execution_count": 3, |
| 550 | + "execution_count": 6, |
549 | 551 | "metadata": { |
550 | 552 | "ExecuteTime": { |
551 | | - "end_time": "2020-06-07T08:36:48.611732Z", |
552 | | - "start_time": "2020-06-07T08:36:48.599037Z" |
| 553 | + "end_time": "2020-08-12T02:08:43.850456Z", |
| 554 | + "start_time": "2020-08-12T02:08:43.845443Z" |
553 | 555 | }, |
554 | 556 | "slideshow": { |
555 | 557 | "slide_type": "subslide" |
|
575 | 577 | }, |
576 | 578 | { |
577 | 579 | "cell_type": "code", |
578 | | - "execution_count": 4, |
| 580 | + "execution_count": 7, |
579 | 581 | "metadata": { |
580 | 582 | "ExecuteTime": { |
581 | | - "end_time": "2020-06-07T08:36:57.076423Z", |
582 | | - "start_time": "2020-06-07T08:36:57.058964Z" |
| 583 | + "end_time": "2020-08-12T02:09:11.457280Z", |
| 584 | + "start_time": "2020-08-12T02:09:11.452597Z" |
583 | 585 | }, |
584 | 586 | "slideshow": { |
585 | 587 | "slide_type": "fragment" |
|
738 | 740 | }, |
739 | 741 | { |
740 | 742 | "cell_type": "code", |
741 | | - "execution_count": 6, |
| 743 | + "execution_count": 8, |
742 | 744 | "metadata": { |
743 | 745 | "ExecuteTime": { |
744 | | - "end_time": "2020-06-07T08:40:00.834720Z", |
745 | | - "start_time": "2020-06-07T08:40:00.308020Z" |
| 746 | + "end_time": "2020-08-12T02:12:08.328363Z", |
| 747 | + "start_time": "2020-08-12T02:12:08.048029Z" |
746 | 748 | }, |
747 | 749 | "slideshow": { |
748 | 750 | "slide_type": "subslide" |
749 | 751 | } |
750 | 752 | }, |
751 | 753 | "outputs": [ |
| 754 | + { |
| 755 | + "name": "stderr", |
| 756 | + "output_type": "stream", |
| 757 | + "text": [ |
| 758 | + "/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:144: FutureWarning: The sklearn.datasets.base module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.datasets. Anything that cannot be imported from sklearn.datasets is now part of the private API.\n", |
| 759 | + " warnings.warn(message, FutureWarning)\n" |
| 760 | + ] |
| 761 | + }, |
752 | 762 | { |
753 | 763 | "data": { |
754 | 764 | "text/plain": [ |
|
774 | 784 | " 'talk.religion.misc']" |
775 | 785 | ] |
776 | 786 | }, |
777 | | - "execution_count": 6, |
| 787 | + "execution_count": 8, |
778 | 788 | "metadata": {}, |
779 | 789 | "output_type": "execute_result" |
780 | 790 | } |
|
799 | 809 | }, |
800 | 810 | { |
801 | 811 | "cell_type": "code", |
802 | | - "execution_count": 7, |
| 812 | + "execution_count": 9, |
803 | 813 | "metadata": { |
804 | 814 | "ExecuteTime": { |
805 | | - "end_time": "2020-06-07T08:41:35.566570Z", |
806 | | - "start_time": "2020-06-07T08:41:34.611060Z" |
| 815 | + "end_time": "2020-08-12T02:13:31.352385Z", |
| 816 | + "start_time": "2020-08-12T02:13:30.846495Z" |
807 | 817 | }, |
808 | 818 | "slideshow": { |
809 | 819 | "slide_type": "fragment" |
|
885 | 895 | }, |
886 | 896 | { |
887 | 897 | "cell_type": "code", |
888 | | - "execution_count": 8, |
| 898 | + "execution_count": 10, |
889 | 899 | "metadata": { |
890 | 900 | "ExecuteTime": { |
891 | | - "end_time": "2020-06-07T08:43:21.096094Z", |
892 | | - "start_time": "2020-06-07T08:43:21.084334Z" |
| 901 | + "end_time": "2020-08-12T02:16:55.162506Z", |
| 902 | + "start_time": "2020-08-12T02:16:55.154107Z" |
893 | 903 | }, |
894 | 904 | "slideshow": { |
895 | 905 | "slide_type": "subslide" |
|
917 | 927 | }, |
918 | 928 | { |
919 | 929 | "cell_type": "code", |
920 | | - "execution_count": 9, |
| 930 | + "execution_count": 11, |
921 | 931 | "metadata": { |
922 | 932 | "ExecuteTime": { |
923 | | - "end_time": "2020-06-07T08:43:34.213419Z", |
924 | | - "start_time": "2020-06-07T08:43:32.353064Z" |
| 933 | + "end_time": "2020-08-12T02:17:10.509105Z", |
| 934 | + "start_time": "2020-08-12T02:17:09.528286Z" |
925 | 935 | }, |
926 | 936 | "slideshow": { |
927 | 937 | "slide_type": "fragment" |
|
1004 | 1014 | }, |
1005 | 1015 | { |
1006 | 1016 | "cell_type": "code", |
1007 | | - "execution_count": 10, |
| 1017 | + "execution_count": 12, |
1008 | 1018 | "metadata": { |
1009 | 1019 | "ExecuteTime": { |
1010 | | - "end_time": "2020-06-07T08:45:03.307208Z", |
1011 | | - "start_time": "2020-06-07T08:45:03.300694Z" |
| 1020 | + "end_time": "2020-08-12T02:19:27.699454Z", |
| 1021 | + "start_time": "2020-08-12T02:19:27.696304Z" |
1012 | 1022 | }, |
1013 | 1023 | "slideshow": { |
1014 | 1024 | "slide_type": "subslide" |
|
1035 | 1045 | }, |
1036 | 1046 | { |
1037 | 1047 | "cell_type": "code", |
1038 | | - "execution_count": 11, |
| 1048 | + "execution_count": 13, |
1039 | 1049 | "metadata": { |
1040 | 1050 | "ExecuteTime": { |
1041 | | - "end_time": "2020-06-07T08:45:27.750035Z", |
1042 | | - "start_time": "2020-06-07T08:45:27.737839Z" |
| 1051 | + "end_time": "2020-08-12T02:19:54.328917Z", |
| 1052 | + "start_time": "2020-08-12T02:19:54.323217Z" |
1043 | 1053 | }, |
1044 | 1054 | "slideshow": { |
1045 | 1055 | "slide_type": "fragment" |
|
1052 | 1062 | "'sci.space'" |
1053 | 1063 | ] |
1054 | 1064 | }, |
1055 | | - "execution_count": 11, |
| 1065 | + "execution_count": 13, |
1056 | 1066 | "metadata": {}, |
1057 | 1067 | "output_type": "execute_result" |
1058 | 1068 | } |
|
1063 | 1073 | }, |
1064 | 1074 | { |
1065 | 1075 | "cell_type": "code", |
1066 | | - "execution_count": 12, |
| 1076 | + "execution_count": 14, |
1067 | 1077 | "metadata": { |
1068 | 1078 | "ExecuteTime": { |
1069 | | - "end_time": "2020-06-07T08:45:52.957854Z", |
1070 | | - "start_time": "2020-06-07T08:45:52.933849Z" |
| 1079 | + "end_time": "2020-08-12T02:20:47.360059Z", |
| 1080 | + "start_time": "2020-08-12T02:20:47.354526Z" |
1071 | 1081 | }, |
1072 | 1082 | "slideshow": { |
1073 | 1083 | "slide_type": "fragment" |
|
1080 | 1090 | "'soc.religion.christian'" |
1081 | 1091 | ] |
1082 | 1092 | }, |
1083 | | - "execution_count": 12, |
| 1093 | + "execution_count": 14, |
1084 | 1094 | "metadata": {}, |
1085 | 1095 | "output_type": "execute_result" |
1086 | 1096 | } |
|
0 commit comments