diff --git a/.idea/hydra.xml b/.idea/hydra.xml
new file mode 100644
index 0000000..123e89c
--- /dev/null
+++ b/.idea/hydra.xml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..99ae653
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/sbt.xml b/.idea/sbt.xml
new file mode 100644
index 0000000..45cd6b3
--- /dev/null
+++ b/.idea/sbt.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Classification/DataMining_KNN/KNN.py b/Classification/DataMining_KNN/KNN.py
new file mode 100644
index 0000000..b9c17f7
--- /dev/null
+++ b/Classification/DataMining_KNN/KNN.py
@@ -0,0 +1,62 @@
+#!/usr/local/bin/python3
+
+'''
+ ********************************************
+ * Description : k-nearest-neighbours classifier (k = 3, Euclidean distance)
+ * Date : 2018-10-12
+ * Author : liuyy
+ * E-mail : yyliu@dmo-sys.com
+ ********************************************
+'''
+import math
+
+def readfile(f, hasType = False):
+    """Parse whitespace-separated integer rows from the text file `f`.
+
+    With hasType a row becomes (label, [ints]), the label being its first
+    token; otherwise it is just [ints]. Blank lines are skipped.
+    """
+    res = []
+    with open(f) as fp:
+        for line in fp:
+            tmp = line.split()
+            if not tmp:
+                continue
+            if hasType:
+                res.append((tmp[0], [int(i) for i in tmp[1:]]))
+            else:
+                res.append([int(i) for i in tmp])
+    return res
+
+# Load the labelled training rows and the unlabelled query rows, then echo both.
+trained = readfile("trainInput.txt", hasType=True)
+tested = readfile("testInput.txt")
+print(tested, trained, sep="\n")
+
+def classify(case, trained):
+    """Pair every training label with its Euclidean distance to `case`.
+
+    `trained` holds (label, features) entries; the result is a list of
+    (label, distance) tuples in the same order as `trained`.
+    """
+    def dist(p, q):
+        return math.sqrt(sum((a - b) ** 2 for a, b in zip(p, q)))
+    return [(row[0], dist(case, row[1])) for row in trained]
+
+def get_class(first_k):
+    """Return the (label, votes) pair for the most common label in first_k."""
+    tally = {}
+    for entry in first_k:
+        tally[entry[0]] = tally.get(entry[0], 0) + 1
+    # Stable descending sort: on a tie the label counted first stays in front.
+    ranked = sorted(tally.items(), key = lambda kv: kv[1], reverse = True)
+    return ranked[0]
+
+for case in tested:  # label each query by vote among its 3 nearest rows
+    ct = classify(case, trained)
+    print(ct)
+    first_k = sorted(ct, key = lambda pair: pair[1])[:3]
+    print("{} {}".format(case, get_class(first_k)))
+
+
+
diff --git a/Classification/DataMining_KNN/KNN.scala b/Classification/DataMining_KNN/KNN.scala
new file mode 100644
index 0000000..e69de29
diff --git a/Classification/DataMining_NaiveBayes/NB.py b/Classification/DataMining_NaiveBayes/NB.py
new file mode 100644
index 0000000..912ea60
--- /dev/null
+++ b/Classification/DataMining_NaiveBayes/NB.py
@@ -0,0 +1,64 @@
+#!/usr/local/bin/python3
+
+'''
+ ********************************************
+ * Description : naive Bayes classifier over categorical attributes
+ * Date : 2018-10-15
+ * Author : liuyy
+ * E-mail : yyliu@dmo-sys.com
+ ********************************************
+'''
+
+def initprob(f):
+    """Estimate naive-Bayes statistics from the training table in file `f`.
+
+    The first line is treated as a header and skipped. Every other row is
+    `<id> <attr_1> ... <attr_n> <class>`, separated by whitespace; the id
+    column is ignored and blank lines are skipped.
+
+    Returns (classtypes, res): `classtypes` maps each class label to its
+    row count, and `res[i]` maps (value, class) to the fraction of rows of
+    that class whose i-th attribute equals `value`. The class counts are
+    also printed.
+    """
+    pre_pro = []
+    classtypes = {}
+
+    with open(f) as fp:
+        fp.readline()  # discard the header row
+        for line in fp:
+            tmp = line.split()[1:]
+            if not tmp:
+                continue  # blank line (or a bare id): nothing to count
+
+            classtype = tmp.pop()
+            classtypes[classtype] = classtypes.get(classtype, 0) + 1
+            if not pre_pro:
+                pre_pro = [{} for _ in tmp]
+
+            for i, attr in enumerate(tmp):
+                k = (attr, classtype)
+                pre_pro[i][k] = pre_pro[i].get(k, 0) + 1
+    print(classtypes)
+    res = [{k: v / classtypes[k[1]] for k, v in i.items()} for i in pre_pro]
+    return (classtypes, res)
+
+cts, pre_prob = initprob("input.txt")
+for i in pre_prob:  # one conditional-probability table per attribute column
+    print(i)
+
+def get_test(to_test, cts, pre_prob):
+    """Pick the class maximising P(class) * prod_i P(value_i | class).
+
+    `cts`/`pre_prob` are initprob's output; unseen (value, class) pairs score 0.
+    """
+    scores = {}
+    for ct, n in cts.items():
+        scores[ct] = n / sum(cts.values())  # class prior, previously left out
+        for i, value in enumerate(to_test.split()):
+            scores[ct] *= pre_prob[i].get((value, ct), 0.0)
+    return max(scores, key = scores.get)
+
+to_test = "Youth Medium Yes Fair"  # Age Income Student CreditRating
+print(to_test + ": ", end = "")
+print(get_test(to_test, cts, pre_prob))
diff --git a/Classification/DataMining_NaiveBayes/input.txt b/Classification/DataMining_NaiveBayes/input.txt
index b5940a5..bc5286f 100644
--- a/Classification/DataMining_NaiveBayes/input.txt
+++ b/Classification/DataMining_NaiveBayes/input.txt
@@ -1,15 +1,15 @@
-Day OutLook Temperature Humidity Wind PlayTennis
-1 Sunny Hot High Weak No
-2 Sunny Hot High Strong No
-3 Overcast Hot High Weak Yes
-4 Rainy Mild High Weak Yes
-5 Rainy Cool Normal Weak Yes
-6 Rainy Cool Normal Strong No
-7 Overcast Cool Normal Strong Yes
-8 Sunny Mild High Weak No
-9 Sunny Cool Normal Weak Yes
-10 Rainy Mild Normal Weak Yes
-11 Sunny Mild Normal Strong Yes
-12 Overcast Mild High Strong Yes
-13 Overcast Hot Normal Weak Yes
-14 Rainy Mild High Strong No
\ No newline at end of file
+Rid Age Income Student CreditRating BuysComputer
+1 Youth High No Fair No
+2 Youth High No Excellent No
+3 MiddleAged High No Fair Yes
+4 Senior Medium No Fair Yes
+5 Senior Low Yes Fair Yes
+6 Senior Low Yes Excellent No
+7 MiddleAged Low Yes Excellent Yes
+8 Youth Medium No Fair No
+9 Youth Low Yes Fair Yes
+10 Senior Medium Yes Fair Yes
+11 Youth Medium Yes Excellent Yes
+12 MiddleAged Medium No Excellent Yes
+13 MiddleAged High Yes Fair Yes
+14 Senior Medium No Excellent No
diff --git a/Clustering/DataMining_KMeans/KMeans.py b/Clustering/DataMining_KMeans/KMeans.py
new file mode 100644
index 0000000..4ed3a39
--- /dev/null
+++ b/Clustering/DataMining_KMeans/KMeans.py
@@ -0,0 +1,61 @@
+#!/usr/local/bin/python3
+
+'''
+ ********************************************
+ * Description : k-means clustering of 2-D integer points (k = 2, 10 passes)
+ * Date : 2018-10-16
+ * Author : liuyy
+ * E-mail : yyliu@dmo-sys.com
+ ********************************************
+'''
+
+from functools import reduce
+
+def calc_center(l):
+    """Return the centroid (mean x, mean y) of the 2-D points in l."""
+    return tuple(s / len(l) for s in reduce(lambda a, b: (a[0] + b[0], a[1] + b[1]), l))
+
+def dist(a, b):
+    return sum(d * d for d in (a[0] - b[0], a[1] - b[1]))  # squared Euclidean distance
+
+def iterate(ps, points):
+    """Assign every point to the cluster whose centre is nearest.
+
+    Each entry of `ps` contributes its first item as a centre. Returns one
+    list per centre: [centre, assigned points...]; ties go to the earlier.
+    """
+    clusters = [[seed[0]] for seed in ps]
+    for point in points:
+        best = clusters[0]
+        best_d = dist(point, best[0])
+        for cand in clusters[1:]:
+            d = dist(point, cand[0])
+            if d < best_d:
+                best, best_d = cand, d
+        best.append(point)
+    return clusters
+
+def init(f):
+    """Read integer 2-D points from `f`, one "x y" pair per line.
+
+    Blank lines are skipped; any other line must hold exactly two integers.
+    Returns the points as a list of (x, y) tuples in file order.
+    """
+    points = []
+    with open(f) as fp:
+        for line in fp:
+            if line.strip():
+                x, y = line.split()
+                points.append((int(x), int(y)))
+    return points
+
+# k-means driver: two fixed seed centres, refined for a fixed 10 passes.
+first_ps = [[(0, 0)], [(10, 10)]]
+points = init("input.txt")
+res = []
+
+for _ in range(10):
+    res = iterate(first_ps, points)
+    # A cluster that attracted no points keeps its centre instead of crashing.
+    first_ps = [[calc_center(c[1:]) if len(c) > 1 else c[0]] for c in res]
+print(res)