From 9a241abd9e1416fd9a16361827e01d46f6f83d66 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 26 Mar 2015 09:17:19 +0800 Subject: [PATCH 01/58] =?UTF-8?q?GA=E9=81=97=E4=BC=A0=E7=AE=97=E6=B3=95?= =?UTF-8?q?=E7=9A=84=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GA遗传算法的实现 --- Others/DataMining_GA/Client.java | 19 ++ Others/DataMining_GA/GATool.java | 357 +++++++++++++++++++++++++++++++ 2 files changed, 376 insertions(+) create mode 100644 Others/DataMining_GA/Client.java create mode 100644 Others/DataMining_GA/GATool.java diff --git a/Others/DataMining_GA/Client.java b/Others/DataMining_GA/Client.java new file mode 100644 index 0000000..eff2dbc --- /dev/null +++ b/Others/DataMining_GA/Client.java @@ -0,0 +1,19 @@ +package GA; + +/** + * GeneticŴ㷨 + * @author lyq + * + */ +public class Client { + public static void main(String[] args){ + //Сֵֵ + int minNum = 1; + int maxNum = 7; + //ʼȺģ + int initSetsNum = 4; + + GATool tool = new GATool(minNum, maxNum, initSetsNum); + tool.geneticCal(); + } +} diff --git a/Others/DataMining_GA/GATool.java b/Others/DataMining_GA/GATool.java new file mode 100644 index 0000000..ca0121e --- /dev/null +++ b/Others/DataMining_GA/GATool.java @@ -0,0 +1,357 @@ +package GA; + +import java.util.ArrayList; +import java.util.Random; + +/** + * Ŵ㷨 + * + * @author lyq + * + */ +public class GATool { + // Сֵ + private int minNum; + // ֵ + private int maxNum; + // ıλ + private int codeNum; + // ʼȺ + private int initSetsNum; + // + private Random random; + // ʼȺ + private ArrayList initSets; + + public GATool(int minNum, int maxNum, int initSetsNum) { + this.minNum = minNum; + this.maxNum = maxNum; + this.initSetsNum = initSetsNum; + + this.random = new Random(); + produceInitSets(); + } + + /** + * ʼȺ + */ + private void produceInitSets() { + this.codeNum = 0; + int num = maxNum; + int[] array; + + initSets = new ArrayList<>(); + + // ȷλ + while (num != 0) { + 
codeNum++; + num /= 2; + } + + for (int i = 0; i < initSetsNum; i++) { + array = produceInitCode(); + initSets.add(array); + } + } + + /** + * ʼı + * + * @return + */ + private int[] produceInitCode() { + int num = 0; + int num2 = 0; + int[] tempArray; + int[] array1; + int[] array2; + + tempArray = new int[2 * codeNum]; + array1 = new int[codeNum]; + array2 = new int[codeNum]; + + num = 0; + while (num < minNum || num > maxNum) { + num = random.nextInt(maxNum) + 1; + } + numToBinaryArray(array1, num); + + while (num2 < minNum || num2 > maxNum) { + num2 = random.nextInt(maxNum) + 1; + } + numToBinaryArray(array2, num2); + + // ܵı + for (int i = 0, k = 0; i < tempArray.length; i++, k++) { + if (k < codeNum) { + tempArray[i] = array1[k]; + } else { + tempArray[i] = array2[k - codeNum]; + } + } + + return tempArray; + } + + /** + * ѡֵϸߵĸŴһ + * + * @param initCodes + * ʼ + * @return + */ + private ArrayList selectOperate(ArrayList initCodes) { + double randomNum = 0; + double sumAdaptiveValue = 0; + ArrayList resultCodes = new ArrayList<>(); + double[] adaptiveValue = new double[initSetsNum]; + + for (int i = 0; i < initSetsNum; i++) { + adaptiveValue[i] = calCodeAdaptiveValue(initCodes.get(i)); + sumAdaptiveValue += adaptiveValue[i]; + } + + // תɸʵʽһ + for (int i = 0; i < initSetsNum; i++) { + adaptiveValue[i] = adaptiveValue[i] / sumAdaptiveValue; + } + + for (int i = 0; i < initSetsNum; i++) { + randomNum = random.nextInt(100) + 1; + randomNum = randomNum / 100; + + sumAdaptiveValue = 0; + // ȷ + for (int j = 0; j < initSetsNum; j++) { + if (randomNum > sumAdaptiveValue + && randomNum <= sumAdaptiveValue + adaptiveValue[j]) { + //ÿķʽظ + resultCodes.add(initCodes.get(j).clone()); + break; + } else { + sumAdaptiveValue += adaptiveValue[j]; + } + } + } + + return resultCodes; + } + + /** + * + * + * @param selectedCodes + * ϲѡı + * @return + */ + private ArrayList crossOperate(ArrayList selectedCodes) { + int randomNum = 0; + // + int crossPoint = 0; + ArrayList 
resultCodes = new ArrayList<>(); + // У + ArrayList randomCodeSeqs = new ArrayList<>(); + + // + while (selectedCodes.size() > 0) { + randomNum = random.nextInt(selectedCodes.size()); + + randomCodeSeqs.add(selectedCodes.get(randomNum)); + selectedCodes.remove(randomNum); + } + + int temp = 0; + int[] array1; + int[] array2; + // + for (int i = 1; i < randomCodeSeqs.size(); i++) { + if (i % 2 == 1) { + array1 = randomCodeSeqs.get(i - 1); + array2 = randomCodeSeqs.get(i); + crossPoint = random.nextInt(2 * codeNum - 1) + 1; + + // нλúı + for (int j = 0; j < 2 * codeNum; j++) { + if (j >= crossPoint) { + temp = array1[j]; + array1[j] = array2[j]; + array2[j] = temp; + } + } + + // 뵽 + resultCodes.add(array1); + resultCodes.add(array2); + } + } + + return resultCodes; + } + + /** + * + * + * @param crossCodes + * Ľ + * @return + */ + private ArrayList variationOperate(ArrayList crossCodes) { + // + int variationPoint = 0; + ArrayList resultCodes = new ArrayList<>(); + + for (int[] array : crossCodes) { + variationPoint = random.nextInt(codeNum * 2); + + for (int i = 0; i < array.length; i++) { + // б + if (i == variationPoint) { + array[i] = (array[i] == 0 ? 
1 : 0); + break; + } + } + + resultCodes.add(array); + } + + return resultCodes; + } + + /** + * תΪʽ + * + * @param binaryArray + * תĶʽ + * @param num + * ת + */ + private void numToBinaryArray(int[] binaryArray, int num) { + int index = 0; + int temp = 0; + while (num != 0) { + binaryArray[index] = num % 2; + index++; + num /= 2; + } + + //ǰβĵ + for(int i=0; i=0 ; i--, k++) { + if (binaryArray[i] == 1) { + result += Math.pow(2, k); + } + } + + return result; + } + + /** + * ֵ + * + * @param codeArray + */ + private int calCodeAdaptiveValue(int[] codeArray) { + int result = 0; + int x1 = 0; + int x2 = 0; + int[] array1 = new int[codeNum]; + int[] array2 = new int[codeNum]; + + for (int i = 0, k = 0; i < codeArray.length; i++, k++) { + if (k < codeNum) { + array1[k] = codeArray[i]; + } else { + array2[k - codeNum] = codeArray[i]; + } + } + + // ֵĵ + x1 = binaryArrayToNum(array1); + x2 = binaryArrayToNum(array2); + result = x1 * x1 + x2 * x2; + + return result; + } + + /** + * Ŵ㷨 + */ + public void geneticCal() { + // ֵ + int maxFitness; + //Ŵ + int loopCount = 0; + boolean canExit = false; + ArrayList initCodes; + ArrayList selectedCodes; + ArrayList crossedCodes; + ArrayList variationCodes; + + int[] maxCode = new int[2*codeNum]; + //ֵ + for(int i=0; i<2*codeNum; i++){ + maxCode[i] = 1; + } + maxFitness = calCodeAdaptiveValue(maxCode); + + initCodes = initSets; + while (true) { + for (int[] array : initCodes) { + // ŴֹΪڱﵽֵ + if (maxFitness == calCodeAdaptiveValue(array)) { + canExit = true; + break; + } + } + + if (canExit) { + break; + } + + selectedCodes = selectOperate(initCodes); + crossedCodes = crossOperate(selectedCodes); + variationCodes = variationOperate(crossedCodes); + initCodes = variationCodes; + + loopCount++; + } + + System.out.println("ܹŴ" + loopCount +"" ); + printFinalCodes(initCodes); + } + + /** + * ı뼯 + * + * @param finalCodes + * Ľ + */ + private void printFinalCodes(ArrayList finalCodes) { + int j = 0; + + for (int[] array : finalCodes) { + 
System.out.print("" + (j + 1) + ":"); + for (int i = 0; i < array.length; i++) { + System.out.print(array[i]); + } + System.out.println(); + j++; + } + } + +} From 5afe53a2d9097a349699f489a53addf8e49e851d Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 26 Mar 2015 09:18:49 +0800 Subject: [PATCH 02/58] =?UTF-8?q?dbscan=E5=9F=BA=E4=BA=8E=E5=AF=86?= =?UTF-8?q?=E5=BA=A6=E7=9A=84=E8=81=9A=E7=B1=BB=E7=AE=97=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dbscan基于密度的聚类算法 --- Others/DataMining_DBSCAN/Client.java | 19 +++ Others/DataMining_DBSCAN/DBSCANTool.java | 209 +++++++++++++++++++++++ Others/DataMining_DBSCAN/Point.java | 56 ++++++ Others/DataMining_DBSCAN/input.txt | 19 +++ 4 files changed, 303 insertions(+) create mode 100644 Others/DataMining_DBSCAN/Client.java create mode 100644 Others/DataMining_DBSCAN/DBSCANTool.java create mode 100644 Others/DataMining_DBSCAN/Point.java create mode 100644 Others/DataMining_DBSCAN/input.txt diff --git a/Others/DataMining_DBSCAN/Client.java b/Others/DataMining_DBSCAN/Client.java new file mode 100644 index 0000000..f3d810c --- /dev/null +++ b/Others/DataMining_DBSCAN/Client.java @@ -0,0 +1,19 @@ +package DataMining_DBSCAN; + +/** + * Dbscanܶȵľ㷨 + * @author lyq + * + */ +public class Client { + public static void main(String[] args){ + String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt"; + //ɨ뾶 + double eps = 3; + //Сֵ + int minPts = 3; + + DBSCANTool tool = new DBSCANTool(filePath, eps, minPts); + tool.dbScanCluster(); + } +} diff --git a/Others/DataMining_DBSCAN/DBSCANTool.java b/Others/DataMining_DBSCAN/DBSCANTool.java new file mode 100644 index 0000000..27f2f8e --- /dev/null +++ b/Others/DataMining_DBSCAN/DBSCANTool.java @@ -0,0 +1,209 @@ +package DataMining_DBSCAN; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.text.MessageFormat; +import java.util.ArrayList; + +/** 
+ * DBSCANܶȾ㷨 + * + * @author lyq + * + */ +public class DBSCANTool { + // ļַ + private String filePath; + // ɨ뾶 + private double eps; + // Сֵ + private int minPts; + // е + private ArrayList totalPoints; + // ۴ؽ + private ArrayList> resultClusters; + // + private ArrayList noisePoint; + + public DBSCANTool(String filePath, double eps, int minPts) { + this.filePath = filePath; + this.eps = eps; + this.minPts = minPts; + readDataFile(); + } + + /** + * ļжȡ + */ + public void readDataFile() { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + String[] tempArray; + while ((str = in.readLine()) != null) { + tempArray = str.split(" "); + dataArray.add(tempArray); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + Point p; + totalPoints = new ArrayList<>(); + for (String[] array : dataArray) { + p = new Point(array[0], array[1]); + totalPoints.add(p); + } + } + + /** + * ݹѰҾ۴ + * + * @param pointList + * ǰĵб + * @param parentCluster + * ۴ + */ + private void recursiveCluster(Point point, ArrayList parentCluster) { + double distance = 0; + ArrayList cluster; + + // Ѿʹˣ + if (point.isVisited) { + return; + } + + point.isVisited = true; + cluster = new ArrayList<>(); + for (Point p2 : totalPoints) { + // ˵ + if (point.isTheSame(p2)) { + continue; + } + + distance = point.ouDistance(p2); + if (distance <= eps) { + // Сڸİ뾶 + cluster.add(p2); + } + } + + if (cluster.size() >= minPts) { + // ԼҲ뵽۴ + cluster.add(point); + // Ľڵֵ뵽۴,ͬʱȥظĵ + addCluster(parentCluster, cluster); + + for (Point p : cluster) { + recursiveCluster(p, parentCluster); + } + } + } + + /** + * ۴Ӿֲ + * + * @param parentCluster + * ԭʼ۴ + * @param cluster + * ϲľ۴ + */ + private void addCluster(ArrayList parentCluster, + ArrayList cluster) { + boolean isCotained = false; + ArrayList addPoints = new ArrayList<>(); + + for (Point p : cluster) { + isCotained = false; + for (Point 
p2 : parentCluster) { + if (p.isTheSame(p2)) { + isCotained = true; + break; + } + } + + if (!isCotained) { + addPoints.add(p); + } + } + + parentCluster.addAll(addPoints); + } + + /** + * dbScan㷨ܶȵľ + */ + public void dbScanCluster() { + ArrayList cluster = null; + resultClusters = new ArrayList<>(); + noisePoint = new ArrayList<>(); + + for (Point p : totalPoints) { + if(p.isVisited){ + continue; + } + + cluster = new ArrayList<>(); + recursiveCluster(p, cluster); + + if (cluster.size() > 0) { + resultClusters.add(cluster); + }else{ + noisePoint.add(p); + } + } + removeFalseNoise(); + + printClusters(); + } + + /** + * Ƴ + */ + private void removeFalseNoise(){ + ArrayList totalCluster = new ArrayList<>(); + ArrayList deletePoints = new ArrayList<>(); + + //۴غϲ + for(ArrayList list: resultClusters){ + totalCluster.addAll(list); + } + + for(Point p: noisePoint){ + for(Point p2: totalCluster){ + if(p2.isTheSame(p)){ + deletePoints.add(p); + } + } + } + + noisePoint.removeAll(deletePoints); + } + + /** + * + */ + private void printClusters() { + int i = 1; + for (ArrayList pList : resultClusters) { + System.out.print("۴" + (i++) + ":"); + for (Point p : pList) { + System.out.print(MessageFormat.format("({0},{1}) ", p.x, p.y)); + } + System.out.println(); + } + + System.out.println(); + System.out.print(":"); + for (Point p : noisePoint) { + System.out.print(MessageFormat.format("({0},{1}) ", p.x, p.y)); + } + System.out.println(); + } +} diff --git a/Others/DataMining_DBSCAN/Point.java b/Others/DataMining_DBSCAN/Point.java new file mode 100644 index 0000000..f773bad --- /dev/null +++ b/Others/DataMining_DBSCAN/Point.java @@ -0,0 +1,56 @@ +package DataMining_DBSCAN; + +/** + * + * + * @author lyq + * + */ +public class Point { + // + int x; + // + int y; + // ˽ڵǷѾʹ + boolean isVisited; + + public Point(String x, String y) { + this.x = (Integer.parseInt(x)); + this.y = (Integer.parseInt(y)); + this.isVisited = false; + } + + /** + * 㵱ǰƶ֮ŷʽ + * + * @param p + * p + * 
@return + */ + public double ouDistance(Point p) { + double distance = 0; + + distance = (this.x - p.x) * (this.x - p.x) + (this.y - p.y) + * (this.y - p.y); + distance = Math.sqrt(distance); + + return distance; + } + + /** + * ж2ǷΪø + * + * @param p + * Ƚ + * @return + */ + public boolean isTheSame(Point p) { + boolean isSamed = false; + + if (this.x == p.x && this.y == p.y) { + isSamed = true; + } + + return isSamed; + } +} diff --git a/Others/DataMining_DBSCAN/input.txt b/Others/DataMining_DBSCAN/input.txt new file mode 100644 index 0000000..5bd1c13 --- /dev/null +++ b/Others/DataMining_DBSCAN/input.txt @@ -0,0 +1,19 @@ +2 2 +3 1 +3 4 +3 14 +5 3 +8 3 +8 6 +9 8 +10 4 +10 7 +10 10 +10 14 +11 13 +12 8 +12 15 +14 7 +14 9 +14 15 +15 8 \ No newline at end of file From 1e9d488cde99f6c6ef887ded15709c9ecbab15e3 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 26 Mar 2015 09:19:57 +0800 Subject: [PATCH 03/58] =?UTF-8?q?=E5=9F=BA=E4=BA=8E=E8=BF=9E=E9=80=9A?= =?UTF-8?q?=E5=9B=BE=E7=9A=84=E5=88=86=E8=A3=82=E8=81=9A=E7=B1=BB=E7=AE=97?= =?UTF-8?q?=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 基于连通图的分裂聚类算法 --- Others/DataMining_CABDDCC/CABDDCCTool.java | 102 ++++++++ Others/DataMining_CABDDCC/Client.java | 17 ++ Others/DataMining_CABDDCC/Graph.java | 287 +++++++++++++++++++++ Others/DataMining_CABDDCC/Point.java | 69 +++++ Others/DataMining_CABDDCC/graphData.txt | 15 ++ 5 files changed, 490 insertions(+) create mode 100644 Others/DataMining_CABDDCC/CABDDCCTool.java create mode 100644 Others/DataMining_CABDDCC/Client.java create mode 100644 Others/DataMining_CABDDCC/Graph.java create mode 100644 Others/DataMining_CABDDCC/Point.java create mode 100644 Others/DataMining_CABDDCC/graphData.txt diff --git a/Others/DataMining_CABDDCC/CABDDCCTool.java b/Others/DataMining_CABDDCC/CABDDCCTool.java new file mode 100644 index 0000000..34081b4 --- /dev/null +++ b/Others/DataMining_CABDDCC/CABDDCCTool.java @@ -0,0 
+1,102 @@ +package DataMining_CABDDCC; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.text.MessageFormat; +import java.util.ArrayList; + +/** + * ͨͼķѾ㷨 + * + * @author lyq + * + */ +public class CABDDCCTool { + // ݵ + private String filePath; + // ͨͼֵl + private int length; + // ԭʼ + public static ArrayList totalPoints; + // 㼯 + private ArrayList> resultClusters; + // ͨͼ + private Graph graph; + + public CABDDCCTool(String filePath, int length) { + this.filePath = filePath; + this.length = length; + + readDataFile(); + } + + /** + * ļжȡ + */ + public void readDataFile() { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + String[] tempArray; + while ((str = in.readLine()) != null) { + tempArray = str.split(" "); + dataArray.add(tempArray); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + Point p; + totalPoints = new ArrayList<>(); + for (String[] array : dataArray) { + p = new Point(array[0], array[1], array[2]); + totalPoints.add(p); + } + + // ñߺ͵㹹ͼ + graph = new Graph(null, totalPoints); + } + + /** + * ͨͼõ + */ + public void splitCluster() { + // ȡγͨͼ + ArrayList subGraphs; + ArrayList> pointList; + resultClusters = new ArrayList<>(); + + subGraphs = graph.splitGraphByLength(length); + + for (Graph g : subGraphs) { + // ȡÿͨͼѺľ + pointList = g.getClusterByDivding(); + resultClusters.addAll(pointList); + } + + printResultCluster(); + } + + /** + * ۴ + */ + private void printResultCluster() { + int i = 1; + for (ArrayList cluster : resultClusters) { + System.out.print("۴" + i + ":"); + for (Point p : cluster){ + System.out.print(MessageFormat.format("({0}, {1}) ", p.x, p.y)); + } + System.out.println(); + i++; + } + + } + +} diff --git a/Others/DataMining_CABDDCC/Client.java b/Others/DataMining_CABDDCC/Client.java new file mode 100644 index 
0000000..c57e3f5 --- /dev/null +++ b/Others/DataMining_CABDDCC/Client.java @@ -0,0 +1,17 @@ +package DataMining_CABDDCC; + +/** + * ͨͼķѾ㷨 + * @author lyq + * + */ +public class Client { + public static void main(String[] agrs){ + String filePath = "C:\\Users\\lyq\\Desktop\\icon\\graphData.txt"; + //ֵͨ + int length = 3; + + CABDDCCTool tool = new CABDDCCTool(filePath, length); + tool.splitCluster(); + } +} diff --git a/Others/DataMining_CABDDCC/Graph.java b/Others/DataMining_CABDDCC/Graph.java new file mode 100644 index 0000000..b59d06e --- /dev/null +++ b/Others/DataMining_CABDDCC/Graph.java @@ -0,0 +1,287 @@ +package DataMining_CABDDCC; + +import java.util.ArrayList; +import java.util.Collections; + +/** + * ͨͼ + * + * @author lyq + * + */ +public class Graph { + // ֮ԣΪid + int[][] edges; + // ͨͼڵ + ArrayList points; + // ͼ·ָľͼ + ArrayList> clusters; + + public Graph(int[][] edges) { + this.edges = edges; + this.points = getPointByEdges(edges); + } + + public Graph(int[][] edges, ArrayList points) { + this.edges = edges; + this.points = points; + } + + public int[][] getEdges() { + return edges; + } + + public void setEdges(int[][] edges) { + this.edges = edges; + } + + public ArrayList getPoints() { + return points; + } + + public void setPoints(ArrayList points) { + this.points = points; + } + + /** + * ݾֵͨͼĻ,ͨͼ + * + * @param length + * ֵ + * @return + */ + public ArrayList splitGraphByLength(int length) { + int[][] edges; + Graph tempGraph; + ArrayList graphs = new ArrayList<>(); + + for (Point p : points) { + if (!p.isVisited) { + // е±Ϊid + edges = new int[points.size()][points.size()]; + dfsExpand(p, length, edges); + + tempGraph = new Graph(edges); + graphs.add(tempGraph); + } else { + continue; + } + } + + return graphs; + } + + /** + * ȷʽչͨͼ + * + * @param points + * Ҫѵ + * @param length + * ֵ + * @param edges + * + */ + private void dfsExpand(Point point, int length, int edges[][]) { + int id1 = 0; + int id2 = 0; + double distance = 0; + ArrayList 
tempPoints; + + // ˣ + if (point.isVisited) { + return; + } + + id1 = point.id; + point.isVisited = true; + tempPoints = new ArrayList<>(); + for (Point p2 : points) { + id2 = p2.id; + + if (id1 == id2) { + continue; + } else { + distance = point.ouDistance(p2); + if (distance <= length) { + edges[id1][id2] = 1; + edges[id2][id1] = 1; + + tempPoints.add(p2); + } + } + } + + // ݹ + for (Point p : tempPoints) { + dfsExpand(p, length, edges); + } + } + + /** + * жͨͼǷҪٱ + * + * @param pointList1 + * 㼯1 + * @param pointList2 + * 㼯2 + * @return + */ + private boolean needDivided(ArrayList pointList1, + ArrayList pointList2) { + boolean needDivided = false; + // ϵt=ļϵ/2ӵı + double t = 0; + // ֵƽÿҪܵ + double landa = 0; + int pointNum1 = pointList1.size(); + int pointNum2 = pointList2.size(); + // ܱ + int totalEdgeNum = 0; + // 2ֵı + int connectedEdgeNum = 0; + ArrayList totalPoints = new ArrayList<>(); + + totalPoints.addAll(pointList1); + totalPoints.addAll(pointList2); + int id1 = 0; + int id2 = 0; + for (Point p1 : totalPoints) { + id1 = p1.id; + for (Point p2 : totalPoints) { + id2 = p2.id; + + if (edges[id1][id2] == 1 && id1 < id2) { + if ((pointList1.contains(p1) && pointList2.contains(p2)) + || (pointList1.contains(p2) && pointList2 + .contains(p1))) { + connectedEdgeNum++; + } + totalEdgeNum++; + } + } + } + + if (pointNum1 < pointNum2) { + // ϵt=ļϵ/2ֵı + t = 1.0 * pointNum1 / connectedEdgeNum; + } else { + t = 1.0 * pointNum2 / connectedEdgeNum; + } + + // ֵ,Ϊܱ/ܵƽÿܵĵ + landa = 0.5 * Math.exp((1.0 * totalEdgeNum / (pointNum1 + pointNum2))); + + // ϵСڷֵҪ + if (t >= landa) { + needDivided = true; + } + + return needDivided; + } + + /** + * ݹĻͨͼ + * + * @param pointList + * ֵͨͼ + */ + public void divideGraph(ArrayList pointList) { + // жϴ㼯Ƿָܹ + boolean canDivide = false; + ArrayList> pointGroup; + ArrayList pointList1 = new ArrayList<>(); + ArrayList pointList2 = new ArrayList<>(); + + for (int m = 2; m <= pointList.size() / 2; m++) { + // ķָ + pointGroup = 
removePoint(pointList, m); + pointList1 = pointGroup.get(0); + pointList2 = pointGroup.get(1); + + // жǷ + if (needDivided(pointList1, pointList2)) { + canDivide = true; + divideGraph(pointList1); + divideGraph(pointList2); + } + } + + // еķָ϶޷ָ˵Ѿһ + if (!canDivide) { + clusters.add(pointList); + } + } + + /** + * ȡѵõľ + * + * @return + */ + public ArrayList> getClusterByDivding() { + clusters = new ArrayList<>(); + + divideGraph(points); + + return clusters; + } + + /** + * ǰ㼯ƳremoveNum㣬2㼯 + * + * @param pointList + * ԭϵ + * @param removeNum + * Ƴ + */ + private ArrayList> removePoint(ArrayList pointList, + int removeNum) { + //dzһԭ + ArrayList copyPointList = (ArrayList) pointList.clone(); + ArrayList> pointGroup = new ArrayList<>(); + ArrayList pointList2 = new ArrayList<>(); + // аС + Collections.sort(copyPointList); + + for (int i = 0; i < removeNum; i++) { + pointList2.add(copyPointList.get(i)); + } + copyPointList.removeAll(pointList2); + + pointGroup.add(copyPointList); + pointGroup.add(pointList2); + + return pointGroup; + } + + /** + * ݱߵȡеĵ + * + * @param edges + * ǰ֪ıߵ + * @return + */ + private ArrayList getPointByEdges(int[][] edges) { + Point p1; + Point p2; + ArrayList pointList = new ArrayList<>(); + + for (int i = 0; i < edges.length; i++) { + for (int j = 0; j < edges[0].length; j++) { + if (edges[i][j] == 1) { + p1 = CABDDCCTool.totalPoints.get(i); + p2 = CABDDCCTool.totalPoints.get(j); + + if (!pointList.contains(p1)) { + pointList.add(p1); + } + + if (!pointList.contains(p2)) { + pointList.add(p2); + } + } + } + } + + return pointList; + } +} diff --git a/Others/DataMining_CABDDCC/Point.java b/Others/DataMining_CABDDCC/Point.java new file mode 100644 index 0000000..2763be4 --- /dev/null +++ b/Others/DataMining_CABDDCC/Point.java @@ -0,0 +1,69 @@ +package DataMining_CABDDCC; + + + +/** + * + * @author lyq + * + */ +public class Point implements Comparable{ + //id,idΨһ + int id; + // + Integer x; + // + Integer y; + //ǷѾ()ͨͼʱõ + boolean 
isVisited; + + public Point(String id, String x, String y){ + this.id = Integer.parseInt(id); + this.x = Integer.parseInt(x); + this.y = Integer.parseInt(y); + } + + /** + * 㵱ǰƶ֮ŷʽ + * + * @param p + * p + * @return + */ + public double ouDistance(Point p) { + double distance = 0; + + distance = (this.x - p.x) * (this.x - p.x) + (this.y - p.y) + * (this.y - p.y); + distance = Math.sqrt(distance); + + return distance; + } + + /** + * ж2ǷΪø + * + * @param p + * Ƚ + * @return + */ + public boolean isTheSame(Point p) { + boolean isSamed = false; + + if (this.x == p.x && this.y == p.y) { + isSamed = true; + } + + return isSamed; + } + + @Override + public int compareTo(Point p) { + if(this.x.compareTo(p.x) != 0){ + return this.x.compareTo(p.x); + }else{ + //xȵ±Ƚy + return this.y.compareTo(p.y); + } + } +} diff --git a/Others/DataMining_CABDDCC/graphData.txt b/Others/DataMining_CABDDCC/graphData.txt new file mode 100644 index 0000000..9a04431 --- /dev/null +++ b/Others/DataMining_CABDDCC/graphData.txt @@ -0,0 +1,15 @@ +0 1 12 +1 3 9 +2 3 12 +3 4 10 +4 4 4 +5 4 1 +6 6 1 +7 6 3 +8 6 9 +9 8 3 +10 8 10 +11 9 2 +12 9 11 +13 10 9 +14 11 12 \ No newline at end of file From c48861f3e88e10a51d4863861113a73335fbb8ca Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 26 Mar 2015 09:21:15 +0800 Subject: [PATCH 04/58] =?UTF-8?q?Chameleon=E4=B8=A4=E9=98=B6=E6=AE=B5?= =?UTF-8?q?=E5=90=88=E5=B9=B6=E8=81=9A=E7=B1=BB=E7=AE=97=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chameleon两阶段合并聚类算法 --- .../DataMining_Chameleon/ChameleonTool.java | 423 ++++++++++++++++++ Others/DataMining_Chameleon/Client.java | 19 + Others/DataMining_Chameleon/Cluster.java | 119 +++++ Others/DataMining_Chameleon/Point.java | 59 +++ Others/DataMining_Chameleon/graphData.txt | 19 + 5 files changed, 639 insertions(+) create mode 100644 Others/DataMining_Chameleon/ChameleonTool.java create mode 100644 Others/DataMining_Chameleon/Client.java 
create mode 100644 Others/DataMining_Chameleon/Cluster.java create mode 100644 Others/DataMining_Chameleon/Point.java create mode 100644 Others/DataMining_Chameleon/graphData.txt diff --git a/Others/DataMining_Chameleon/ChameleonTool.java b/Others/DataMining_Chameleon/ChameleonTool.java new file mode 100644 index 0000000..811ea3d --- /dev/null +++ b/Others/DataMining_Chameleon/ChameleonTool.java @@ -0,0 +1,423 @@ +package DataMining_Chameleon; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.text.MessageFormat; +import java.util.ArrayList; + +/** + * Chameleon ׶ξ㷨 + * + * @author lyq + * + */ +public class ChameleonTool { + // ݵļַ + private String filePath; + // һ׶εkڵkС + private int k; + // ضֵ + private double minMetric; + // ܵĸ + private int pointNum; + // ܵӾ,űʾid + public static int[][] edges; + // ֮ıߵȨ + public static double[][] weights; + // ԭʼ + private ArrayList totalPoints; + // һ׶βеͨͼΪʼľ + private ArrayList initClusters; + // ؽ + private ArrayList resultClusters; + + public ChameleonTool(String filePath, int k, double minMetric) { + this.filePath = filePath; + this.k = k; + this.minMetric = minMetric; + + readDataFile(); + } + + /** + * ļжȡ + */ + private void readDataFile() { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + String[] tempArray; + while ((str = in.readLine()) != null) { + tempArray = str.split(" "); + dataArray.add(tempArray); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + Point p; + totalPoints = new ArrayList<>(); + for (String[] array : dataArray) { + p = new Point(array[0], array[1], array[2]); + totalPoints.add(p); + } + pointNum = totalPoints.size(); + } + + /** + * ݹĺϲС۴ + */ + private void combineSubClusters() { + Cluster cluster = null; + + resultClusters = new ArrayList<>(); + + // ľ۴ֻʣһʱ˳ѭ + while 
(initClusters.size() > 1) { + cluster = initClusters.get(0); + combineAndRemove(cluster, initClusters); + } + } + + /** + * ݹĺϲ۴غƳ۴ + * + * @param clusterList + */ + private ArrayList combineAndRemove(Cluster cluster, + ArrayList clusterList) { + ArrayList remainClusters; + double metric = 0; + double maxMetric = -Integer.MAX_VALUE; + Cluster cluster1 = null; + Cluster cluster2 = null; + + for (Cluster c2 : clusterList) { + if(cluster.id == c2.id){ + continue; + } + + metric = calMetricfunction(cluster, c2, 1); + + if (metric > maxMetric) { + maxMetric = metric; + cluster1 = cluster; + cluster2 = c2; + } + } + + // ֵֵкϲ,ѰԺϲĴ + if (maxMetric > minMetric) { + clusterList.remove(cluster2); + //߽ + connectClusterToCluster(cluster1, cluster2); + // 1ʹ2ϲ + cluster1.points.addAll(cluster2.points); + remainClusters = combineAndRemove(cluster1, clusterList); + } else { + clusterList.remove(cluster); + remainClusters = clusterList; + resultClusters.add(cluster); + } + + return remainClusters; + } + + /** + * 2ؽбߵ + * @param c1 + * ۴1 + * @param c2 + * ۴2 + */ + private void connectClusterToCluster(Cluster c1, Cluster c2){ + ArrayList connectedEdges; + + connectedEdges = c1.calNearestEdge(c2, 2); + + for(int[] array: connectedEdges){ + edges[array[0]][array[1]] = 1; + edges[array[1]][array[0]] = 1; + } + } + + /** + * 㷨һ׶γɾֲͨͼ + */ + private void connectedGraph() { + double distance = 0; + Point p1; + Point p2; + + // ʼȨؾӾ + weights = new double[pointNum][pointNum]; + edges = new int[pointNum][pointNum]; + for (int i = 0; i < pointNum; i++) { + for (int j = 0; j < pointNum; j++) { + p1 = totalPoints.get(i); + p2 = totalPoints.get(j); + + distance = p1.ouDistance(p2); + if (distance == 0) { + // ΪĻȨΪ0 + weights[i][j] = 0; + } else { + // ߵȨزõֵΪĵ,ԽȨԽ + weights[i][j] = 1.0 / distance; + } + } + } + + double[] tempWeight; + int[] ids; + int id1 = 0; + int id2 = 0; + // ÿid㣬ȡȨǰkĵ + for (int i = 0; i < pointNum; i++) { + tempWeight = weights[i]; + // + ids = 
sortWeightArray(tempWeight); + + // ȡǰkȨı߽ + for (int j = 0; j < ids.length; j++) { + if (j < k) { + id1 = i; + id2 = ids[j]; + + edges[id1][id2] = 1; + edges[id2][id1] = 1; + } + } + } + } + + /** + * Ȩصð㷨 + * + * @param array + * + */ + private int[] sortWeightArray(double[] array) { + double[] copyArray = array.clone(); + int[] ids = null; + int k = 0; + double maxWeight = -1; + + ids = new int[pointNum]; + for(int i=0; i maxWeight){ + maxWeight = copyArray[j]; + k = j; + } + } + + ids[i] = k; + //ǰҵֵΪ-1Ѿҵ + copyArray[k] = -1; + } + + return ids; + } + + /** + * ݱߵͨȥеС۴ + */ + private void searchSmallCluster() { + int currentId = 0; + Point p; + Cluster cluster; + initClusters = new ArrayList<>(); + ArrayList pointList = null; + + // idķʽȥdfs + for (int i = 0; i < pointNum; i++) { + p = totalPoints.get(i); + + if (p.isVisited) { + continue; + } + + pointList = new ArrayList<>(); + pointList.add(p); + recusiveDfsSearch(p, -1, pointList); + + cluster = new Cluster(currentId, pointList); + initClusters.add(cluster); + + currentId++; + } + } + + /** + * ȵķʽҵŵ + * + * @param p + * ǰ + * @param lastId + * ˵ĸ + * @param pList + * б + */ + private void recusiveDfsSearch(Point p, int parentId, ArrayList pList) { + int id1 = 0; + int id2 = 0; + Point newPoint; + + if (p.isVisited) { + return; + } + + p.isVisited = true; + for (int j = 0; j < pointNum; j++) { + id1 = p.id; + id2 = j; + + if (edges[id1][id2] == 1 && id2 != parentId) { + newPoint = totalPoints.get(j); + pList.add(newPoint); + // Դ˵Ϊ㣬ݹ + recusiveDfsSearch(newPoint, id1, pList); + } + } + } + + /** + * 2صıߵȨ + * + * @param c1 + * ۴1 + * @param c2 + * ۴2 + * @return + */ + private double calEC(Cluster c1, Cluster c2) { + double resultEC = 0; + ArrayList connectedEdges = null; + + connectedEdges = c1.calNearestEdge(c2, 2); + + // 2ֵıߵȨغ + for (int[] array : connectedEdges) { + resultEC += weights[array[0]][array[1]]; + } + + return resultEC; + } + + /** + * 2صԻ + * + * @param c1 + * @param c2 + * @return + */ + 
private double calRI(Cluster c1, Cluster c2) { + double RI = 0; + double EC1 = 0; + double EC2 = 0; + double EC1To2 = 0; + + EC1 = c1.calEC(); + EC2 = c2.calEC(); + EC1To2 = calEC(c1, c2); + + RI = 2 * EC1To2 / (EC1 + EC2); + + return RI; + } + + /** + * صԽƶ + * + * @param c1 + * 1 + * @param c2 + * 2 + * @return + */ + private double calRC(Cluster c1, Cluster c2) { + double RC = 0; + double EC1 = 0; + double EC2 = 0; + double EC1To2 = 0; + int pNum1 = c1.points.size(); + int pNum2 = c2.points.size(); + + EC1 = c1.calEC(); + EC2 = c2.calEC(); + EC1To2 = calEC(c1, c2); + + RC = EC1To2 * (pNum1 + pNum2) / (pNum2 * EC1 + pNum1 * EC2); + + return RC; + } + + /** + * ֵ + * + * @param c1 + * 1 + * @param c2 + * 2 + * @param alpha + * ݵIJֵ + * @return + */ + private double calMetricfunction(Cluster c1, Cluster c2, int alpha) { + // ֵ + double metricValue = 0; + double RI = 0; + double RC = 0; + + RI = calRI(c1, c2); + RC = calRC(c1, c2); + // alpha1Խԣalphaң1עԻ + metricValue = RI * Math.pow(RC, alpha); + + return metricValue; + } + + /** + * ۴ + * @param clusterList + * ۴ + */ + private void printClusters(ArrayList clusterList) { + int i = 1; + + for (Cluster cluster : clusterList) { + System.out.print("۴" + i + ":"); + for (Point p : cluster.points) { + System.out.print(MessageFormat.format("({0}, {1}) ", p.x, p.y)); + } + System.out.println(); + i++; + } + + } + + /** + * ۴ + */ + public void buildCluster() { + // һ׶γС۴ + connectedGraph(); + searchSmallCluster(); + System.out.println("һ׶γɵСؼϣ"); + printClusters(initClusters); + + // ڶ׶θRIRCֵϲС۴γս۴ + combineSubClusters(); + System.out.println("յľ۴ؼϣ"); + printClusters(resultClusters); + } +} diff --git a/Others/DataMining_Chameleon/Client.java b/Others/DataMining_Chameleon/Client.java new file mode 100644 index 0000000..254f760 --- /dev/null +++ b/Others/DataMining_Chameleon/Client.java @@ -0,0 +1,19 @@ +package DataMining_Chameleon; + +/** + * Chameleon(ɫ)׶ξ㷨 + * @author lyq + * + */ +public class Client { + public 
static void main(String[] args){ + String filePath = "C:\\Users\\lyq\\Desktop\\icon\\graphData.txt"; + //k-ڵk + int k = 1; + //ֵ + double minMetric = 0.1; + + ChameleonTool tool = new ChameleonTool(filePath, k, minMetric); + tool.buildCluster(); + } +} diff --git a/Others/DataMining_Chameleon/Cluster.java b/Others/DataMining_Chameleon/Cluster.java new file mode 100644 index 0000000..42e1f94 --- /dev/null +++ b/Others/DataMining_Chameleon/Cluster.java @@ -0,0 +1,119 @@ +package DataMining_Chameleon; + +import java.util.ArrayList; + +/** + * ۴ + * + * @author lyq + * + */ +public class Cluster implements Cloneable{ + //Ψһidʶ + int id; + // ۴ڵ㼯 + ArrayList points; + // ۴ڵбߵȨغ + double weightSum = 0; + + public Cluster(int id, ArrayList points) { + this.id = id; + this.points = points; + } + + /** + * ۴صڲıȨغ + * + * @return + */ + public double calEC() { + int id1 = 0; + int id2 = 0; + weightSum = 0; + + for (Point p1 : points) { + for (Point p2 : points) { + id1 = p1.id; + id2 = p2.id; + + // Ϊ˱ظ㣬ȡid1СĶӦ + if (id1 < id2 && ChameleonTool.edges[id1][id2] == 1) { + weightSum += ChameleonTool.weights[id1][id2]; + } + } + } + + return weightSum; + } + + /** + * 2֮n + * + * @param otherCluster + * ȽϵĴ + * @param n + * ıߵĿ + * @return + */ + public ArrayList calNearestEdge(Cluster otherCluster, int n){ + int count = 0; + double distance = 0; + double minDistance = Integer.MAX_VALUE; + Point point1 = null; + Point point2 = null; + ArrayList edgeList = new ArrayList<>(); + ArrayList pointList1 = (ArrayList) points.clone(); + ArrayList pointList2 = null; + Cluster c2 = null; + + try { + c2 = (Cluster) otherCluster.clone(); + pointList2 = c2.points; + } catch (CloneNotSupportedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + int[] tempEdge; + // ѭÿε + while (count < n) { + tempEdge = new int[2]; + minDistance = Integer.MAX_VALUE; + + for (Point p1 : pointList1) { + for (Point p2 : pointList2) { + distance = p1.ouDistance(p2); + if (distance < 
minDistance) { + point1 = p1; + point2 = p2; + tempEdge[0] = p1.id; + tempEdge[1] = p2.id; + + minDistance = distance; + } + } + } + + pointList1.remove(point1); + pointList2.remove(point2); + edgeList.add(tempEdge); + count++; + } + + return edgeList; + } + + @Override + protected Object clone() throws CloneNotSupportedException { + // TODO Auto-generated method stub + + //Ҫٴθƣʵ + ArrayList pointList = (ArrayList) this.points.clone(); + Cluster cluster = new Cluster(id, pointList); + + return cluster; + } + + + +} diff --git a/Others/DataMining_Chameleon/Point.java b/Others/DataMining_Chameleon/Point.java new file mode 100644 index 0000000..2a3b8cc --- /dev/null +++ b/Others/DataMining_Chameleon/Point.java @@ -0,0 +1,59 @@ +package DataMining_Chameleon; + + + +/** + * + * @author lyq + * + */ +public class Point{ + //id,idΨһ + int id; + // + Integer x; + // + Integer y; + //ǷѾʹ + boolean isVisited; + + public Point(String id, String x, String y){ + this.id = Integer.parseInt(id); + this.x = Integer.parseInt(x); + this.y = Integer.parseInt(y); + } + + /** + * 㵱ǰƶ֮ŷʽ + * + * @param p + * p + * @return + */ + public double ouDistance(Point p) { + double distance = 0; + + distance = (this.x - p.x) * (this.x - p.x) + (this.y - p.y) + * (this.y - p.y); + distance = Math.sqrt(distance); + + return distance; + } + + /** + * ж2ǷΪø + * + * @param p + * Ƚ + * @return + */ + public boolean isTheSame(Point p) { + boolean isSamed = false; + + if (this.x == p.x && this.y == p.y) { + isSamed = true; + } + + return isSamed; + } +} diff --git a/Others/DataMining_Chameleon/graphData.txt b/Others/DataMining_Chameleon/graphData.txt new file mode 100644 index 0000000..d618d9a --- /dev/null +++ b/Others/DataMining_Chameleon/graphData.txt @@ -0,0 +1,19 @@ +0 2 2 +1 3 1 +2 3 4 +3 3 14 +4 5 3 +5 8 3 +6 8 6 +7 9 8 +8 10 4 +9 10 7 +10 10 10 +11 10 14 +12 11 13 +13 12 8 +14 12 15 +15 14 7 +16 14 9 +17 14 15 +18 15 8 \ No newline at end of file From 7022e8db2bb8a49732d9c967be0ff64d5303ccb9 
Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Thu, 26 Mar 2015 12:14:35 +0800 Subject: [PATCH 05/58] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 补充了others包内的算法介绍 --- README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ed399c2..5765f34 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # DataMiningAlgorithm 数据挖掘中经典的算法实现和详细的注释 -18大数据挖掘的经典算法以及代码实现,涉及到了决策分类,聚类,链接挖掘,关联挖掘,模式挖掘等等方面,后面都是相应算法的博文链接,希望能够帮助大家学习。 +18大数据挖掘的经典算法以及代码实现,涉及到了决策分类,聚类,链接挖掘,关联挖掘,模式挖掘等等方面,后面都是相应算法的博文链接,希望能够帮助大家学。 +目前追加了其他的一些经典的DM算法,在others的包中涉及聚类,分类,图算法,搜索算等等,没有具体分类。 1.C4.5算法。C4.5算法与ID3算法一样,都是数学分类算法,C4.5算法是ID3算法的一个改进。ID3算法采用信息增益进行决策判断,而C4.5采用的是增益率。 详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/42395865 @@ -56,3 +57,14 @@ 18.gSpan算法。gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘,相较于其他的图算法,子图挖掘算法是他们的一个前提或基础算法。gSpan算法用到了DFS编码,和Edge五元组,最右路径子图扩展等概念,算法比较的抽象和复杂。 详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43924273 + + +Others目录下的算法: + +1.GA遗传算法。遗传算法运用了生物进化理论的知识来寻找问题最优解的算法,算法的遗传进化过程分选择,交叉和变异操作,其中选择操是非常关键的步骤,把更适应的基于组遗传给下一代。 +详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/44041499 + +2.dbScan基于空间密度聚类算法。dbScan作为一种特殊聚类算法,弥补了其他算法的一些不足,基于空间密,实现聚类效果,可以发现任意形状的聚簇。 +详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/44311309 + + From f8602ae9df2ba2afcd8e8a66b7749b7a27a26993 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 26 Mar 2015 17:00:40 +0800 Subject: [PATCH 06/58] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=BA=86=E9=81=97?= =?UTF-8?q?=E4=BC=A0=E7=AE=97=E6=B3=95=E5=9C=A8=E9=80=89=E6=8B=A9=E6=93=8D?= =?UTF-8?q?=E4=BD=9C=E6=97=B6=E6=95=B0=E9=87=8F=E6=9C=89=E6=97=B6=E4=BC=9A?= =?UTF-8?q?=E5=8F=98=E5=B0=91=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修复了遗传算法在选择操作时数量有时会变少的bug --- Others/DataMining_GA/GATool.java | 4 ++++ 1 file changed, 4 insertions(+) diff 
--git a/Others/DataMining_GA/GATool.java b/Others/DataMining_GA/GATool.java index ca0121e..567c393 100644 --- a/Others/DataMining_GA/GATool.java +++ b/Others/DataMining_GA/GATool.java @@ -119,6 +119,10 @@ private ArrayList selectOperate(ArrayList initCodes) { for (int i = 0; i < initSetsNum; i++) { randomNum = random.nextInt(100) + 1; randomNum = randomNum / 100; + //Ϊ1.0޷жϵģ,ܺͻ޽ӽ1.0ȡΪ0.99ж + if(randomNum == 1){ + randomNum = randomNum - 0.01; + } sumAdaptiveValue = 0; // ȷ From 4bf69b6711b0ccfc6dee82143fad99814781149d Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 26 Mar 2015 17:02:45 +0800 Subject: [PATCH 07/58] =?UTF-8?q?=E9=81=97=E4=BC=A0=E7=AE=97=E6=B3=95?= =?UTF-8?q?=E5=9C=A8=E8=B5=B0=E8=BF=B7=E5=AE=AB=E6=B8=B8=E6=88=8F=E4=B8=AD?= =?UTF-8?q?=E7=9A=84=E5=BA=94=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 遗传算法在走迷宫游戏中的应用 --- Others/DataMining_GA_Maze/Client.java | 19 ++ Others/DataMining_GA_Maze/GATool.java | 452 ++++++++++++++++++++++++++ Others/DataMining_GA_Maze/mapData.txt | 5 + 3 files changed, 476 insertions(+) create mode 100644 Others/DataMining_GA_Maze/Client.java create mode 100644 Others/DataMining_GA_Maze/GATool.java create mode 100644 Others/DataMining_GA_Maze/mapData.txt diff --git a/Others/DataMining_GA_Maze/Client.java b/Others/DataMining_GA_Maze/Client.java new file mode 100644 index 0000000..0cec9c9 --- /dev/null +++ b/Others/DataMining_GA_Maze/Client.java @@ -0,0 +1,19 @@ +package GA_Maze; + +/** + * Ŵ㷨ԹϷӦ + * @author lyq + * + */ +public class Client { + public static void main(String[] args) { + //Թͼļݵַ + String filePath = "C:\\Users\\lyq\\Desktop\\icon\\mapData.txt"; + //ʼ + int initSetsNum = 4; + + GATool tool = new GATool(filePath, initSetsNum); + tool.goOutMaze(); + } + +} diff --git a/Others/DataMining_GA_Maze/GATool.java b/Others/DataMining_GA_Maze/GATool.java new file mode 100644 index 0000000..39c8270 --- /dev/null +++ 
b/Others/DataMining_GA_Maze/GATool.java @@ -0,0 +1,452 @@ +package GA_Maze; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Random; + +/** + * Ŵ㷨ԹϷӦ-Ŵ㷨 + * + * @author lyq + * + */ +public class GATool { + // Թڱ + public static final int MAZE_ENTRANCE_POS = 1; + public static final int MAZE_EXIT_POS = 2; + // Ӧı + public static final int[][] MAZE_DIRECTION_CODE = new int[][] { { 0, 0 }, + { 0, 1 }, { 1, 0 }, { 1, 1 }, }; + // 㷽ı + public static final int[][] MAZE_DIRECTION_CHANGE = new int[][] { + { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }, }; + // + public static final String[] MAZE_DIRECTION_LABEL = new String[] { "", + "", "", "" }; + + // ͼļַ + private String filePath; + // Թ̲ + private int stepNum; + // ʼ + private int initSetsNum; + // Թλ + private int[] startPos; + // Թλ + private int[] endPos; + // Թͼ + private int[][] mazeData; + // ʼ弯 + private ArrayList initSets; + // + private Random random; + + public GATool(String filePath, int initSetsNum) { + this.filePath = filePath; + this.initSetsNum = initSetsNum; + + readDataFile(); + } + + /** + * ļжȡ + */ + public void readDataFile() { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + String[] tempArray; + while ((str = in.readLine()) != null) { + tempArray = str.split(" "); + dataArray.add(tempArray); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + int rowNum = dataArray.size(); + mazeData = new int[rowNum][rowNum]; + for (int i = 0; i < rowNum; i++) { + String[] data = dataArray.get(i); + for (int j = 0; j < data.length; j++) { + mazeData[i][j] = Integer.parseInt(data[j]); + + // ֵںͳλ + if (mazeData[i][j] == MAZE_ENTRANCE_POS) { + startPos = new int[2]; + startPos[0] = i; + startPos[1] = j; + } else if (mazeData[i][j] == 
MAZE_EXIT_POS) { + endPos = new int[2]; + endPos[0] = i; + endPos[1] = j; + } + } + } + + // ߳Թ̲ + stepNum = Math.abs(startPos[0] - endPos[0]) + + Math.abs(startPos[1] - endPos[1]); + } + + /** + * ʼݼ + */ + private void produceInitSet() { + // + int directionCode = 0; + random = new Random(); + initSets = new ArrayList<>(); + // ÿIJҪ2λֱʾ + int[] codeNum; + + for (int i = 0; i < initSetsNum; i++) { + codeNum = new int[stepNum * 2]; + for (int j = 0; j < stepNum; j++) { + directionCode = random.nextInt(4); + codeNum[2 * j] = MAZE_DIRECTION_CODE[directionCode][0]; + codeNum[2 * j + 1] = MAZE_DIRECTION_CODE[directionCode][1]; + } + + initSets.add(codeNum); + } + } + + /** + * ѡֵϸߵĸŴһ + * + * @param initCodes + * ʼ + * @return + */ + private ArrayList selectOperate(ArrayList initCodes) { + double randomNum = 0; + double sumFitness = 0; + ArrayList resultCodes = new ArrayList<>(); + double[] adaptiveValue = new double[initSetsNum]; + + for (int i = 0; i < initSetsNum; i++) { + adaptiveValue[i] = calFitness(initCodes.get(i)); + sumFitness += adaptiveValue[i]; + } + + // תɸʵʽһ + for (int i = 0; i < initSetsNum; i++) { + adaptiveValue[i] = adaptiveValue[i] / sumFitness; + } + + for (int i = 0; i < initSetsNum; i++) { + randomNum = random.nextInt(100) + 1; + randomNum = randomNum / 100; + //Ϊ1.0޷жϵģ,ܺͻ޽ӽ1.0ȡΪ0.99ж + if(randomNum == 1){ + randomNum = randomNum - 0.01; + } + + sumFitness = 0; + // ȷ + for (int j = 0; j < initSetsNum; j++) { + if (randomNum > sumFitness + && randomNum <= sumFitness + adaptiveValue[j]) { + // ÿķʽظ + resultCodes.add(initCodes.get(j).clone()); + break; + } else { + sumFitness += adaptiveValue[j]; + } + } + } + + return resultCodes; + } + + /** + * + * + * @param selectedCodes + * ϲѡı + * @return + */ + private ArrayList crossOperate(ArrayList selectedCodes) { + int randomNum = 0; + // + int crossPoint = 0; + ArrayList resultCodes = new ArrayList<>(); + // У + ArrayList randomCodeSeqs = new ArrayList<>(); + + // + while (selectedCodes.size() > 0) 
{ + randomNum = random.nextInt(selectedCodes.size()); + + randomCodeSeqs.add(selectedCodes.get(randomNum)); + selectedCodes.remove(randomNum); + } + + int temp = 0; + int[] array1; + int[] array2; + // + for (int i = 1; i < randomCodeSeqs.size(); i++) { + if (i % 2 == 1) { + array1 = randomCodeSeqs.get(i - 1); + array2 = randomCodeSeqs.get(i); + crossPoint = random.nextInt(stepNum - 1) + 1; + + // нλúı + for (int j = 0; j < 2 * stepNum; j++) { + if (j >= 2 * crossPoint) { + temp = array1[j]; + array1[j] = array2[j]; + array2[j] = temp; + } + } + + // 뵽 + resultCodes.add(array1); + resultCodes.add(array2); + } + } + + return resultCodes; + } + + /** + * + * + * @param crossCodes + * Ľ + * @return + */ + private ArrayList variationOperate(ArrayList crossCodes) { + // + int variationPoint = 0; + ArrayList resultCodes = new ArrayList<>(); + + for (int[] array : crossCodes) { + variationPoint = random.nextInt(stepNum); + + for (int i = 0; i < array.length; i += 2) { + // б + if (i % 2 == 0 && i / 2 == variationPoint) { + array[i] = (array[i] == 0 ? 1 : 0); + array[i + 1] = (array[i + 1] == 0 ? 
1 : 0); + break; + } + } + + resultCodes.add(array); + } + + return resultCodes; + } + + /** + * ݱֵ + * + * @param code + * ǰı + * @return + */ + public double calFitness(int[] code) { + double fintness = 0; + // ɱõյ + int endX = 0; + // ɱõյ + int endY = 0; + // Ƭ߷ + int direction = 0; + // ʱ + int tempX = 0; + // ʱ + int tempY = 0; + + endX = startPos[0]; + endY = startPos[1]; + for (int i = 0; i < stepNum; i++) { + direction = binaryArrayToNum(new int[] { code[2 * i], + code[2 * i + 1] }); + + // ݷıĸı + tempX = endX + MAZE_DIRECTION_CHANGE[direction][0]; + tempY = endY + MAZE_DIRECTION_CHANGE[direction][1]; + + // жǷԽ + if (tempX >= 0 && tempX < mazeData.length && tempY >= 0 + && tempY < mazeData[0].length) { + // жǷߵ谭 + if (mazeData[tempX][tempY] != -1) { + endX = tempX; + endY = tempY; + } + } + } + + // ֵֵļ + fintness = 1.0 / (Math.abs(endX - endPos[0]) + + Math.abs(endY - endPos[1]) + 1); + + return fintness; + } + + /** + * ݵǰжǷѾҵλ + * + * @param code + * ɴŴı + * @return + */ + private boolean ifArriveEndPos(int[] code) { + boolean isArrived = false; + // ɱõյ + int endX = 0; + // ɱõյ + int endY = 0; + // Ƭ߷ + int direction = 0; + // ʱ + int tempX = 0; + // ʱ + int tempY = 0; + + endX = startPos[0]; + endY = startPos[1]; + for (int i = 0; i < stepNum; i++) { + direction = binaryArrayToNum(new int[] { code[2 * i], + code[2 * i + 1] }); + + // ݷıĸı + tempX = endX + MAZE_DIRECTION_CHANGE[direction][0]; + tempY = endY + MAZE_DIRECTION_CHANGE[direction][1]; + + // жǷԽ + if (tempX >= 0 && tempX < mazeData.length && tempY >= 0 + && tempY < mazeData[0].length) { + // жǷߵ谭 + if (mazeData[tempX][tempY] != -1) { + endX = tempX; + endY = tempY; + } + } + } + + if (endX == endPos[0] && endY == endPos[1]) { + isArrived = true; + } + + return isArrived; + } + + /** + * תΪ + * + * @param binaryArray + * ת + */ + private int binaryArrayToNum(int[] binaryArray) { + int result = 0; + + for (int i = binaryArray.length - 1, k = 0; i >= 0; i--, k++) { + if (binaryArray[i] == 1) { 
+ result += Math.pow(2, k); + } + } + + return result; + } + + /** + * Ŵ㷨߳Թ + */ + public void goOutMaze() { + // Ŵ + int loopCount = 0; + boolean canExit = false; + // · + int[] resultCode = null; + ArrayList initCodes; + ArrayList selectedCodes; + ArrayList crossedCodes; + ArrayList variationCodes; + + // ʼݼ + produceInitSet(); + initCodes = initSets; + + while (true) { + for (int[] array : initCodes) { + // ŴֹΪǷҵλ + if (ifArriveEndPos(array)) { + resultCode = array; + canExit = true; + break; + } + } + + if (canExit) { + break; + } + + selectedCodes = selectOperate(initCodes); + crossedCodes = crossOperate(selectedCodes); + variationCodes = variationOperate(crossedCodes); + initCodes = variationCodes; + + loopCount++; + + //Ŵ100Σ˳ + if(loopCount >= 100){ + break; + } + } + + System.out.println("ܹŴ" + loopCount + ""); + printFindedRoute(resultCode); + } + + /** + * ҵ· + * + * @param code + */ + private void printFindedRoute(int[] code) { + if(code == null){ + System.out.println("޵Ŵڣûҵ·"); + return; + } + + int tempX = startPos[0]; + int tempY = startPos[1]; + int direction = 0; + + System.out.println(MessageFormat.format( + "ʼλ({0},{1}), ڵλ({2}, {3})", tempX, tempY, endPos[0], + endPos[1])); + + System.out.print("Ľ룺"); + for(int value: code){ + System.out.print("" + value); + } + System.out.println(); + + for (int i = 0, k = 1; i < code.length; i += 2, k++) { + direction = binaryArrayToNum(new int[] { code[i], code[i + 1] }); + + tempX += MAZE_DIRECTION_CHANGE[direction][0]; + tempY += MAZE_DIRECTION_CHANGE[direction][1]; + + System.out.println(MessageFormat.format( + "{0},Ϊ{1}{2},{3}ƶƶ󵽴({4},{5})", k, code[i], code[i+1], + MAZE_DIRECTION_LABEL[direction], tempX, tempY)); + } + } + +} diff --git a/Others/DataMining_GA_Maze/mapData.txt b/Others/DataMining_GA_Maze/mapData.txt new file mode 100644 index 0000000..e3566d7 --- /dev/null +++ b/Others/DataMining_GA_Maze/mapData.txt @@ -0,0 +1,5 @@ +0 0 0 0 0 +2 0 0 -1 0 +0 0 0 0 0 +0 -1 0 0 -1 +0 0 0 0 1 \ No newline at 
end of file From db2bcd7603df1d852a3c091d824474497624928f Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Fri, 27 Mar 2015 16:14:32 +0800 Subject: [PATCH 08/58] Update README.md --- README.md | 68 +++++++++++++++++++------------------------------------ 1 file changed, 23 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 5765f34..b8f9418 100644 --- a/README.md +++ b/README.md @@ -1,70 +1,48 @@ -# DataMiningAlgorithm -数据挖掘中经典的算法实现和详细的注释 +# 数据挖掘算法 +##18大经典DM算法 18大数据挖掘的经典算法以及代码实现,涉及到了决策分类,聚类,链接挖掘,关联挖掘,模式挖掘等等方面,后面都是相应算法的博文链接,希望能够帮助大家学。 目前追加了其他的一些经典的DM算法,在others的包中涉及聚类,分类,图算法,搜索算等等,没有具体分类。 -1.C4.5算法。C4.5算法与ID3算法一样,都是数学分类算法,C4.5算法是ID3算法的一个改进。ID3算法采用信息增益进行决策判断,而C4.5采用的是增益率。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/42395865 +* C4.5算法。C4.5算法与ID3算法一样,都是数学分类算法,C4.5算法是ID3算法的一个改进。ID3算法采用信息增益进行决策判断,而C4.5采用的是增益率。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42395865) -2.CART算法。CART算法的全称是分类回归树算法,他是一个二元分类,采用的是类似于熵的基尼指数作为分类决策,形成决策树后之后还要进行剪枝,我自己在实现整个算法的时候采用的是代价复杂度算法, -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/42558235 -3.KNN(K最近邻)算法。给定一些已经训练好的数据,输入一个新的测试数据点,计算包含于此测试数据点的最近的点的分类情况,哪个分类的类型占多数,则此测试点的分类与此相同,所以在这里,有的时候可以复制不同的分类点不同的权重。近的点的权重大点,远的点自然就小点。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/42613011 +* CART算法。CART算法的全称是分类回归树算法,他是一个二元分类,采用的是类似于熵的基尼指数作为分类决策,形成决策树后之后还要进行剪枝,我自己在实现整个算法的时候采用的是代价复杂度算法,[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42558235) -4.Naive Bayes(朴素贝叶斯)算法。朴素贝叶斯算法是贝叶斯算法里面一种比较简单的分类算法,用到了一个比较重要的贝叶斯定理,用一句简单的话概括就是条件概率的相互转换推导。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/42680161 +* KNN(K最近邻)算法。给定一些已经训练好的数据,输入一个新的测试数据点,计算包含于此测试数据点的最近的点的分类情况,哪个分类的类型占多数,则此测试点的分类与此相同,所以在这里,有的时候可以复制不同的分类点不同的权重。近的点的权重大点,远的点自然就小点。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42613011) -5.SVM(支持向量机)算法。支持向量机算法是一种对线性和非线性数据进行分类的方法,非线性数据进行分类的时候可以通过核函数转为线性的情况再处理。其中的一个关键的步骤是搜索最大边缘超平面。 
-详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/42780439 +* Naive Bayes(朴素贝叶斯)算法。朴素贝叶斯算法是贝叶斯算法里面一种比较简单的分类算法,用到了一个比较重要的贝叶斯定理,用一句简单的话概括就是条件概率的相互转换推导。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42680161) -6.EM(期望最大化)算法。期望最大化算法,可以拆分为2个算法,1个E-Step期望化步骤,和1个M-Step最大化步骤。他是一种算法框架,在每次计算结果之后,逼近统计模型参数的最大似然或最大后验估计。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/42921789 +* SVM(支持向量机)算法。支持向量机算法是一种对线性和非线性数据进行分类的方法,非线性数据进行分类的时候可以通过核函数转为线性的情况再处理。其中的一个关键的步骤是搜索最大边缘超平面。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42780439) -7.Apriori算法。Apriori算法是关联规则挖掘算法,通过连接和剪枝运算挖掘出频繁项集,然后根据频繁项集得到关联规则,关联规则的导出需要满足最小置信度的要求。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43059211 +* EM(期望最大化)算法。期望最大化算法,可以拆分为2个算法,1个E-Step期望化步骤,和1个M-Step最大化步骤。他是一种算法框架,在每次计算结果之后,逼近统计模型参数的最大似然或最大后验估计。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42921789) -8.FP-Tree(频繁模式树)算法。这个算法也有被称为FP-growth算法,这个算法克服了Apriori算法的产生过多侯选集的缺点,通过递归的产生频度模式树,然后对树进行挖掘,后面的过程与Apriori算法一致。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43234309 +* Apriori算法。Apriori算法是关联规则挖掘算法,通过连接和剪枝运算挖掘出频繁项集,然后根据频繁项集得到关联规则,关联规则的导出需要满足最小置信度的要求。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43059211) -9.PageRank(网页重要性/排名)算法。PageRank算法最早产生于Google,核心思想是通过网页的入链数作为一个网页好快的判定标准,如果1个网页内部包含了多个指向外部的链接,则PR值将会被均分,PageRank算法也会遭到Link Span攻击。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43311943 +* FP-Tree(频繁模式树)算法。这个算法也有被称为FP-growth算法,这个算法克服了Apriori算法的产生过多侯选集的缺点,通过递归的产生频度模式树,然后对树进行挖掘,后面的过程与Apriori算法一致。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43234309) -10.HITS算法。HITS算法是另外一个链接算法,部分原理与PageRank算法是比较相似的,HITS算法引入了权威值和中心值的概念,HITS算法是受用户查询条件影响的,他一般用于小规模的数据链接分析,也更容易遭受到攻击。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43311943 +* 
PageRank(网页重要性/排名)算法。PageRank算法最早产生于Google,核心思想是通过网页的入链数作为一个网页好快的判定标准,如果1个网页内部包含了多个指向外部的链接,则PR值将会被均分,PageRank算法也会遭到LinkSpan攻击。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43311943) -11.K-Means(K均值)算法。K-Means算法是聚类算法,k在在这里指的是分类的类型数,所以在开始设定的时候非常关键,算法的原理是首先假定k个分类点,然后根据欧式距离计算分类,然后去同分类的均值作为新的聚簇中心,循环操作直到收敛。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43373159 +* HITS算法。HITS算法是另外一个链接算法,部分原理与PageRank算法是比较相似的,HITS算法引入了权威值和中心值的概念,HITS算法是受用户查询条件影响的,他一般用于小规模的数据链接分析,也更容易遭受到攻击。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43311943) -12.BIRCH算法。BIRCH算法利用构建CF聚类特征树作为算法的核心,通过树的形式,BIRCH算法扫描数据库,在内存中建立一棵初始的CF-树,可以看做数据的多层压缩。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43532111 +* K-Means(K均值)算法。K-Means算法是聚类算法,k在在这里指的是分类的类型数,所以在开始设定的时候非常关键,算法的原理是首先假定k个分类点,然后根据欧式距离计算分类,然后去同分类的均值作为新的聚簇中心,循环操作直到收敛。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43373159) -13.AdaBoost算法。AdaBoost算法是一种提升算法,通过对数据的多次训练得到多个互补的分类器,然后组合多个分类器,构成一个更加准确的分类器, -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43635115 +* BIRCH算法。BIRCH算法利用构建CF聚类特征树作为算法的核心,通过树的形式,BIRCH算法扫描数据库,在内存中建立一棵初始的CF-树,可以看做数据的多层压缩。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43532111) -14.GSP算法。GSP算法是序列模式挖掘算法。GSP算法也是Apriori类算法,在算法的过程中也会进行连接和剪枝操作,不过在剪枝判断的时候还加上了一些时间上的约束等条件。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43699083 +* AdaBoost算法。AdaBoost算法是一种提升算法,通过对数据的多次训练得到多个互补的分类器,然后组合多个分类器,构成一个更加准确的分类器。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43635115) -15.PreFixSpan算法。PreFixSpan算法是另一个序列模式挖掘算法,在算法的过程中不会产生候选集,给定初始前缀模式,不断的通过后缀模式中的元素转到前缀模式中,而不断的递归挖掘下去。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43766253 +* GSP算法。GSP算法是序列模式挖掘算法。GSP算法也是Apriori类算法,在算法的过程中也会进行连接和剪枝操作,不过在剪枝判断的时候还加上了一些时间上的约束等条件。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43699083) 
-16.CBA(基于关联规则分类)算法。CBA算法是一种集成挖掘算法,因为他是建立在关联规则挖掘算法之上的,在已有的关联规则理论前提下,做分类判断,只是在算法的开始时对数据做处理,变成类似于事务的形式。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43818787 +* PreFixSpan算法。PreFixSpan算法是另一个序列模式挖掘算法,在算法的过程中不会产生候选集,给定初始前缀模式,不断的通过后缀模式中的元素转到前缀模式中,而不断的递归挖掘下去。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43766253) -17.RoughSets(粗糙集)算法。粗糙集理论是一个比较新颖的数据挖掘思想。这里使用的是用粗糙集进行属性约简的算法,通过上下近似集的判断删除无效的属性,进行规制的输出。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43876001 +* CBA(基于关联规则分类)算法。CBA算法是一种集成挖掘算法,因为他是建立在关联规则挖掘算法之上的,在已有的关联规则理论前提下,做分类判断,只是在算法的开始时对数据做处理,变成类似于事务的形式。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43818787) -18.gSpan算法。gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘,相较于其他的图算法,子图挖掘算法是他们的一个前提或基础算法。gSpan算法用到了DFS编码,和Edge五元组,最右路径子图扩展等概念,算法比较的抽象和复杂。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/43924273 +* RoughSets(粗糙集)算法。粗糙集理论是一个比较新颖的数据挖掘思想。这里使用的是用粗糙集进行属性约简的算法,通过上下近似集的判断删除无效的属性,进行规制的输出。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43876001) +* GSpan算法。gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘,相较于其他的图算法,子图挖掘算法是他们的一个前提或基础算法。gSpan算法用到了DFS编码,和Edge五元组,最右路径子图扩展等概念,算法比较的抽象和复杂。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43924273) -Others目录下的算法: - -1.GA遗传算法。遗传算法运用了生物进化理论的知识来寻找问题最优解的算法,算法的遗传进化过程分选择,交叉和变异操作,其中选择操是非常关键的步骤,把更适应的基于组遗传给下一代。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/44041499 - -2.dbScan基于空间密度聚类算法。dbScan作为一种特殊聚类算法,弥补了其他算法的一些不足,基于空间密,实现聚类效果,可以发现任意形状的聚簇。 -详细介绍链接:http://blog.csdn.net/androidlushangderen/article/details/44311309 +##Others目录下的算法: +* GA遗传算法。遗传算法运用了生物进化理论的知识来寻找问题最优解的算法,算法的遗传进化过程分选择,交叉和变异操作,其中选择操是非常关键的步骤,把更适应的基于组遗传给下一代。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44041499) +* DbScan基于空间密度聚类算法。dbScan作为一种特殊聚类算法,弥补了其他算法的一些不足,基于空间密,实现聚类效果,可以发现任意形状的聚簇。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44311309) From 6c1e2132ed7ce7bb94df8c22dfdc791aeefa7a26 Mon Sep 17 00:00:00 2001 From: lyq 
<675250079@qq.com> Date: Fri, 27 Mar 2015 16:33:14 +0800 Subject: [PATCH 09/58] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit readMe.md文件格式优化 --- README.md | 61 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index b8f9418..b8f4732 100644 --- a/README.md +++ b/README.md @@ -4,45 +4,64 @@ 18大数据挖掘的经典算法以及代码实现,涉及到了决策分类,聚类,链接挖掘,关联挖掘,模式挖掘等等方面,后面都是相应算法的博文链接,希望能够帮助大家学。 目前追加了其他的一些经典的DM算法,在others的包中涉及聚类,分类,图算法,搜索算等等,没有具体分类。 -* C4.5算法。C4.5算法与ID3算法一样,都是数学分类算法,C4.5算法是ID3算法的一个改进。ID3算法采用信息增益进行决策判断,而C4.5采用的是增益率。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42395865) +* ### C4.5 +C4.5算法与ID3算法一样,都是数学分类算法,C4.5算法是ID3算法的一个改进。ID3算法采用信息增益进行决策判断,而C4.5采用的是增益率。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42395865) +* ### CART +CART算法的全称是分类回归树算法,他是一个二元分类,采用的是类似于熵的基尼指数作为分类决策,形成决策树后之后还要进行剪枝,我自己在实现整个算法的时候采用的是代价复杂度算法,[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42558235) -* CART算法。CART算法的全称是分类回归树算法,他是一个二元分类,采用的是类似于熵的基尼指数作为分类决策,形成决策树后之后还要进行剪枝,我自己在实现整个算法的时候采用的是代价复杂度算法,[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42558235) +* ### KNN +K最近邻算法。给定一些已经训练好的数据,输入一个新的测试数据点,计算包含于此测试数据点的最近的点的分类情况,哪个分类的类型占多数,则此测试点的分类与此相同,所以在这里,有的时候可以复制不同的分类点不同的权重。近的点的权重大点,远的点自然就小点。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42613011) -* KNN(K最近邻)算法。给定一些已经训练好的数据,输入一个新的测试数据点,计算包含于此测试数据点的最近的点的分类情况,哪个分类的类型占多数,则此测试点的分类与此相同,所以在这里,有的时候可以复制不同的分类点不同的权重。近的点的权重大点,远的点自然就小点。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42613011) +* ### Naive Bayes +朴素贝叶斯算法。朴素贝叶斯算法是贝叶斯算法里面一种比较简单的分类算法,用到了一个比较重要的贝叶斯定理,用一句简单的话概括就是条件概率的相互转换推导。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42680161) -* Naive Bayes(朴素贝叶斯)算法。朴素贝叶斯算法是贝叶斯算法里面一种比较简单的分类算法,用到了一个比较重要的贝叶斯定理,用一句简单的话概括就是条件概率的相互转换推导。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42680161) +* 
### SVM +支持向量机算法。支持向量机算法是一种对线性和非线性数据进行分类的方法,非线性数据进行分类的时候可以通过核函数转为线性的情况再处理。其中的一个关键的步骤是搜索最大边缘超平面。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42780439) -* SVM(支持向量机)算法。支持向量机算法是一种对线性和非线性数据进行分类的方法,非线性数据进行分类的时候可以通过核函数转为线性的情况再处理。其中的一个关键的步骤是搜索最大边缘超平面。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42780439) +* ### EM +期望最大化算法。期望最大化算法,可以拆分为2个算法,1个E-Step期望化步骤,和1个M-Step最大化步骤。他是一种算法框架,在每次计算结果之后,逼近统计模型参数的最大似然或最大后验估计。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42921789) -* EM(期望最大化)算法。期望最大化算法,可以拆分为2个算法,1个E-Step期望化步骤,和1个M-Step最大化步骤。他是一种算法框架,在每次计算结果之后,逼近统计模型参数的最大似然或最大后验估计。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/42921789) +* ### Apriori +Apriori算法是关联规则挖掘算法,通过连接和剪枝运算挖掘出频繁项集,然后根据频繁项集得到关联规则,关联规则的导出需要满足最小置信度的要求。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43059211) -* Apriori算法。Apriori算法是关联规则挖掘算法,通过连接和剪枝运算挖掘出频繁项集,然后根据频繁项集得到关联规则,关联规则的导出需要满足最小置信度的要求。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43059211) +* ### FP-Tree +频繁模式树算法。这个算法也有被称为FP-growth算法,这个算法克服了Apriori算法的产生过多侯选集的缺点,通过递归的产生频度模式树,然后对树进行挖掘,后面的过程与Apriori算法一致。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43234309) -* FP-Tree(频繁模式树)算法。这个算法也有被称为FP-growth算法,这个算法克服了Apriori算法的产生过多侯选集的缺点,通过递归的产生频度模式树,然后对树进行挖掘,后面的过程与Apriori算法一致。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43234309) +* ### PageRank +网页重要性/排名算法。PageRank算法最早产生于Google,核心思想是通过网页的入链数作为一个网页好快的判定标准,如果1个网页内部包含了多个指向外部的链接,则PR值将会被均分,PageRank算法也会遭到LinkSpan攻击。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43311943) -* PageRank(网页重要性/排名)算法。PageRank算法最早产生于Google,核心思想是通过网页的入链数作为一个网页好快的判定标准,如果1个网页内部包含了多个指向外部的链接,则PR值将会被均分,PageRank算法也会遭到LinkSpan攻击。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43311943) +* ### HITS 
+HITS算法是另外一个链接算法,部分原理与PageRank算法是比较相似的,HITS算法引入了权威值和中心值的概念,HITS算法是受用户查询条件影响的,他一般用于小规模的数据链接分析,也更容易遭受到攻击。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43311943) -* HITS算法。HITS算法是另外一个链接算法,部分原理与PageRank算法是比较相似的,HITS算法引入了权威值和中心值的概念,HITS算法是受用户查询条件影响的,他一般用于小规模的数据链接分析,也更容易遭受到攻击。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43311943) +* ### K-Means +K-Means算法是聚类算法,k在在这里指的是分类的类型数,所以在开始设定的时候非常关键,算法的原理是首先假定k个分类点,然后根据欧式距离计算分类,然后去同分类的均值作为新的聚簇中心,循环操作直到收敛。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43373159) -* K-Means(K均值)算法。K-Means算法是聚类算法,k在在这里指的是分类的类型数,所以在开始设定的时候非常关键,算法的原理是首先假定k个分类点,然后根据欧式距离计算分类,然后去同分类的均值作为新的聚簇中心,循环操作直到收敛。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43373159) +* ### BIRCH +BIRCH算法利用构建CF聚类特征树作为算法的核心,通过树的形式,BIRCH算法扫描数据库,在内存中建立一棵初始的CF-树,可以看做数据的多层压缩。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43532111) -* BIRCH算法。BIRCH算法利用构建CF聚类特征树作为算法的核心,通过树的形式,BIRCH算法扫描数据库,在内存中建立一棵初始的CF-树,可以看做数据的多层压缩。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43532111) +* ### AdaBoost +AdaBoost算法是一种提升算法,通过对数据的多次训练得到多个互补的分类器,然后组合多个分类器,构成一个更加准确的分类器。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43635115) -* AdaBoost算法。AdaBoost算法是一种提升算法,通过对数据的多次训练得到多个互补的分类器,然后组合多个分类器,构成一个更加准确的分类器。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43635115) +* ### GSP +GSP算法是序列模式挖掘算法。GSP算法也是Apriori类算法,在算法的过程中也会进行连接和剪枝操作,不过在剪枝判断的时候还加上了一些时间上的约束等条件。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43699083) -* GSP算法。GSP算法是序列模式挖掘算法。GSP算法也是Apriori类算法,在算法的过程中也会进行连接和剪枝操作,不过在剪枝判断的时候还加上了一些时间上的约束等条件。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43699083) +* ### PreFixSpan +PreFixSpan算法是另一个序列模式挖掘算法,在算法的过程中不会产生候选集,给定初始前缀模式,不断的通过后缀模式中的元素转到前缀模式中,而不断的递归挖掘下去。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43766253) -* 
PreFixSpan算法。PreFixSpan算法是另一个序列模式挖掘算法,在算法的过程中不会产生候选集,给定初始前缀模式,不断的通过后缀模式中的元素转到前缀模式中,而不断的递归挖掘下去。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43766253) +* ### CBA +基于关联规则分类算法。CBA算法是一种集成挖掘算法,因为他是建立在关联规则挖掘算法之上的,在已有的关联规则理论前提下,做分类判断,只是在算法的开始时对数据做处理,变成类似于事务的形式。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43818787) -* CBA(基于关联规则分类)算法。CBA算法是一种集成挖掘算法,因为他是建立在关联规则挖掘算法之上的,在已有的关联规则理论前提下,做分类判断,只是在算法的开始时对数据做处理,变成类似于事务的形式。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43818787) +* ### RoughSets +粗糙集算法。粗糙集理论是一个比较新颖的数据挖掘思想。这里使用的是用粗糙集进行属性约简的算法,通过上下近似集的判断删除无效的属性,进行规制的输出。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43876001) -* RoughSets(粗糙集)算法。粗糙集理论是一个比较新颖的数据挖掘思想。这里使用的是用粗糙集进行属性约简的算法,通过上下近似集的判断删除无效的属性,进行规制的输出。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43876001) - -* GSpan算法。gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘,相较于其他的图算法,子图挖掘算法是他们的一个前提或基础算法。gSpan算法用到了DFS编码,和Edge五元组,最右路径子图扩展等概念,算法比较的抽象和复杂。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43924273) +* ### GSpan +gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘,相较于其他的图算法,子图挖掘算法是他们的一个前提或基础算法。gSpan算法用到了DFS编码,和Edge五元组,最右路径子图扩展等概念,算法比较的抽象和复杂。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/43924273) ##Others目录下的算法: -* GA遗传算法。遗传算法运用了生物进化理论的知识来寻找问题最优解的算法,算法的遗传进化过程分选择,交叉和变异操作,其中选择操是非常关键的步骤,把更适应的基于组遗传给下一代。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44041499) +* ### GA +遗传算法。遗传算法运用了生物进化理论的知识来寻找问题最优解的算法,算法的遗传进化过程分选择,交叉和变异操作,其中选择操是非常关键的步骤,把更适应的基于组遗传给下一代。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44041499) -* DbScan基于空间密度聚类算法。dbScan作为一种特殊聚类算法,弥补了其他算法的一些不足,基于空间密,实现聚类效果,可以发现任意形状的聚簇。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44311309) +* ### DbScan +基于空间密度聚类算法。dbScan作为一种特殊聚类算法,弥补了其他算法的一些不足,基于空间密,实现聚类效果,可以发现任意形状的聚簇。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44311309) From ed2b466afd57445a765b87980a781fe614bdeadf Mon Sep 17 00:00:00 2001 From: lyq 
<675250079@qq.com> Date: Fri, 27 Mar 2015 16:51:01 +0800 Subject: [PATCH 10/58] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit readMe文档格式进行了优化 --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index b8f4732..0d384e7 100644 --- a/README.md +++ b/README.md @@ -65,3 +65,12 @@ gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘, * ### DbScan 基于空间密度聚类算法。dbScan作为一种特殊聚类算法,弥补了其他算法的一些不足,基于空间密,实现聚类效果,可以发现任意形状的聚簇。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44311309) + +* ### GA_Maze +遗传算法在走迷宫游戏中的应用。将走迷宫中的搜索出口路径的问题转化为遗传算法中的问题通过构造针对此特定问题的适值函数,基因移动方向的定位,巧的进行问题的求解。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44656809) + +* ### CABDDCC +基于连通图的分裂聚类算法。也是属于层次聚类算法主要分为2个阶段,第一阶段构造连通图。第二个阶段是分裂连通图,最终形成聚类结果。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44463997) + +* ### Chameleon +两阶段聚类算法。与CABDDCC算法相反,最后是通过对小簇集合的合并,形成最终的结果,在第一阶段主要是通过K近邻的思想形成小规模的连通图,第二阶段通过RI(相对互连性)和RC(相对近似性)来选一个最佳的簇进行合并。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44569077) From a0e9ca92d5e4c496e8d269f8fad2f11a000260d3 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 29 Mar 2015 21:56:18 +0800 Subject: [PATCH 11/58] =?UTF-8?q?=E9=9A=8F=E6=9C=BA=E6=A3=AE=E6=9E=97?= =?UTF-8?q?=E7=AE=97=E6=B3=95=EF=BC=8C=E7=BB=93=E5=90=88=E4=BA=86=E5=86=B3?= =?UTF-8?q?=E7=AD=96=E6=A0=91+boosting=E7=9A=84=E6=80=9D=E6=83=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 随机森林算法,结合了决策树+boosting的思想 --- Others/DataMining_RandomForest/CARTTool.java | 511 ++++++++++++++++++ Others/DataMining_RandomForest/Client.java | 33 ++ .../DataMining_RandomForest/DecisionTree.java | 165 ++++++ .../RandomForestTool.java | 223 ++++++++ Others/DataMining_RandomForest/TreeNode.java | 85 +++ Others/DataMining_RandomForest/input.txt | 15 + 6 files changed, 1032 insertions(+) create mode 100644 Others/DataMining_RandomForest/CARTTool.java 
create mode 100644 Others/DataMining_RandomForest/Client.java create mode 100644 Others/DataMining_RandomForest/DecisionTree.java create mode 100644 Others/DataMining_RandomForest/RandomForestTool.java create mode 100644 Others/DataMining_RandomForest/TreeNode.java create mode 100644 Others/DataMining_RandomForest/input.txt diff --git a/Others/DataMining_RandomForest/CARTTool.java b/Others/DataMining_RandomForest/CARTTool.java new file mode 100644 index 0000000..d68aab4 --- /dev/null +++ b/Others/DataMining_RandomForest/CARTTool.java @@ -0,0 +1,511 @@ +package DataMining_RandomForest; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Queue; + +/** + * CARTع㷨 + * + * @author lyq + * + */ +public class CARTTool { + // ŵֵ + private final String YES = "Yes"; + private final String NO = "No"; + + // Ե,dataԴݵ + private int attrNum; + private String filePath; + // ʼԴݣһάַģ± + private String[][] data; + // ݵе + private String[] attrNames; + // ÿԵֵ + private HashMap> attrValue; + + public CARTTool(ArrayList dataArray) { + attrValue = new HashMap<>(); + readData(dataArray); + } + + /** + * ѡȡݽгʼ + * @param dataArray + * Ѿ + */ + public void readData(ArrayList dataArray) { + data = new String[dataArray.size()][]; + dataArray.toArray(data); + attrNum = data[0].length; + attrNames = data[0]; + } + + /** + * ȳʼÿԵֵͣںصļʱ + */ + public void initAttrValue() { + ArrayList tempValues; + + // еķʽ + for (int j = 1; j < attrNum; j++) { + // һе¿ʼѰֵ + tempValues = new ArrayList<>(); + for (int i = 1; i < data.length; i++) { + if (!tempValues.contains(data[i][j])) { + // Եֵûӹ + tempValues.add(data[i][j]); + } + } + + // һԵֵѾϣƵmapԱ + attrValue.put(data[0][j], tempValues); + } + } + + /** + * ָ + * + * @param remainData + * ʣ + * @param attrName + * + * @param value + * ֵ + * @param beLongValue + * Ƿڴֵ + * @return + */ + public double computeGini(String[][] remainData, String attrName, + String value, boolean beLongValue) { + // ʵ + int 
total = 0; + // ʵ + int posNum = 0; + // ʵ + int negNum = 0; + // ָ + double gini = 0; + + // ǰдұ + for (int j = 1; j < attrNames.length; j++) { + // ҵָ + if (attrName.equals(attrNames[j])) { + for (int i = 1; i < remainData.length; i++) { + // ͳʵںͲֵͽл + if ((beLongValue && remainData[i][j].equals(value)) + || (!beLongValue && !remainData[i][j].equals(value))) { + if (remainData[i][attrNames.length - 1].equals(YES)) { + // жϴǷΪʵ + posNum++; + } else { + negNum++; + } + } + } + } + } + + total = posNum + negNum; + double posProbobly = (double) posNum / total; + double negProbobly = (double) negNum / total; + gini = 1 - posProbobly * posProbobly - negProbobly * negProbobly; + + // ؼָ + return gini; + } + + /** + * ԻֵСָСֵֺСĻָһ + * + * @param remainData + * ʣ˭ + * @param attrName + * + * @return + */ + public String[] computeAttrGini(String[][] remainData, String attrName) { + String[] str = new String[2]; + // ոԵĻֵ + String spiltValue = ""; + // ʱ + int tempNum = 0; + // ԵֵʱСĻָ + double minGini = Integer.MAX_VALUE; + ArrayList valueTypes = attrValue.get(attrName); + // ڴֵʵ + HashMap belongNum = new HashMap<>(); + + for (String string : valueTypes) { + // ¼ʱֹ0 + tempNum = 0; + // дұ + for (int j = 1; j < attrNames.length; j++) { + // ҵָ + if (attrName.equals(attrNames[j])) { + for (int i = 1; i < remainData.length; i++) { + // ͳʵںͲֵͽл + if (remainData[i][j].equals(string)) { + tempNum++; + } + } + } + } + + belongNum.put(string, tempNum); + } + + double tempGini = 0; + double posProbably = 1.0; + double negProbably = 1.0; + for (String string : valueTypes) { + tempGini = 0; + + posProbably = 1.0 * belongNum.get(string) / (remainData.length - 1); + negProbably = 1 - posProbably; + + tempGini += posProbably + * computeGini(remainData, attrName, string, true); + tempGini += negProbably + * computeGini(remainData, attrName, string, false); + + if (tempGini < minGini) { + minGini = tempGini; + spiltValue = string; + } + } + + str[0] = spiltValue; + str[1] = minGini + ""; + 
+ return str; + } + + public void buildDecisionTree(TreeNode node, String parentAttrValue, + String[][] remainData, ArrayList remainAttr, + boolean beLongParentValue) { + // Իֵ + String valueType = ""; + // + String spiltAttrName = ""; + double minGini = Integer.MAX_VALUE; + double tempGini = 0; + // ָ飬˻ָʹ˻ָĻֵ + String[] giniArray; + + if (beLongParentValue) { + node.setParentAttrValue(parentAttrValue); + } else { + node.setParentAttrValue("!" + parentAttrValue); + } + + if (remainAttr.size() == 0) { + if (remainData.length > 1) { + ArrayList indexArray = new ArrayList<>(); + for (int i = 1; i < remainData.length; i++) { + indexArray.add(remainData[i][0]); + } + node.setDataIndex(indexArray); + } + // System.out.println("attr remain null"); + return; + } + + for (String str : remainAttr) { + giniArray = computeAttrGini(remainData, str); + tempGini = Double.parseDouble(giniArray[1]); + + if (tempGini < minGini) { + spiltAttrName = str; + minGini = tempGini; + valueType = giniArray[0]; + } + } + // Ƴ + remainAttr.remove(spiltAttrName); + node.setAttrName(spiltAttrName); + + // ӽڵ,عУÿζԪֳ֣2ӽڵ + TreeNode[] childNode = new TreeNode[2]; + String[][] rData; + + boolean[] bArray = new boolean[] { true, false }; + for (int i = 0; i < bArray.length; i++) { + // ԪֵĻ + rData = removeData(remainData, spiltAttrName, valueType, bArray[i]); + + boolean sameClass = true; + ArrayList indexArray = new ArrayList<>(); + for (int k = 1; k < rData.length; k++) { + indexArray.add(rData[k][0]); + // жǷΪͬһ + if (!rData[k][attrNames.length - 1] + .equals(rData[1][attrNames.length - 1])) { + // ֻҪ1ȣͲͬ͵ + sameClass = false; + break; + } + } + + childNode[i] = new TreeNode(); + if (!sameClass) { + // µĶԣͬû + ArrayList rAttr = new ArrayList<>(); + for (String str : remainAttr) { + rAttr.add(str); + } + buildDecisionTree(childNode[i], valueType, rData, rAttr, + bArray[i]); + } else { + String pAtr = (bArray[i] ? valueType : "!" 
+ valueType); + childNode[i].setParentAttrValue(pAtr); + childNode[i].setDataIndex(indexArray); + } + } + + node.setChildAttrNode(childNode); + } + + /** + * ԻϣݵƳ + * + * @param srcData + * Դ + * @param attrName + * ֵ + * @param valueType + * Եֵ + * @parame beLongValue Ƿڴֵ + */ + private String[][] removeData(String[][] srcData, String attrName, + String valueType, boolean beLongValue) { + String[][] desDataArray; + ArrayList desData = new ArrayList<>(); + // ɾ + ArrayList selectData = new ArrayList<>(); + selectData.add(attrNames); + + // תбУƳ + for (int i = 0; i < srcData.length; i++) { + desData.add(srcData[i]); + } + + // ǴһеIJ + for (int j = 1; j < attrNames.length; j++) { + if (attrNames[j].equals(attrName)) { + for (int i = 1; i < desData.size(); i++) { + if (desData.get(i)[j].equals(valueType)) { + // ƥݣƳ + selectData.add(desData.get(i)); + } + } + } + } + + if (beLongValue) { + desDataArray = new String[selectData.size()][]; + selectData.toArray(desDataArray); + } else { + // вƳ + selectData.remove(attrNames); + // ǻֲڴ͵ʱƳ + desData.removeAll(selectData); + desDataArray = new String[desData.size()][]; + desData.toArray(desDataArray); + } + + return desDataArray; + } + + /** + * عظڵ + * @return + */ + public TreeNode startBuildingTree() { + initAttrValue(); + + ArrayList remainAttr = new ArrayList<>(); + // ԣһ + for (int i = 1; i < attrNames.length - 1; i++) { + remainAttr.add(attrNames[i]); + } + + TreeNode rootNode = new TreeNode(); + buildDecisionTree(rootNode, "", data, remainAttr, false); + setIndexAndAlpah(rootNode, 0, false); + showDecisionTree(rootNode, 1); + + return rootNode; + } + + /** + * ʾ + * + * @param node + * ʾĽڵ + * @param blankNum + * пոʾͽṹ + */ + private void showDecisionTree(TreeNode node, int blankNum) { + System.out.println(); + for (int i = 0; i < blankNum; i++) { + System.out.print(" "); + } + System.out.print("--"); + // ʾֵ + if (node.getParentAttrValue() != null + && node.getParentAttrValue().length() > 0) { + 
System.out.print(node.getParentAttrValue()); + } else { + System.out.print("--"); + } + System.out.print("--"); + + if (node.getDataIndex() != null && node.getDataIndex().size() > 0) { + String i = node.getDataIndex().get(0); + System.out.print("" + node.getNodeIndex() + ":" + + data[Integer.parseInt(i)][attrNames.length - 1]); + System.out.print("["); + for (String index : node.getDataIndex()) { + System.out.print(index + ", "); + } + System.out.print("]"); + } else { + // ݹʾӽڵ + System.out.print("" + node.getNodeIndex() + ":" + + node.getAttrName() + ""); + if (node.getChildAttrNode() != null) { + for (TreeNode childNode : node.getChildAttrNode()) { + showDecisionTree(childNode, 2 * blankNum); + } + } else { + System.out.print(" Child Null"); + } + } + } + + /** + * Ϊڵкţÿڵʣں֦ + * + * @param node + * ʼʱǸڵ + * @param index + * ʼţ1ʼ + * @param ifCutNode + * ǷҪ֦ + */ + private void setIndexAndAlpah(TreeNode node, int index, boolean ifCutNode) { + TreeNode tempNode; + // С۽ڵ㣬֦Ľڵ + TreeNode minAlphaNode = null; + double minAlpah = Integer.MAX_VALUE; + Queue nodeQueue = new LinkedList(); + + nodeQueue.add(node); + while (nodeQueue.size() > 0) { + index++; + // Ӷͷȡ׸ڵ + tempNode = nodeQueue.poll(); + tempNode.setNodeIndex(index); + if (tempNode.getChildAttrNode() != null) { + for (TreeNode childNode : tempNode.getChildAttrNode()) { + nodeQueue.add(childNode); + } + computeAlpha(tempNode); + if (tempNode.getAlpha() < minAlpah) { + minAlphaNode = tempNode; + minAlpah = tempNode.getAlpha(); + } else if (tempNode.getAlpha() == minAlpah) { + // ֵһȽϰҶӽڵ֦жҶӽڵĽڵ + if (tempNode.getLeafNum() > minAlphaNode.getLeafNum()) { + minAlphaNode = tempNode; + } + } + } + } + + if (ifCutNode) { + // ļ֦ҺӽڵΪnull + minAlphaNode.setChildAttrNode(null); + } + } + + /** + * ΪҶӽڵۣĺ֦õCCP۸Ӷȼ֦ + * + * @param node + * ķҶӽڵ + */ + private void computeAlpha(TreeNode node) { + double rt = 0; + double Rt = 0; + double alpha = 0; + // ǰڵ + int sumNum = 0; + // ٵƫ + int minNum = 0; + + ArrayList dataIndex; + 
ArrayList leafNodes = new ArrayList<>(); + + addLeafNode(node, leafNodes); + node.setLeafNum(leafNodes.size()); + for (TreeNode attrNode : leafNodes) { + dataIndex = attrNode.getDataIndex(); + + int num = 0; + sumNum += dataIndex.size(); + for (String s : dataIndex) { + // ͳƷеʵ + if (data[Integer.parseInt(s)][attrNames.length - 1].equals(YES)) { + num++; + } + } + minNum += num; + + // ȡСֵ + if (1.0 * num / dataIndex.size() > 0.5) { + num = dataIndex.size() - num; + } + + rt += (1.0 * num / (data.length - 1)); + } + + //ͬȡƫDz + if (1.0 * minNum / sumNum > 0.5) { + minNum = sumNum - minNum; + } + + Rt = 1.0 * minNum / (data.length - 1); + alpha = 1.0 * (Rt - rt) / (leafNodes.size() - 1); + node.setAlpha(alpha); + } + + /** + * ɸѡڵҶӽڵ + * + * @param node + * ɸѡڵ + * @param leafNode + * Ҷӽڵб + */ + private void addLeafNode(TreeNode node, ArrayList leafNode) { + ArrayList dataIndex; + + if (node.getChildAttrNode() != null) { + for (TreeNode childNode : node.getChildAttrNode()) { + dataIndex = childNode.getDataIndex(); + if (dataIndex != null && dataIndex.size() > 0) { + // ˵˽ڵΪҶӽڵ + leafNode.add(childNode); + } else { + // ǷҶӽڵݹ + addLeafNode(childNode, leafNode); + } + } + } + } + +} diff --git a/Others/DataMining_RandomForest/Client.java b/Others/DataMining_RandomForest/Client.java new file mode 100644 index 0000000..6139d3e --- /dev/null +++ b/Others/DataMining_RandomForest/Client.java @@ -0,0 +1,33 @@ +package DataMining_RandomForest; + +import java.text.MessageFormat; + +/** + * ɭ㷨Գ + * + * @author lyq + * + */ +public class Client { + public static void main(String[] args) { + String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt"; + String queryStr = "Age=Youth,Income=Low,Student=No,CreditRating=Fair"; + String resultClassType = ""; + // ռռ + double sampleNumRatio = 0.4; + // ݵIJɼռı + double featureNumRatio = 0.5; + + RandomForestTool tool = new RandomForestTool(filePath, sampleNumRatio, + featureNumRatio); + tool.constructRandomTree(); + + resultClassType 
= tool.judgeClassType(queryStr); + + System.out.println(); + System.out + .println(MessageFormat.format( + "ѯ{0},ԤķΪBuysCompute:{1}", queryStr, + resultClassType)); + } +} diff --git a/Others/DataMining_RandomForest/DecisionTree.java b/Others/DataMining_RandomForest/DecisionTree.java new file mode 100644 index 0000000..119254e --- /dev/null +++ b/Others/DataMining_RandomForest/DecisionTree.java @@ -0,0 +1,165 @@ +package DataMining_RandomForest; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +/** + * + * + * @author lyq + * + */ +public class DecisionTree { + // ĸڵ + TreeNode rootNode; + // ݵ + String[] featureNames; + // + ArrayList datas; + // ĵĹ + CARTTool tool; + + public DecisionTree(ArrayList datas) { + this.datas = datas; + this.featureNames = datas.get(0); + + tool = new CARTTool(datas); + // ͨCARTоĹĸڵ + rootNode = tool.startBuildingTree(); + } + + /** + * ݸж + * + * @param features + * @return + */ + public String decideClassType(String features) { + String classType = ""; + // ѯ + String[] queryFeatures; + // ڱжӦIJѯֵ + ArrayList featureStrs; + + featureStrs = new ArrayList<>(); + queryFeatures = features.split(","); + + String[] array; + for (String name : featureNames) { + for (String featureValue : queryFeatures) { + array = featureValue.split("="); + // Ӧֵ뵽б + if (array[0].equals(name)) { + featureStrs.add(array); + } + } + } + + // ʼӸݽڵµݹ + classType = recusiveSearchClassType(rootNode, featureStrs); + + return classType; + } + + /** + * ݹѯԵķ + * + * @param node + * ǰĽڵ + * @param remainFeatures + * ʣδжϵ + * @return + */ + private String recusiveSearchClassType(TreeNode node, + ArrayList remainFeatures) { + String classType = null; + + // ڵݵid˵Ѿൽ + if (node.getDataIndex() != null && node.getDataIndex().size() > 0) { + classType = judgeClassType(node.getDataIndex()); + + return classType; + } + + // ȡʣеһƥΪǰж + String[] currentFeature = null; + for (String[] featureValue : remainFeatures) { + if 
(node.getAttrName().equals(featureValue[0])) { + currentFeature = featureValue; + break; + } + } + + for (TreeNode childNode : node.getChildAttrNode()) { + // Ѱӽڵڴֵķ֧ + if (childNode.getParentAttrValue().equals(currentFeature[1])) { + remainFeatures.remove(currentFeature); + classType = recusiveSearchClassType(childNode, remainFeatures); + + // ҵ˷ֱѭ + break; + }else{ + //еڶжϼ!ŵ + String value = childNode.getParentAttrValue(); + + if(value.charAt(0) == '!'){ + //ȥһַ + value = value.substring(1, value.length()); + + if(!value.equals(currentFeature[1])){ + remainFeatures.remove(currentFeature); + classType = recusiveSearchClassType(childNode, remainFeatures); + + break; + } + } + } + } + + return classType; + } + + /** + * ݵõзľ + * + * @param dataIndex + * ݷ + * @return + */ + public String judgeClassType(ArrayList dataIndex) { + // ֵ + String resultClassType = ""; + String classType = ""; + int count = 0; + int temp = 0; + Map type2Num = new HashMap(); + + for (String index : dataIndex) { + temp = Integer.parseInt(index); + // ȡһеľ + classType = datas.get(temp)[featureNames.length - 1]; + + if (type2Num.containsKey(classType)) { + // Ѿڣʹ1 + count = type2Num.get(classType); + count++; + } else { + count = 1; + } + + type2Num.put(classType, count); + } + + // ѡּ֧һֵ + count = -1; + for (Map.Entry entry : type2Num.entrySet()) { + if ((int) entry.getValue() > count) { + count = (int) entry.getValue(); + resultClassType = (String) entry.getKey(); + } + } + + return resultClassType; + } +} diff --git a/Others/DataMining_RandomForest/RandomForestTool.java b/Others/DataMining_RandomForest/RandomForestTool.java new file mode 100644 index 0000000..a244cd9 --- /dev/null +++ b/Others/DataMining_RandomForest/RandomForestTool.java @@ -0,0 +1,223 @@ +package DataMining_RandomForest; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import 
java.util.Random; + +/** + * ɭ㷨 + * + * @author lyq + * + */ +public class RandomForestTool { + // ļַ + private String filePath; + // ռռ + private double sampleNumRatio; + // ݵIJɼռı + private double featureNumRatio; + // IJ + private int sampleNum; + // ݵIJɼ + private int featureNum; + // ɭеľĿ,ܵ/ڹÿݵ + private int treeNum; + // + private Random random; + // + private String[] featureNames; + // ԭʼܵ + private ArrayList totalDatas; + // ɭ + private ArrayList decisionForest; + + public RandomForestTool(String filePath, double sampleNumRatio, + double featureNumRatio) { + this.filePath = filePath; + this.sampleNumRatio = sampleNumRatio; + this.featureNumRatio = featureNumRatio; + + readDataFile(); + } + + /** + * ļжȡ + */ + private void readDataFile() { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + String[] tempArray; + while ((str = in.readLine()) != null) { + tempArray = str.split(" "); + dataArray.add(tempArray); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + totalDatas = dataArray; + featureNames = totalDatas.get(0); + sampleNum = (int) ((totalDatas.size() - 1) * sampleNumRatio); + //ʱҪȥidԺ;ԣԼ + featureNum = (int) ((featureNames.length -2) * featureNumRatio); + // ʱҪȥ + treeNum = (totalDatas.size() - 1) / sampleNum; + } + + /** + * + */ + private DecisionTree produceDecisionTree() { + int temp = 0; + DecisionTree tree; + String[] tempData; + //ݵк + ArrayList sampleRandomNum; + //к + ArrayList featureRandomNum; + ArrayList datas; + + sampleRandomNum = new ArrayList<>(); + featureRandomNum = new ArrayList<>(); + datas = new ArrayList<>(); + + for(int i=0; i 0){ + array[0] = temp + ""; + } + + temp++; + } + + tree = new DecisionTree(datas); + + return tree; + } + + /** + * ɭ + */ + public void constructRandomTree() { + DecisionTree tree; + random = new Random(); + decisionForest = new ArrayList<>(); + + 
System.out.println("ɭеľ"); + // ɭ + for (int i = 0; i < treeNum; i++) { + System.out.println("\n" + (i+1)); + tree = produceDecisionTree(); + decisionForest.add(tree); + } + } + + /** + * ݸľ + * + * @param features + * ֪ + * @return + */ + public String judgeClassType(String features) { + // ֵ + String resultClassType = ""; + String classType = ""; + int count = 0; + Map type2Num = new HashMap(); + + for (DecisionTree tree : decisionForest) { + classType = tree.decideClassType(features); + if (type2Num.containsKey(classType)) { + // Ѿڣʹ1 + count = type2Num.get(classType); + count++; + } else { + count = 1; + } + + type2Num.put(classType, count); + } + + // ѡּ֧һֵ + count = -1; + for (Map.Entry entry : type2Num.entrySet()) { + if ((int) entry.getValue() > count) { + count = (int) entry.getValue(); + resultClassType = (String) entry.getKey(); + } + } + + return resultClassType; + } +} diff --git a/Others/DataMining_RandomForest/TreeNode.java b/Others/DataMining_RandomForest/TreeNode.java new file mode 100644 index 0000000..b118472 --- /dev/null +++ b/Others/DataMining_RandomForest/TreeNode.java @@ -0,0 +1,85 @@ +package DataMining_RandomForest; + +import java.util.ArrayList; + +/** + * عڵ + * + * @author lyq + * + */ +public class TreeNode { + // ڵ + private String attrName; + // ڵ + private int nodeIndex; + //Ҷӽڵ + private int leafNum; + // ڵ + private double alpha; + // ׷ֵ + private String parentAttrValue; + // ӽڵ + private TreeNode[] childAttrNode; + // ݼ¼ + private ArrayList dataIndex; + + public String getAttrName() { + return attrName; + } + + public void setAttrName(String attrName) { + this.attrName = attrName; + } + + public int getNodeIndex() { + return nodeIndex; + } + + public void setNodeIndex(int nodeIndex) { + this.nodeIndex = nodeIndex; + } + + public double getAlpha() { + return alpha; + } + + public void setAlpha(double alpha) { + this.alpha = alpha; + } + + public String getParentAttrValue() { + return parentAttrValue; + } + + public void 
setParentAttrValue(String parentAttrValue) { + this.parentAttrValue = parentAttrValue; + } + + public TreeNode[] getChildAttrNode() { + return childAttrNode; + } + + public void setChildAttrNode(TreeNode[] childAttrNode) { + this.childAttrNode = childAttrNode; + } + + public ArrayList getDataIndex() { + return dataIndex; + } + + public void setDataIndex(ArrayList dataIndex) { + this.dataIndex = dataIndex; + } + + public int getLeafNum() { + return leafNum; + } + + public void setLeafNum(int leafNum) { + this.leafNum = leafNum; + } + + + +} diff --git a/Others/DataMining_RandomForest/input.txt b/Others/DataMining_RandomForest/input.txt new file mode 100644 index 0000000..ac50350 --- /dev/null +++ b/Others/DataMining_RandomForest/input.txt @@ -0,0 +1,15 @@ +Rid Age Income Student CreditRating BuysComputer +1 Youth High No Fair No +2 Youth High No Excellent No +3 MiddleAged High No Fair Yes +4 Senior Medium No Fair Yes +5 Senior Low Yes Fair Yes +6 Senior Low Yes Excellent No +7 MiddleAged Low Yes Excellent Yes +8 Youth Medium No Fair No +9 Youth Low Yes Fair Yes +10 Senior Medium Yes Fair Yes +11 Youth Medium Yes Excellent Yes +12 MiddleAged Medium No Excellent Yes +13 MiddleAged High Yes Fair Yes +14 Senior Medium No Excellent No \ No newline at end of file From 3fb0d6fe3e6942890aa0bb1cf8b9ef9ff682ead8 Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Sun, 29 Mar 2015 22:08:12 +0800 Subject: [PATCH 12/58] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加新的算法编辑 --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 0d384e7..be0a1e1 100644 --- a/README.md +++ b/README.md @@ -74,3 +74,6 @@ gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘, * ### Chameleon 两阶段聚类算法。与CABDDCC算法相反,最后是通过对小簇集合的合并,形成最终的结果,在第一阶段主要是通过K近邻的思想形成小规模的连通图,第二阶段通过RI(相对互连性)和RC(相对近似性)来选一个最佳的簇进行合并。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44569077) + +* ### RandomForest 
+随机森林算法。算法思想是决策树+boosting. From 5905165f7fc35c7732a7a2d6e0c957098db2dcd5 Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Wed, 8 Apr 2015 17:55:59 +0800 Subject: [PATCH 13/58] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 补充对于随机森林算法的算法描述 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index be0a1e1..34b47c6 100644 --- a/README.md +++ b/README.md @@ -76,4 +76,4 @@ gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘, 两阶段聚类算法。与CABDDCC算法相反,最后是通过对小簇集合的合并,形成最终的结果,在第一阶段主要是通过K近邻的思想形成小规模的连通图,第二阶段通过RI(相对互连性)和RC(相对近似性)来选一个最佳的簇进行合并。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44569077) * ### RandomForest -随机森林算法。算法思想是决策树+boosting. +随机森林算法。算法思想是决策树+boosting.决策树采用的是CART分类回归数,通过组合各个决策树的弱分类器,构成一个最终的强分类器,在构造决策树的时候采取随机数量的样本数和随机的部分属性进行子决策树的构建,避免了过分拟合的现象发生。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44756943) From 7cc4103a437e9f53f734dde6b366034bbc46f003 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 9 Apr 2015 22:08:32 +0800 Subject: [PATCH 14/58] =?UTF-8?q?=E7=AE=97=E6=B3=95=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E7=B1=BB=EF=BC=8C=E6=A8=A1=E6=8B=9F=E4=BA=862=E4=B8=AA?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E7=82=B9=E8=BF=9B=E8=A1=8C=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 算法测试类,模拟了2个测试点进行测试 --- Others/DataMining_KDTree/Client.java | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 Others/DataMining_KDTree/Client.java diff --git a/Others/DataMining_KDTree/Client.java b/Others/DataMining_KDTree/Client.java new file mode 100644 index 0000000..bba7377 --- /dev/null +++ b/Others/DataMining_KDTree/Client.java @@ -0,0 +1,36 @@ +package DataMining_KDTree; + +import java.text.MessageFormat; + +/** + * KD㷨 + * + * @author lyq + * + */ +public class Client { + public static void main(String[] args) { + String filePath = 
"C:\\Users\\lyq\\Desktop\\icon\\input.txt"; + Point queryNode; + Point searchedNode; + KDTreeTool tool = new KDTreeTool(filePath); + + // KDĹ + tool.createKDTree(); + + // ͨKDݵѯ + queryNode = new Point(2.1, 3.1); + searchedNode = tool.searchNearestData(queryNode); + System.out.println(MessageFormat.format( + "ѯ({0}, {1})Ϊ({2}, {3})", queryNode.x, queryNode.y, + searchedNode.x, searchedNode.y)); + + //¹KD,ȥ֮ǰķʼ¼ + tool.createKDTree(); + queryNode = new Point(2, 4.5); + searchedNode = tool.searchNearestData(queryNode); + System.out.println(MessageFormat.format( + "ѯ({0}, {1})Ϊ({2}, {3})", queryNode.x, queryNode.y, + searchedNode.x, searchedNode.y)); + } +} From ab2389ab11650a9de8dbc3c7b0efa8f667d53677 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 9 Apr 2015 22:08:44 +0800 Subject: [PATCH 15/58] =?UTF-8?q?=E7=AE=97=E6=B3=95=E8=BE=93=E5=85=A5?= =?UTF-8?q?=E7=9A=84=E6=B5=8B=E8=AF=95=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 算法输入的测试数据 --- Others/DataMining_KDTree/input.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 Others/DataMining_KDTree/input.txt diff --git a/Others/DataMining_KDTree/input.txt b/Others/DataMining_KDTree/input.txt new file mode 100644 index 0000000..f7d49f3 --- /dev/null +++ b/Others/DataMining_KDTree/input.txt @@ -0,0 +1,6 @@ +4 7 +5 4 +9 6 +7 2 +2 3 +8 1 From 3fc09c6b532ef2ac813752ba6fb0d99bc480ca52 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 9 Apr 2015 22:08:58 +0800 Subject: [PATCH 16/58] =?UTF-8?q?=E7=AE=97=E6=B3=95=E5=B7=A5=E5=85=B7?= =?UTF-8?q?=E5=B0=81=E8=A3=85=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 算法工具封装类 --- Others/DataMining_KDTree/KDTreeTool.java | 386 +++++++++++++++++++++++ 1 file changed, 386 insertions(+) create mode 100644 Others/DataMining_KDTree/KDTreeTool.java diff --git a/Others/DataMining_KDTree/KDTreeTool.java 
b/Others/DataMining_KDTree/KDTreeTool.java new file mode 100644 index 0000000..0b5a53c --- /dev/null +++ b/Others/DataMining_KDTree/KDTreeTool.java @@ -0,0 +1,386 @@ +package DataMining_KDTree; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Stack; + +/** + * KD-kάռؼݼ㷨 + * + * @author lyq + * + */ +public class KDTreeTool { + // ռƽķ + public static final int DIRECTION_X = 0; + public static final int DIRECTION_Y = 1; + + // IJļ + private String filePath; + // ԭʼݵ + private ArrayList totalDatas; + // KDڵ + private TreeNode rootNode; + + public KDTreeTool(String filePath) { + this.filePath = filePath; + + readDataFile(); + } + + /** + * ļжȡ + */ + private void readDataFile() { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + String[] tempArray; + while ((str = in.readLine()) != null) { + tempArray = str.split(" "); + dataArray.add(tempArray); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + Point p; + totalDatas = new ArrayList<>(); + for (String[] array : dataArray) { + p = new Point(array[0], array[1]); + totalDatas.add(p); + } + } + + /** + * KD + * + * @return + */ + public TreeNode createKDTree() { + ArrayList copyDatas; + + rootNode = new TreeNode(); + // ݽڵ㿪ʼʱʾĿռʱ޴ + rootNode.range = new Range(); + copyDatas = (ArrayList) totalDatas.clone(); + recusiveConstructNode(rootNode, copyDatas); + + return rootNode; + } + + /** + * ݹKDĹ + * + * @param node + * ǰڹĽڵ + * @param datas + * ýڵӦڴ + * @return + */ + private void recusiveConstructNode(TreeNode node, ArrayList datas) { + int direction = 0; + ArrayList leftSideDatas; + ArrayList rightSideDatas; + Point p; + TreeNode leftNode; + TreeNode rightNode; + Range range; + Range range2; + + // ֵݵ㼯ֻ1ݣٻ + if 
(datas.size() == 1) { + node.nodeData = datas.get(0); + return; + } + + // ڵǰݵ㼯нзָѡ + direction = selectSplitDrc(datas); + // ݷȡλΪʸ + p = getMiddlePoint(datas, direction); + + node.spilt = direction; + node.nodeData = p; + + leftSideDatas = getLeftSideDatas(datas, p, direction); + datas.removeAll(leftSideDatas); + // Ҫȥ + datas.remove(p); + rightSideDatas = datas; + + if (leftSideDatas.size() > 0) { + leftNode = new TreeNode(); + leftNode.parentNode = node; + range2 = Range.initLeftRange(p, direction); + // ȡڵĿռʸнΧ + range = node.range.crossOperation(range2); + leftNode.range = range; + + node.leftNode = leftNode; + recusiveConstructNode(leftNode, leftSideDatas); + } + + if (rightSideDatas.size() > 0) { + rightNode = new TreeNode(); + rightNode.parentNode = node; + range2 = Range.initRightRange(p, direction); + // ȡڵĿռʸнΧ + range = node.range.crossOperation(range2); + rightNode.range = range; + + node.rightNode = rightNode; + recusiveConstructNode(rightNode, rightSideDatas); + } + } + + /** + * ݵ + * + * @param p + * Ƚ + */ + public Point searchNearestData(Point p) { + // ڵݵľ + TreeNode nearestNode = null; + // ջ¼Ľڵ + Stack stackNodes; + + stackNodes = new Stack<>(); + findedNearestLeafNode(p, rootNode, stackNodes); + + // ȡҶӽڵ㣬Ϊǰҵڵ + nearestNode = stackNodes.pop(); + nearestNode = dfsSearchNodes(stackNodes, p, nearestNode); + + return nearestNode.nodeData; + } + + /** + * ȵķʽIJ + * + * @param stack + * KDڵջ + * @param desPoint + * ݵ + * @param nearestNode + * ǰҵڵ + * @return + */ + private TreeNode dfsSearchNodes(Stack stack, Point desPoint, + TreeNode nearestNode) { + // Ƿڵ߽ + boolean isCollision; + double minDis; + double dis; + TreeNode parentNode; + + // ջڽڵѾȫ + if (stack.isEmpty()) { + return nearestNode; + } + + // ȡڵ + parentNode = stack.pop(); + + minDis = desPoint.ouDistance(nearestNode.nodeData); + dis = desPoint.ouDistance(parentNode.nodeData); + + // 뵱ǰݵĸڵ̣Ľڵи + if (dis < minDis) { + minDis = dis; + nearestNode = parentNode; + } + + // Ĭûײ + 
isCollision = false; + // жǷ˸ڵĿռָ + if (parentNode.spilt == DIRECTION_X) { + if (parentNode.nodeData.x > desPoint.x - minDis + && parentNode.nodeData.x < desPoint.x + minDis) { + isCollision = true; + } + } else { + if (parentNode.nodeData.y > desPoint.y - minDis + && parentNode.nodeData.y < desPoint.y + minDis) { + isCollision = true; + } + } + + // ߽ˣҴ˽ڵĺӽڵ㻹δȫ꣬Լ + if (isCollision + && (!parentNode.leftNode.isVisited || !parentNode.rightNode.isVisited)) { + TreeNode newNode; + // ½ǰСֲڵջ + Stack otherStack = new Stack<>(); + // parentNode¼Ѱ + findedNearestLeafNode(desPoint, parentNode, otherStack); + newNode = dfsSearchNodes(otherStack, desPoint, otherStack.pop()); + + dis = newNode.nodeData.ouDistance(desPoint); + if (dis < minDis) { + nearestNode = newNode; + } + } + + // ϻ + nearestNode = dfsSearchNodes(stack, desPoint, nearestNode); + + return nearestNode; + } + + /** + * ҵڵҶӽڵ + * + * @param p + * ȽϽڵ + * @param node + * ǰĽڵ + * @param stack + * Ľڵջ + */ + private void findedNearestLeafNode(Point p, TreeNode node, + Stack stack) { + // ָ + int splitDic; + + // Ľڵջ + stack.push(node); + // Ϊʹ + node.isVisited = true; + // ˽ڵûҺӽڵ˵ѾҶӽڵ + if (node.leftNode == null && node.rightNode == null) { + return; + } + + splitDic = node.spilt; + // ѡһϷָΧĽڵݹѰ + if ((splitDic == DIRECTION_X && p.x < node.nodeData.x) + || (splitDic == DIRECTION_Y && p.y < node.nodeData.y)) { + if (!node.leftNode.isVisited) { + findedNearestLeafNode(p, node.leftNode, stack); + } else { + // ӽڵѾʹһ + findedNearestLeafNode(p, node.rightNode, stack); + } + } else if ((splitDic == DIRECTION_X && p.x > node.nodeData.x) + || (splitDic == DIRECTION_Y && p.y > node.nodeData.y)) { + if (!node.rightNode.isVisited) { + findedNearestLeafNode(p, node.rightNode, stack); + } else { + // ҺӽڵѾʹһ + findedNearestLeafNode(p, node.leftNode, stack); + } + } + } + + /** + * ݸݵͨ㷴ѡķָ + * + * @param datas + * ֵļϵ㼯 + * @return + */ + private int selectSplitDrc(ArrayList datas) { + int direction = 0; + double avgX = 0; + 
double avgY = 0; + double varianceX = 0; + double varianceY = 0; + + for (Point p : datas) { + avgX += p.x; + avgY += p.y; + } + + avgX /= datas.size(); + avgY /= datas.size(); + + for (Point p : datas) { + varianceX += (p.x - avgX) * (p.x - avgX); + varianceY += (p.y - avgY) * (p.y - avgY); + } + + // ķ + varianceX /= datas.size(); + varianceY /= datas.size(); + + // ͨȽϷĴСָѡ񲨶ϴĽл + direction = varianceX > varianceY ? DIRECTION_X : DIRECTION_Y; + + return direction; + } + + /** + * 㷽λѡм + * + * @param datas + * ݵ㼯 + * @param dir + * 귽 + */ + private Point getMiddlePoint(ArrayList datas, int dir) { + int index = 0; + Point middlePoint; + + index = datas.size() / 2; + if (dir == DIRECTION_X) { + Collections.sort(datas, new Comparator() { + + @Override + public int compare(Point o1, Point o2) { + // TODO Auto-generated method stub + return o1.x.compareTo(o2.x); + } + }); + } else { + Collections.sort(datas, new Comparator() { + + @Override + public int compare(Point o1, Point o2) { + // TODO Auto-generated method stub + return o1.y.compareTo(o2.y); + } + }); + } + + // ȡλ + middlePoint = datas.get(index); + + return middlePoint; + } + + /** + * ݷõԭֽڵ㼯ݵ + * + * @param datas + * ԭʼݵ㼯 + * @param nodeData + * ʸ + * @param dir + * ָ + * @return + */ + private ArrayList getLeftSideDatas(ArrayList datas, + Point nodeData, int dir) { + ArrayList leftSideDatas = new ArrayList<>(); + + for (Point p : datas) { + if (dir == DIRECTION_X && p.x < nodeData.x) { + leftSideDatas.add(p); + } else if (dir == DIRECTION_Y && p.y < nodeData.y) { + leftSideDatas.add(p); + } + } + + return leftSideDatas; + } +} From fd07316bae3b283ba1be138da83799c84200ff93 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 9 Apr 2015 22:09:10 +0800 Subject: [PATCH 17/58] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=9D=90=E6=A0=87?= =?UTF-8?q?=E7=82=B9=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 数据坐标点类 --- Others/DataMining_KDTree/Point.java | 58 
+++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 Others/DataMining_KDTree/Point.java diff --git a/Others/DataMining_KDTree/Point.java b/Others/DataMining_KDTree/Point.java new file mode 100644 index 0000000..c98a770 --- /dev/null +++ b/Others/DataMining_KDTree/Point.java @@ -0,0 +1,58 @@ +package DataMining_KDTree; + +/** + * + * + * @author lyq + * + */ +public class Point{ + // + Double x; + // + Double y; + + public Point(double x, double y){ + this.x = x; + this.y = y; + } + + public Point(String x, String y) { + this.x = (Double.parseDouble(x)); + this.y = (Double.parseDouble(y)); + } + + /** + * 㵱ǰƶ֮ŷʽ + * + * @param p + * p + * @return + */ + public double ouDistance(Point p) { + double distance = 0; + + distance = (this.x - p.x) * (this.x - p.x) + (this.y - p.y) + * (this.y - p.y); + distance = Math.sqrt(distance); + + return distance; + } + + /** + * ж2ǷΪø + * + * @param p + * Ƚ + * @return + */ + public boolean isTheSame(Point p) { + boolean isSamed = false; + + if (this.x == p.x && this.y == p.y) { + isSamed = true; + } + + return isSamed; + } +} From a3c5fa4544020ac309ee085063470e9a22c5e74f Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 9 Apr 2015 22:09:32 +0800 Subject: [PATCH 18/58] =?UTF-8?q?KD=E6=A0=91=E8=8A=82=E7=82=B9=E7=A9=BA?= =?UTF-8?q?=E9=97=B4=E7=9F=A2=E9=87=8F=E7=B1=BB=EF=BC=8C=E8=A1=A8=E7=A4=BA?= =?UTF-8?q?=E7=A9=BA=E9=97=B4=E8=8C=83=E5=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KD树节点空间矢量类,表示空间范围 --- Others/DataMining_KDTree/Range.java | 114 ++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 Others/DataMining_KDTree/Range.java diff --git a/Others/DataMining_KDTree/Range.java b/Others/DataMining_KDTree/Range.java new file mode 100644 index 0000000..b36d3d3 --- /dev/null +++ b/Others/DataMining_KDTree/Range.java @@ -0,0 +1,114 @@ +package DataMining_KDTree; + +/** + * ռʸʾĿռ䷶Χ + * + * @author lyq + * + 
*/ +public class Range { + // ߽߽ + double left; + // ߽ұ߽ + double right; + // ߽ϱ߽ + double top; + // ߽±߽ + double bottom; + + public Range() { + this.left = -Integer.MAX_VALUE; + this.right = Integer.MAX_VALUE; + this.top = Integer.MAX_VALUE; + this.bottom = -Integer.MAX_VALUE; + } + + public Range(int left, int right, int top, int bottom) { + this.left = left; + this.right = right; + this.top = top; + this.bottom = bottom; + } + + /** + * ռʸв + * + * @param range + * @return + */ + public Range crossOperation(Range r) { + Range range = new Range(); + + // ȡҲ߽ + if (r.left > this.left) { + range.left = r.left; + } else { + range.left = this.left; + } + + // ȡұ߽ + if (r.right < this.right) { + range.right = r.right; + } else { + range.right = this.right; + } + + // ȡ²ϱ߽ + if (r.top < this.top) { + range.top = r.top; + } else { + range.top = this.top; + } + + // ȡϲ±߽ + if (r.bottom > this.bottom) { + range.bottom = r.bottom; + } else { + range.bottom = this.bottom; + } + + return range; + } + + /** + * ָȷռʸ + * + * @param p + * ʸ + * @param dir + * ָ + * @return + */ + public static Range initLeftRange(Point p, int dir) { + Range range = new Range(); + + if (dir == KDTreeTool.DIRECTION_X) { + range.right = p.x; + } else { + range.bottom = p.y; + } + + return range; + } + + /** + * ָȷҲռʸ + * + * @param p + * ʸ + * @param dir + * ָ + * @return + */ + public static Range initRightRange(Point p, int dir) { + Range range = new Range(); + + if (dir == KDTreeTool.DIRECTION_X) { + range.left = p.x; + } else { + range.top = p.y; + } + + return range; + } +} From 511effd8a2d1c97e7a8a25cc514b18d1a2ea23a0 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 9 Apr 2015 22:10:10 +0800 Subject: [PATCH 19/58] =?UTF-8?q?KD=E6=A0=91=E8=8A=82=E7=82=B9=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KD树节点类 --- Others/DataMining_KDTree/TreeNode.java | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) 
create mode 100644 Others/DataMining_KDTree/TreeNode.java diff --git a/Others/DataMining_KDTree/TreeNode.java b/Others/DataMining_KDTree/TreeNode.java new file mode 100644 index 0000000..127833c --- /dev/null +++ b/Others/DataMining_KDTree/TreeNode.java @@ -0,0 +1,27 @@ +package DataMining_KDTree; + +/** + * KDڵ + * @author lyq + * + */ +public class TreeNode { + //ʸ + Point nodeData; + //ָƽķָ + int spilt; + //ռʸýڵʾĿռ䷶Χ + Range range; + //ڵ + TreeNode parentNode; + //λڷָƽĺӽڵ + TreeNode leftNode; + //λڷָƽҲĺӽڵ + TreeNode rightNode; + //ڵǷ񱻷ʹ,ڻʱʹ + boolean isVisited; + + public TreeNode(){ + this.isVisited = false; + } +} From 7d50135e80665af831ea9a89f9c9814d99492199 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 9 Apr 2015 22:14:39 +0800 Subject: [PATCH 20/58] =?UTF-8?q?=E6=B7=BB=E5=8A=A0KD=E6=A0=91=E7=9A=84?= =?UTF-8?q?=E7=AE=97=E6=B3=95=E4=BB=8B=E7=BB=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加KD树的算法介绍 --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 0d384e7..472fc9c 100644 --- a/README.md +++ b/README.md @@ -74,3 +74,6 @@ gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘, * ### Chameleon 两阶段聚类算法。与CABDDCC算法相反,最后是通过对小簇集合的合并,形成最终的结果,在第一阶段主要是通过K近邻的思想形成小规模的连通图,第二阶段通过RI(相对互连性)和RC(相对近似性)来选一个最佳的簇进行合并。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44569077) + +* ### KDTree +K-Dimension Tree。多维空间划分树,数据在多维空间进行划分与查找。 \ No newline at end of file From d7070951458840d9eff43c3a4ce1a211cb2b44e6 Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Tue, 14 Apr 2015 22:45:38 +0800 Subject: [PATCH 21/58] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KD树算法补充介绍 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fac28f0..5e59d53 100644 --- a/README.md +++ b/README.md @@ -79,4 +79,4 @@ gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘, 
随机森林算法。算法思想是决策树+boosting.决策树采用的是CART分类回归数,通过组合各个决策树的弱分类器,构成一个最终的强分类器,在构造决策树的时候采取随机数量的样本数和随机的部分属性进行子决策树的构建,避免了过分拟合的现象发生。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44756943) * ### KDTree -K-Dimension Tree。多维空间划分树,数据在多维空间进行划分与查找。 \ No newline at end of file +K-Dimension Tree。多维空间划分树,数据在多维空间进行划分与查找。主要用于关键信息的搜索,类似于在空间中的二分搜索,大大提高了搜索效率,在寻找目标元素时,使用了DFS深度优先的方式和回溯进行最近点的寻找。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44985259) From 699ffa37c4d830d5a3eb3724a6ebf80bdb3e8875 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 16 Apr 2015 16:06:39 +0800 Subject: [PATCH 22/58] =?UTF-8?q?ms-apriori=E7=AE=97=E6=B3=95=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ms-apriori算法测试类 --- Others/DataMining_MSApriori/Client.java | 45 +++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 Others/DataMining_MSApriori/Client.java diff --git a/Others/DataMining_MSApriori/Client.java b/Others/DataMining_MSApriori/Client.java new file mode 100644 index 0000000..f49e83d --- /dev/null +++ b/Others/DataMining_MSApriori/Client.java @@ -0,0 +1,45 @@ +package DataMining_MSApriori; + +/** + * ڶֶ֧ȵApriori㷨 + * @author lyq + * + */ +public class Client { + public static void main(String[] args){ + //Ƿ + boolean isTransaction; + //ļַ + String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt"; + //ϵļַ + String tableFilePath = "C:\\Users\\lyq\\Desktop\\icon\\input2.txt"; + //Сֵֶ֧ + double minSup; + // СŶ + double minConf; + //ֶ֧Ȳֵ + double delta; + //ĿСֶ֧,е±ƷID + double[] mis; + //msApriori㷨 + MSAprioriTool tool; + + //Ϊ˲Եķ㣬ȡһƫ͵Ŷֵ0.3 + minConf = 0.3; + minSup = 0.1; + delta = 0.5; + //ÿֶ֧ʶĬΪ0.1һʹ + mis = new double[]{-1, 0.1, 0.1, 0.1, 0.1, 0.1}; + isTransaction = true; + + isTransaction = true; + tool = new MSAprioriTool(filePath, minConf, delta, mis, isTransaction); + tool.calFItems(); + System.out.println(); + + isTransaction = 
false; + //³ʼ + tool = new MSAprioriTool(tableFilePath, minConf, minSup, isTransaction); + tool.calFItems(); + } +} From 500767ec16df11d28f5a0b6c8e419668edb6bbe6 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 16 Apr 2015 16:06:56 +0800 Subject: [PATCH 23/58] =?UTF-8?q?Apriori=E7=AE=97=E6=B3=95=E9=A2=91?= =?UTF-8?q?=E7=B9=81=E9=A1=B9=E9=9B=86=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apriori算法频繁项集类 --- Others/DataMining_MSApriori/FrequentItem.java | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 Others/DataMining_MSApriori/FrequentItem.java diff --git a/Others/DataMining_MSApriori/FrequentItem.java b/Others/DataMining_MSApriori/FrequentItem.java new file mode 100644 index 0000000..2ba88c4 --- /dev/null +++ b/Others/DataMining_MSApriori/FrequentItem.java @@ -0,0 +1,56 @@ +package DataMining_MSApriori; + +/** + * Ƶ + * + * @author lyq + * + */ +public class FrequentItem implements Comparable{ + // ƵļID + private String[] idArray; + // Ƶֶ֧ȼ + private int count; + //Ƶijȣ123 + private int length; + + public FrequentItem(String[] idArray, int count){ + this.idArray = idArray; + this.count = count; + length = idArray.length; + } + + public String[] getIdArray() { + return idArray; + } + + public void setIdArray(String[] idArray) { + this.idArray = idArray; + } + + public int getCount() { + return count; + } + + public void setCount(int count) { + this.count = count; + } + + public int getLength() { + return length; + } + + public void setLength(int length) { + this.length = length; + } + + @Override + public int compareTo(FrequentItem o) { + // TODO Auto-generated method stub + Integer int1 = Integer.parseInt(this.getIdArray()[0]); + Integer int2 = Integer.parseInt(o.getIdArray()[0]); + + return int1.compareTo(int2); + } + +} From 0036384d3c0d1010311013d5e7631ceab3c9df41 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 16 Apr 2015 
16:07:12 +0800 Subject: [PATCH 24/58] =?UTF-8?q?=E7=AE=97=E6=B3=95=E4=BA=8B=E5=8A=A1?= =?UTF-8?q?=E5=9E=8B=E6=B5=8B=E8=AF=95=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 算法事务型测试数据 --- Others/DataMining_MSApriori/testInput.txt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 Others/DataMining_MSApriori/testInput.txt diff --git a/Others/DataMining_MSApriori/testInput.txt b/Others/DataMining_MSApriori/testInput.txt new file mode 100644 index 0000000..9769e26 --- /dev/null +++ b/Others/DataMining_MSApriori/testInput.txt @@ -0,0 +1,9 @@ +T1 1 2 5 +T2 2 4 +T3 2 3 +T4 1 2 4 +T5 1 3 +T6 2 3 +T7 1 3 +T8 1 2 3 5 +T9 1 2 3 \ No newline at end of file From 1a3adc78ef1970dcd75eb9bcbe0627d749db5fa7 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 16 Apr 2015 16:07:36 +0800 Subject: [PATCH 25/58] =?UTF-8?q?=E7=AE=97=E6=B3=95=E5=85=B3=E7=B3=BB?= =?UTF-8?q?=E5=9E=8B=E6=95=B0=E6=8D=AE=E6=B5=8B=E8=AF=95=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 算法关系型数据测试数据 --- Others/DataMining_MSApriori/testInput2.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 Others/DataMining_MSApriori/testInput2.txt diff --git a/Others/DataMining_MSApriori/testInput2.txt b/Others/DataMining_MSApriori/testInput2.txt new file mode 100644 index 0000000..ac50350 --- /dev/null +++ b/Others/DataMining_MSApriori/testInput2.txt @@ -0,0 +1,15 @@ +Rid Age Income Student CreditRating BuysComputer +1 Youth High No Fair No +2 Youth High No Excellent No +3 MiddleAged High No Fair Yes +4 Senior Medium No Fair Yes +5 Senior Low Yes Fair Yes +6 Senior Low Yes Excellent No +7 MiddleAged Low Yes Excellent Yes +8 Youth Medium No Fair No +9 Youth Low Yes Fair Yes +10 Senior Medium Yes Fair Yes +11 Youth Medium Yes Excellent Yes +12 MiddleAged Medium No Excellent Yes +13 MiddleAged High Yes Fair Yes +14 Senior Medium No Excellent No \ No 
newline at end of file From 1388fde922a52752176e6d9eef9ee65bf0f39295 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Thu, 16 Apr 2015 16:08:02 +0800 Subject: [PATCH 26/58] =?UTF-8?q?Apriori=E7=AE=97=E6=B3=95=E7=9A=84?= =?UTF-8?q?=E5=8D=87=E7=BA=A7=E7=AE=97=E6=B3=95=E5=B0=81=E8=A3=85=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apriori算法的升级算法封装类 --- .../DataMining_MSApriori/MSAprioriTool.java | 780 ++++++++++++++++++ 1 file changed, 780 insertions(+) create mode 100644 Others/DataMining_MSApriori/MSAprioriTool.java diff --git a/Others/DataMining_MSApriori/MSAprioriTool.java b/Others/DataMining_MSApriori/MSAprioriTool.java new file mode 100644 index 0000000..ba5d444 --- /dev/null +++ b/Others/DataMining_MSApriori/MSAprioriTool.java @@ -0,0 +1,780 @@ +package DataMining_MSApriori; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import DataMining_Apriori.FrequentItem; + +/** + * ڶֶ֧ȵApriori㷨 + * + * @author lyq + * + */ +public class MSAprioriTool { + // ǰжϵĽֵڹƵ + public static final int PREFIX_NOT_SUB = -1; + public static final int PREFIX_EQUAL = 1; + public static final int PREFIX_IS_SUB = 2; + + // Ƿȡ + private boolean isTransaction; + // Ƶkkֵ + private int initFItemNum; + // ļַ + private String filePath; + // Сֵֶ֧ + private double minSup; + // СŶ + private double minConf; + // ֶ֧Ȳֵ + private double delta; + // ĿСֶ֧,е±ƷID + private double[] mis; + // ÿеƷID + private ArrayList totalGoodsIDs; + // ϵת + private ArrayList transactionDatas; + // мƵб + private ArrayList resultItem; + // мƵID + private ArrayList resultItemID; + // Եֵӳͼ + private HashMap attr2Num; + // idӦԵӳͼ + private HashMap num2Attr; + // Ƶǵidֵ + private Map fItem2Id; + + /** + * ݹھ㷨 + * + * @param filePath + * @param 
minConf + * @param delta + * @param mis + * @param isTransaction + */ + public MSAprioriTool(String filePath, double minConf, double delta, + double[] mis, boolean isTransaction) { + this.filePath = filePath; + this.minConf = minConf; + this.delta = delta; + this.mis = mis; + this.isTransaction = isTransaction; + this.fItem2Id = new HashMap<>(); + + readDataFile(); + } + + /** + * ͹ھ + * + * @param filePath + * @param minConf + * @param minSup + * @param isTransaction + */ + public MSAprioriTool(String filePath, double minConf, double minSup, + boolean isTransaction) { + this.filePath = filePath; + this.minConf = minConf; + this.minSup = minSup; + this.isTransaction = isTransaction; + this.delta = 1.0; + this.fItem2Id = new HashMap<>(); + + readRDBMSData(filePath); + } + + /** + * ļжȡ + */ + private void readDataFile() { + String[] temp = null; + ArrayList dataArray; + + dataArray = readLine(filePath); + totalGoodsIDs = new ArrayList<>(); + + for (String[] array : dataArray) { + temp = new String[array.length - 1]; + System.arraycopy(array, 1, temp, 0, array.length - 1); + + // IDб + totalGoodsIDs.add(temp); + } + } + + /** + * ļж + * + * @param filePath + * ļַ + * @return + */ + private ArrayList readLine(String filePath) { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + String[] tempArray; + while ((str = in.readLine()) != null) { + tempArray = str.split(" "); + dataArray.add(tempArray); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + return dataArray; + } + + /** + * Ƶ + */ + public void calFItems() { + FrequentItem fItem; + + computeLink(); + printFItems(); + + if (isTransaction) { + fItem = resultItem.get(resultItem.size() - 1); + // ȡһƵƵ + System.out.println("һƵƵ"); + printAttachRuls(fItem.getIdArray()); + } + } + + /** + * Ƶ + */ + private void printFItems() { + if (isTransaction) { + System.out.println("Ƶ:"); + 
} else { + System.out.println("(ϵ)Ƶ:"); + } + + // Ƶ + for (int k = 1; k <= initFItemNum; k++) { + System.out.println("Ƶ" + k + ""); + for (FrequentItem i : resultItem) { + if (i.getLength() == k) { + System.out.print("{"); + for (String t : i.getIdArray()) { + if (!isTransaction) { + // ԭǷݣҪ滻 + t = num2Attr.get(Integer.parseInt(t)); + } + + System.out.print(t + ","); + } + System.out.print("},"); + } + } + System.out.println(); + } + } + + /** + *  + */ + private void computeLink() { + // Ӽֹk㵽k-1Ϊֹ + int endNum = 0; + // ǰѾ㵽,ʼʱ1 + int currentNum = 1; + // Ʒ1Ƶӳͼ + HashMap itemMap = new HashMap<>(); + FrequentItem tempItem; + // ʼб + ArrayList list = new ArrayList<>(); + // Ľ + resultItem = new ArrayList<>(); + resultItemID = new ArrayList<>(); + // ƷID + ArrayList idType = new ArrayList<>(); + for (String[] a : totalGoodsIDs) { + for (String s : a) { + if (!idType.contains(s)) { + tempItem = new FrequentItem(new String[] { s }, 1); + idType.add(s); + resultItemID.add(new String[] { s }); + } else { + // ֶ֧ȼ1 + tempItem = itemMap.get(s); + tempItem.setCount(tempItem.getCount() + 1); + } + itemMap.put(s, tempItem); + } + } + // ʼƵת뵽бУԱ + for (Map.Entry entry : itemMap.entrySet()) { + tempItem = entry.getValue(); + + // ж1ƵǷֵֶ֧ + if (judgeFItem(tempItem.getIdArray())) { + list.add(tempItem); + } + } + + // ƷID򣬷Ӽ᲻һ£ + Collections.sort(list); + resultItem.addAll(list); + + String[] array1; + String[] array2; + String[] resultArray; + ArrayList tempIds; + ArrayList resultContainer; + // ܹҪ㵽endNum + endNum = list.size() - 1; + initFItemNum = list.size() - 1; + + while (currentNum < endNum) { + resultContainer = new ArrayList<>(); + for (int i = 0; i < list.size() - 1; i++) { + tempItem = list.get(i); + array1 = tempItem.getIdArray(); + + for (int j = i + 1; j < list.size(); j++) { + tempIds = new ArrayList<>(); + array2 = list.get(j).getIdArray(); + + for (int k = 0; k < array1.length; k++) { + // Ӧλϵֵȵʱֻȡһֵһɾ + if (array1[k].equals(array2[k])) { + 
tempIds.add(array1[k]); + } else { + tempIds.add(array1[k]); + tempIds.add(array2[k]); + } + } + + resultArray = new String[tempIds.size()]; + tempIds.toArray(resultArray); + + boolean isContain = false; + // ˲ĵID飬ظĺͳȲҪ + if (resultArray.length == (array1.length + 1)) { + isContain = isIDArrayContains(resultContainer, + resultArray); + if (!isContain) { + resultContainer.add(resultArray); + } + } + } + } + + // Ƶļ֦뱣֤µƵҲƵ + list = cutItem(resultContainer); + currentNum++; + } + } + + /** + * Ƶ֦裬뱣֤µƵҲƵ + */ + private ArrayList cutItem(ArrayList resultIds) { + String[] temp; + // ԵλãԴ˹Ӽ + int igNoreIndex = 0; + FrequentItem tempItem; + // ֦µƵ + ArrayList newItem = new ArrayList<>(); + // Ҫid + ArrayList deleteIdArray = new ArrayList<>(); + // ǷҲΪƵ + boolean isContain = true; + + for (String[] array : resultIds) { + // оٳеһжϴƵб + temp = new String[array.length - 1]; + for (igNoreIndex = 0; igNoreIndex < array.length; igNoreIndex++) { + isContain = true; + for (int j = 0, k = 0; j < array.length; j++) { + if (j != igNoreIndex) { + temp[k] = array[j]; + k++; + } + } + + if (!isIDArrayContains(resultItemID, temp)) { + isContain = false; + break; + } + } + + if (!isContain) { + deleteIdArray.add(array); + } + } + + // ƳID + resultIds.removeAll(deleteIdArray); + + // Ƴֶ֧ȼid + int tempCount = 0; + boolean isSatisfied = false; + for (String[] array : resultIds) { + isSatisfied = judgeFItem(array); + + // Ƶֵֶֶ֧֧Ȳ + if (isSatisfied) { + tempItem = new FrequentItem(array, tempCount); + newItem.add(tempItem); + resultItemID.add(array); + resultItem.add(tempItem); + } + } + + return newItem; + } + + /** + * жбǷѾ + * + * @param container + * ID + * @param array + * Ƚ + * @return + */ + private boolean isIDArrayContains(ArrayList container, + String[] array) { + boolean isContain = true; + if (container.size() == 0) { + isContain = false; + return isContain; + } + + for (String[] s : container) { + // ȽϵӺ뱣֤һ + if (s.length != array.length) { + continue; + } + + 
isContain = true; + for (int i = 0; i < s.length; i++) { + // ֻҪһidȣ㲻 + if (s[i] != array[i]) { + isContain = false; + break; + } + } + + // Ѿжǰʱֱ˳ + if (isContain) { + break; + } + } + + return isContain; + } + + /** + * жһƵǷ + * + * @param frequentItem + * жƵ + * @return + */ + private boolean judgeFItem(String[] frequentItem) { + boolean isSatisfied = true; + int id; + int count; + double tempMinSup; + // Сֵֶ֧ + double minMis = Integer.MAX_VALUE; + // ֵֶ֧ + double maxMis = -Integer.MAX_VALUE; + + // ݣmisжϣͳһͬСֵֶ֧ж + if (isTransaction) { + // ѰƵеСֵֶ֧ + for (int i = 0; i < frequentItem.length; i++) { + id = i + 1; + + if (mis[id] < minMis) { + minMis = mis[id]; + } + + if (mis[id] > maxMis) { + maxMis = mis[id]; + } + } + } else { + minMis = minSup; + maxMis = minSup; + } + + count = calSupportCount(frequentItem); + tempMinSup = 1.0 * count / totalGoodsIDs.size(); + // жƵֵֶ֧Ƿ񳬹Сֵֶ֧ + if (tempMinSup < minMis) { + isSatisfied = false; + } + + // ֶ֧ȲҲ㲻 + if (Math.abs(maxMis - minMis) > delta) { + isSatisfied = false; + } + + return isSatisfied; + } + + /** + * ͳƺѡƵֶ֧Ӽмɨݼ + * + * @param frequentItem + * Ƶ + * @return + */ + private int calSupportCount(String[] frequentItem) { + int count = 0; + int[] ids; + String key; + String[] array; + ArrayList newIds; + + key = ""; + for (int i = 1; i < frequentItem.length; i++) { + key += frequentItem[i]; + } + + newIds = new ArrayList<>(); + // ҳID + ids = fItem2Id.get(key); + + // ûҵidȫɨݼ + if (ids == null || ids.length == 0) { + for (int j = 0; j < totalGoodsIDs.size(); j++) { + array = totalGoodsIDs.get(j); + if (isStrArrayContain(array, frequentItem)) { + count++; + newIds.add(j); + } + } + } else { + for (int index : ids) { + array = totalGoodsIDs.get(index); + if (isStrArrayContain(array, frequentItem)) { + count++; + newIds.add(index); + } + } + } + + ids = new int[count]; + for (int i = 0; i < ids.length; i++) { + ids[i] = newIds.get(i); + } + + key = frequentItem[0] + key; + // ֵͼУ´εļ + fItem2Id.put(key, ids); + 
+ return count; + } + + /** + * ݸƵ + * + * @param frequentItems + * Ƶ + */ + public void printAttachRuls(String[] frequentItem) { + // ǰ, + Map, ArrayList> rules; + // ǰʷ + Map, ArrayList> searchHistory; + ArrayList prefix; + ArrayList suffix; + + rules = new HashMap, ArrayList>(); + searchHistory = new HashMap<>(); + + for (int i = 0; i < frequentItem.length; i++) { + suffix = new ArrayList<>(); + for (int j = 0; j < frequentItem.length; j++) { + suffix.add(frequentItem[j]); + } + prefix = new ArrayList<>(); + + recusiveFindRules(rules, searchHistory, prefix, suffix); + } + + // ҵĹ + for (Map.Entry, ArrayList> entry : rules + .entrySet()) { + prefix = entry.getKey(); + suffix = entry.getValue(); + + printRuleDetail(prefix, suffix); + } + } + + /** + * ǰ + * + * @param prefix + * @param suffix + */ + private void printRuleDetail(ArrayList prefix, + ArrayList suffix) { + // {A}-->{B}˼ΪA·Bĸ + System.out.print("{"); + for (String s : prefix) { + System.out.print(s + ", "); + } + System.out.print("}-->"); + System.out.print("{"); + for (String s : suffix) { + System.out.print(s + ", "); + } + System.out.println("}"); + } + + /** + * ݹչ + * + * @param rules + * + * @param history + * ǰʷ + * @param prefix + * ǰ + * @param suffix + * + */ + private void recusiveFindRules( + Map, ArrayList> rules, + Map, ArrayList> history, + ArrayList prefix, ArrayList suffix) { + int count1; + int count2; + int compareResult; + // ŶȴС + double conf; + String[] temp1; + String[] temp2; + ArrayList copyPrefix; + ArrayList copySuffix; + + // ֻ1 + if (suffix.size() == 1) { + return; + } + + for (String s : suffix) { + count1 = 0; + count2 = 0; + + copyPrefix = (ArrayList) prefix.clone(); + copyPrefix.add(s); + + copySuffix = (ArrayList) suffix.clone(); + // ĺƳӵһ + copySuffix.remove(s); + + compareResult = isSubSetInRules(history, copyPrefix); + if (compareResult == PREFIX_EQUAL) { + // Ѿ + continue; + } + + // жǷΪӼӼ + compareResult = isSubSetInRules(rules, copyPrefix); + if (compareResult 
== PREFIX_IS_SUB) { + rules.put(copyPrefix, copySuffix); + // 뵽ʷ + history.put(copyPrefix, copySuffix); + recusiveFindRules(rules, history, copyPrefix, copySuffix); + continue; + } + + // ʱϲΪܵļ + copySuffix.addAll(copyPrefix); + temp1 = new String[copyPrefix.size()]; + temp2 = new String[copySuffix.size()]; + copyPrefix.toArray(temp1); + copySuffix.toArray(temp2); + // ֮ٴƳ֮ǰ콣ǰ + copySuffix.removeAll(copyPrefix); + + for (String[] a : totalGoodsIDs) { + if (isStrArrayContain(a, temp1)) { + count1++; + + // group1£ͳgroup2¼ + if (isStrArrayContain(a, temp2)) { + count2++; + } + } + } + + conf = 1.0 * count2 / count1; + if (conf > minConf) { + // ôǰ£ܵ + rules.put(copyPrefix, copySuffix); + } + + // 뵽ʷ + history.put(copyPrefix, copySuffix); + recusiveFindRules(rules, history, copyPrefix, copySuffix); + } + } + + /** + * жϵǰǰǷӼ + * + * @param rules + * ǰѾжϳĹ + * @param prefix + * жϵǰ + * @return + */ + private int isSubSetInRules( + Map, ArrayList> rules, + ArrayList prefix) { + int result = PREFIX_NOT_SUB; + String[] temp1; + String[] temp2; + ArrayList tempPrefix; + + for (Map.Entry, ArrayList> entry : rules + .entrySet()) { + tempPrefix = entry.getKey(); + + temp1 = new String[tempPrefix.size()]; + temp2 = new String[prefix.size()]; + + tempPrefix.toArray(temp1); + prefix.toArray(temp2); + + // жϵǰǰǷѾǴǰӼ + if (isStrArrayContain(temp2, temp1)) { + if (temp2.length == temp1.length) { + result = PREFIX_EQUAL; + } else { + result = PREFIX_IS_SUB; + } + } + + if (result == PREFIX_EQUAL) { + break; + } + } + + return result; + } + + /** + * array2Ƿarray1УҪȫһ + * + * @param array1 + * @param array2 + * @return + */ + private boolean isStrArrayContain(String[] array1, String[] array2) { + boolean isContain = true; + for (String s2 : array2) { + isContain = false; + for (String s1 : array1) { + // ֻҪs2ַarray1Уַarray1 + if (s2.equals(s1)) { + isContain = true; + break; + } + } + + // һֲַarray2鲻array1 + if (!isContain) { + break; + } + } + + return isContain; + } + + /** + * 
ϵеݣתΪ + * + * @param filePath + */ + private void readRDBMSData(String filePath) { + String str; + // + String[] attrNames = null; + String[] temp; + String[] newRecord; + ArrayList datas = null; + + datas = readLine(filePath); + + // ȡ + attrNames = datas.get(0); + this.transactionDatas = new ArrayList<>(); + + // ȥ + for (int i = 1; i < datas.size(); i++) { + temp = datas.get(i); + + // ˵id + for (int j = 1; j < temp.length; j++) { + str = ""; + // +ֵʽݵظ + str = attrNames[j] + ":" + temp[j]; + temp[j] = str; + } + + newRecord = new String[attrNames.length - 1]; + System.arraycopy(temp, 1, newRecord, 0, attrNames.length - 1); + this.transactionDatas.add(newRecord); + } + + attributeReplace(); + // תtotalGoodsIDͳһ + this.totalGoodsIDs = transactionDatas; + } + + /** + * ֵ滻滻ֵʽԱƵھ + */ + private void attributeReplace() { + int currentValue = 1; + String s; + // ֵӳͼ + attr2Num = new HashMap<>(); + num2Attr = new HashMap<>(); + + // 1еķʽұɨ,кid + for (int j = 0; j < transactionDatas.get(0).length; j++) { + for (int i = 0; i < transactionDatas.size(); i++) { + s = transactionDatas.get(i)[j]; + + if (!attr2Num.containsKey(s)) { + attr2Num.put(s, currentValue); + num2Attr.put(currentValue, s); + + transactionDatas.get(i)[j] = currentValue + ""; + currentValue++; + } else { + transactionDatas.get(i)[j] = attr2Num.get(s) + ""; + } + } + } + } +} From dce1e860abef7bf461b0c8122112131c74367c59 Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Thu, 16 Apr 2015 16:16:37 +0800 Subject: [PATCH 27/58] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 补充ms-aprioir算法的介绍 --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 5e59d53..12b3bbd 100644 --- a/README.md +++ b/README.md @@ -80,3 +80,6 @@ gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘, * ### KDTree K-Dimension 
Tree。多维空间划分树,数据在多维空间进行划分与查找。主要用于关键信息的搜索,类似于在空间中的二分搜索,大大提高了搜索效率,在寻找目标元素时,使用了DFS深度优先的方式和回溯进行最近点的寻找。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44985259) + +* ### MS-Apriori +基于多支持度的Apriori算法。是Apriori算法的升级算法,弥补了原先Apriori算法的不足,还增加了支持度差别限制以及支持度计数统计方面的优化,无须再次重新扫描整个数据集,产生关联规则的时候可以根据子集的关系避免一些置信度的计算。 From a1bf70cfdaaedd1b817a1eb01b126282508de2a3 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Mon, 27 Apr 2015 22:21:22 +0800 Subject: [PATCH 28/58] =?UTF-8?q?ACO=E8=9A=81=E7=BE=A4=E7=AE=97=E6=B3=95?= =?UTF-8?q?=E5=B7=A5=E5=85=B7=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACO蚁群算法工具类 --- Others/DataMining_ACO/ACOTool.java | 341 +++++++++++++++++++++++++++++ 1 file changed, 341 insertions(+) create mode 100644 Others/DataMining_ACO/ACOTool.java diff --git a/Others/DataMining_ACO/ACOTool.java b/Others/DataMining_ACO/ACOTool.java new file mode 100644 index 0000000..b351346 --- /dev/null +++ b/Others/DataMining_ACO/ACOTool.java @@ -0,0 +1,341 @@ +package DataMining_ACO; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Random; + +/** + * Ⱥ㷨 + * + * @author lyq + * + */ +public class ACOTool { + // + public static final int INPUT_CITY_NAME = 1; + public static final int INPUT_CITY_DIS = 2; + + // мڽӾ + public static double[][] disMatrix; + // ǰʱ + public static int currentTime; + + // ݵַ + private String filePath; + // + private int antNum; + // Ʋ + private double alpha; + private double beita; + private double p; + private double Q; + // + private Random random; + // Ƽ,Ϊ˷㣬ֱʾ + private ArrayList totalCitys; + // еϼ + private ArrayList totalAnts; + // мϢŨȾʱ + private double[][] pheromoneMatrix; + // Ŀ·,˳ΪӼϵǰŲ + private ArrayList bestPath; + // Ϣؾ洢ͼ,keyõĸʽ(i,j,t)->value 
+ private Map pheromoneTimeMap; + + public ACOTool(String filePath, int antNum, double alpha, double beita, + double p, double Q) { + this.filePath = filePath; + this.antNum = antNum; + this.alpha = alpha; + this.beita = beita; + this.p = p; + this.Q = Q; + this.currentTime = 0; + + readDataFile(); + } + + /** + * ļжȡ + */ + private void readDataFile() { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + String[] tempArray; + while ((str = in.readLine()) != null) { + tempArray = str.split(" "); + dataArray.add(tempArray); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + int flag = -1; + int src = 0; + int des = 0; + int size = 0; + // гͳ + this.totalCitys = new ArrayList<>(); + for (String[] array : dataArray) { + if (array[0].equals("#") && totalCitys.size() == 0) { + flag = INPUT_CITY_NAME; + + continue; + } else if (array[0].equals("#") && totalCitys.size() > 0) { + size = totalCitys.size(); + // ʼ + this.disMatrix = new double[size + 1][size + 1]; + this.pheromoneMatrix = new double[size + 1][size + 1]; + + // ʼֵ-1˶Ӧλֵ + for (int i = 0; i < size; i++) { + for (int j = 0; j < size; j++) { + this.disMatrix[i][j] = -1; + this.pheromoneMatrix[i][j] = -1; + } + } + + flag = INPUT_CITY_DIS; + continue; + } + + if (flag == INPUT_CITY_NAME) { + this.totalCitys.add(array[0]); + } else { + src = Integer.parseInt(array[0]); + des = Integer.parseInt(array[1]); + + this.disMatrix[src][des] = Double.parseDouble(array[2]); + this.disMatrix[des][src] = Double.parseDouble(array[2]); + } + } + } + + /** + * ϳijĸ + * + * @param cityI + * I + * @param cityJ + * J + * @param currentTime + * ǰʱ + * @return + */ + private double calIToJProbably(String cityI, String cityJ, int currentTime) { + double pro = 0; + double n = 0; + double pheromone; + int i; + int j; + + i = Integer.parseInt(cityI); + j = Integer.parseInt(cityJ); + + pheromone = 
getPheromone(currentTime, cityI, cityJ); + n = 1.0 / disMatrix[i][j]; + + if (pheromone == 0) { + pheromone = 1; + } + + pro = Math.pow(n, alpha) * Math.pow(pheromone, beita); + + return pro; + } + + /** + * ۺϸϴIߵJеĸ + * + * @return + */ + public String selectAntNextCity(Ant ant, int currentTime) { + double randomNum; + double tempPro; + // ָܸ + double proTotal; + String nextCity = null; + ArrayList allowedCitys; + // иʼ + double[] proArray; + + // Ǹոտʼʱû·κγУһ + if (ant.currentPath.size() == 0) { + nextCity = String.valueOf(random.nextInt(totalCitys.size()) + 1); + + return nextCity; + } else if (ant.nonVisitedCitys.isEmpty()) { + // ȫϣٴλص + nextCity = ant.currentPath.get(0); + + return nextCity; + } + + proTotal = 0; + allowedCitys = ant.nonVisitedCitys; + proArray = new double[allowedCitys.size()]; + + for (int i = 0; i < allowedCitys.size(); i++) { + nextCity = allowedCitys.get(i); + proArray[i] = calIToJProbably(ant.currentPos, nextCity, currentTime); + proTotal += proArray[i]; + } + + for (int i = 0; i < allowedCitys.size(); i++) { + // һ + proArray[i] /= proTotal; + } + + // ѡһ + randomNum = random.nextInt(100) + 1; + randomNum = randomNum / 100; + // Ϊ1.0޷жϵģ,ܺͻ޽ӽ1.0ȡΪ0.99ж + if (randomNum == 1) { + randomNum = randomNum - 0.01; + } + + tempPro = 0; + // ȷ + for (int j = 0; j < allowedCitys.size(); j++) { + if (randomNum > tempPro && randomNum <= tempPro + proArray[j]) { + // ÿķʽظ + nextCity = allowedCitys.get(j); + break; + } else { + tempPro += proArray[j]; + } + } + + return nextCity; + } + + /** + * ȡʱϴӳijϢŨ + * + * @param t + * @param cityI + * @param cityJ + * @return + */ + private double getPheromone(int t, String cityI, String cityJ) { + double pheromone = 0; + String key; + + // һ轫ʱ䵹һ + key = MessageFormat.format("{0},{1},{2}", cityI, cityJ, t); + + if (pheromoneTimeMap.containsKey(key)) { + pheromone = pheromoneTimeMap.get(key); + } + + return pheromone; + } + + /** + * ÿֽˢϢŨȾ + * + * @param t + */ + private void refreshPheromone(int t) { + double 
pheromone = 0; + // һڽϢŨȣϢŨͼв + double lastTimeP = 0; + // ϢŨ + double addPheromone; + String key; + + for (String i : totalCitys) { + for (String j : totalCitys) { + if (!i.equals(j)) { + // һ轫ʱ䵹һ + key = MessageFormat.format("{0},{1},{2}", i, j, t - 1); + + if (pheromoneTimeMap.containsKey(key)) { + lastTimeP = pheromoneTimeMap.get(key); + } else { + lastTimeP = 0; + } + + addPheromone = 0; + for (Ant ant : totalAnts) { + // ÿֻϴϢΪӳԾܳɱ + addPheromone += Q / ant.calSumDistance(); + } + + // ϴεĽֵϵͼ + pheromone = p * lastTimeP + addPheromone; + key = MessageFormat.format("{0},{1},{2}", i, j, t); + pheromoneTimeMap.put(key, pheromone); + } + } + } + + } + + public void antStartSearching() { + // ȺѰҵܴ + int loopCount = 0; + // ѡеһ + String selectedCity = ""; + + pheromoneTimeMap = new HashMap(); + totalAnts = new ArrayList<>(); + random = new Random(); + + while (loopCount < 10) { + initAnts(); + + while (true) { + for (Ant ant : totalAnts) { + selectedCity = selectAntNextCity(ant, currentTime); + ant.goToNextCity(selectedCity); + } + + // ѾгУѭ + if (totalAnts.get(0).isBack()) { + break; + } + } + + // ʱ + currentTime++; + refreshPheromone(currentTime); + } + + // ݾɱѡ̵һ· + Collections.sort(totalAnts); + bestPath = totalAnts.get(0).currentPath; + for (String cityName : bestPath) { + System.out.println(MessageFormat.format("-->{0}", cityName)); + } + } + + /** + * ʼȺ + */ + private void initAnts() { + Ant tempAnt; + ArrayList nonVisitedCitys; + totalAnts.clear(); + + // ʼȺ + for (int i = 0; i < antNum; i++) { + nonVisitedCitys = (ArrayList) totalCitys.clone(); + tempAnt = new Ant(pheromoneMatrix, nonVisitedCitys); + + totalAnts.add(tempAnt); + } + } +} From 44a415f1fbc29a2821e8103ec1b133edc8a65143 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Mon, 27 Apr 2015 22:21:42 +0800 Subject: [PATCH 29/58] =?UTF-8?q?=E8=9A=81=E7=BE=A4=E7=AE=97=E6=B3=95?= =?UTF-8?q?=E8=9A=82=E8=9A=81=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit 蚁群算法蚂蚁类 --- Others/DataMining_ACO/Ant.java | 94 +++++++++++++++++++++++++++++++++ Others/DataMining_ACO/input.txt | 12 +++++ 2 files changed, 106 insertions(+) create mode 100644 Others/DataMining_ACO/Ant.java create mode 100644 Others/DataMining_ACO/input.txt diff --git a/Others/DataMining_ACO/Ant.java b/Others/DataMining_ACO/Ant.java new file mode 100644 index 0000000..501ceee --- /dev/null +++ b/Others/DataMining_ACO/Ant.java @@ -0,0 +1,94 @@ +package DataMining_ACO; + +import java.util.ArrayList; + +/** + * ࣬· + * + * @author lyq + * + */ +public class Ant implements Comparable{ + //ϵǰڳ + String currentPos; + // ϱصԭõܾ + Double sumDistance; + // мϢŨȾʱ + double[][] pheromoneMatrix; + // Ѿ߹ijм + ArrayList visitedCitys; + // δ߹ijм + ArrayList nonVisitedCitys; + // ϵǰ߹· + ArrayList currentPath; + + public Ant(double[][] pheromoneMatrix, ArrayList nonVisitedCitys) { + this.pheromoneMatrix = pheromoneMatrix; + this.nonVisitedCitys = nonVisitedCitys; + + this.visitedCitys = new ArrayList<>(); + this.currentPath = new ArrayList<>(); + } + + /** + * ·ܳɱ() + * + * @return + */ + public double calSumDistance() { + sumDistance = 0.0; + String lastCity; + String currentCity; + + for (int i = 0; i < currentPath.size() - 1; i++) { + lastCity = currentPath.get(i); + currentCity = currentPath.get(i + 1); + + // ͨм + sumDistance += ACOTool.disMatrix[Integer.parseInt(lastCity)][Integer + .parseInt(currentCity)]; + } + + return sumDistance; + } + + /** + * ѡǰһ + * @param city + * ѡij + */ + public void goToNextCity(String city){ + this.currentPath.add(city); + this.currentPos = city; + this.nonVisitedCitys.remove(city); + this.visitedCitys.add(city); + } + + /** + * жǷѾ»ص + * @return + */ + public boolean isBack(){ + boolean isBack = false; + String startPos; + String endPos; + + if(currentPath.size() == 0){ + return isBack; + } + + startPos = currentPath.get(0); + endPos = currentPath.get(currentPath.size()-1); + if(currentPath.size() > 1 && 
startPos.equals(endPos)){ + isBack = true; + } + + return isBack; + } + + @Override + public int compareTo(Ant o) { + // TODO Auto-generated method stub + return this.sumDistance.compareTo(o.sumDistance); + } +} diff --git a/Others/DataMining_ACO/input.txt b/Others/DataMining_ACO/input.txt new file mode 100644 index 0000000..87bed70 --- /dev/null +++ b/Others/DataMining_ACO/input.txt @@ -0,0 +1,12 @@ +# CityName +1 +2 +3 +4 +# Distance +1 2 1 +1 3 1.4 +1 4 1 +2 3 1 +2 4 1 +3 4 1 \ No newline at end of file From 8fe6fee4592988e5655d87b1c8eab974b58f4e91 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Mon, 27 Apr 2015 22:21:59 +0800 Subject: [PATCH 30/58] =?UTF-8?q?=E7=AE=97=E6=B3=95=E5=9C=BA=E6=99=AF?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 算法场景测试类 --- Others/DataMining_ACO/Client.java | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 Others/DataMining_ACO/Client.java diff --git a/Others/DataMining_ACO/Client.java b/Others/DataMining_ACO/Client.java new file mode 100644 index 0000000..c335ec0 --- /dev/null +++ b/Others/DataMining_ACO/Client.java @@ -0,0 +1,29 @@ +package DataMining_ACO; + +/** + * Ⱥ㷨 + * @author lyq + * + */ +public class Client { + public static void main(String[] args){ + // + String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt"; + // + int antNum; + //Ʋ + double alpha; + double beita; + double p; + double Q; + + antNum = 3; + alpha = 0.5; + beita = 1; + p = 0.5; + Q = 5; + + ACOTool tool = new ACOTool(filePath, antNum, alpha, beita, p, Q); + tool.antStartSearching(); + } +} From 899186128f5b1c3fb6e55e9f19dbdb33328c8784 Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Mon, 27 Apr 2015 22:31:43 +0800 Subject: [PATCH 31/58] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加Ms-Apriori算法介绍文档 --- README.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 12b3bbd..46cf9b9 100644 --- a/README.md +++ b/README.md @@ -82,4 +82,4 @@ gSpan算法属于图挖掘算法领域。,主要用于频繁子图的挖掘, K-Dimension Tree。多维空间划分树,数据在多维空间进行划分与查找。主要用于关键信息的搜索,类似于在空间中的二分搜索,大大提高了搜索效率,在寻找目标元素时,使用了DFS深度优先的方式和回溯进行最近点的寻找。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/44985259) * ### MS-Apriori -基于多支持度的Apriori算法。是Apriori算法的升级算法,弥补了原先Apriori算法的不足,还增加了支持度差别限制以及支持度计数统计方面的优化,无须再次重新扫描整个数据集,产生关联规则的时候可以根据子集的关系避免一些置信度的计算。 +基于多支持度的Apriori算法。是Apriori算法的升级算法,弥补了原先Apriori算法的不足,还增加了支持度差别限制以及支持度计数统计方面的优化,无须再次重新扫描整个数据集,产生关联规则的时候可以根据子集的关系避免一些置信度的计算。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/45082337) From 5c08a1d9b57e743a270320d23ac52fd4e12d2578 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Tue, 28 Apr 2015 21:16:17 +0800 Subject: [PATCH 32/58] =?UTF-8?q?ACO=E8=9A=81=E7=BE=A4=E7=AE=97=E6=B3=95bu?= =?UTF-8?q?g=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACO蚁群算法bug修复 --- Others/DataMining_ACO/ACOTool.java | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/Others/DataMining_ACO/ACOTool.java b/Others/DataMining_ACO/ACOTool.java index b351346..21b9760 100644 --- a/Others/DataMining_ACO/ACOTool.java +++ b/Others/DataMining_ACO/ACOTool.java @@ -270,8 +270,10 @@ private void refreshPheromone(int t) { addPheromone = 0; for (Ant ant : totalAnts) { - // ÿֻϴϢΪӳԾܳɱ - addPheromone += Q / ant.calSumDistance(); + if(ant.pathContained(i, j)){ + // ÿֻϴϢΪӳԾܳɱ + addPheromone += Q / ant.calSumDistance(); + } } // ϴεĽֵϵͼ @@ -284,9 +286,14 @@ private void refreshPheromone(int t) { } - public void antStartSearching() { + /** + * Ⱥ㷨 + * @param loopCount + * + */ + public void antStartSearching(int loopCount) { // ȺѰҵܴ - int loopCount = 0; + int count = 0; // ѡеһ String selectedCity = ""; @@ -294,7 +301,7 @@ public void antStartSearching() { totalAnts = new ArrayList<>(); random = 
new Random(); - while (loopCount < 10) { + while (count < loopCount) { initAnts(); while (true) { @@ -312,13 +319,16 @@ public void antStartSearching() { // ʱ currentTime++; refreshPheromone(currentTime); + count++; } // ݾɱѡ̵һ· Collections.sort(totalAnts); bestPath = totalAnts.get(0).currentPath; + System.out.println(MessageFormat.format("{0}ѭյó·", count)); + System.out.print("entrance"); for (String cityName : bestPath) { - System.out.println(MessageFormat.format("-->{0}", cityName)); + System.out.print(MessageFormat.format("-->{0}", cityName)); } } From 9e16a9842f026bb70a6e0282bcbb8be9b4fcd820 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Tue, 28 Apr 2015 21:16:45 +0800 Subject: [PATCH 33/58] =?UTF-8?q?=E8=9A=82=E8=9A=81=E7=B1=BB=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E5=BD=93=E5=89=8D=E8=B7=AF=E5=BE=84=E5=8C=85=E5=90=AB?= =?UTF-8?q?=E5=AD=90=E8=B7=AF=E5=BE=84=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 蚂蚁类添加当前路径包含子路径方法 --- Others/DataMining_ACO/Ant.java | 57 ++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/Others/DataMining_ACO/Ant.java b/Others/DataMining_ACO/Ant.java index 501ceee..fd89c71 100644 --- a/Others/DataMining_ACO/Ant.java +++ b/Others/DataMining_ACO/Ant.java @@ -8,8 +8,8 @@ * @author lyq * */ -public class Ant implements Comparable{ - //ϵǰڳ +public class Ant implements Comparable { + // ϵǰڳ String currentPos; // ϱصԭõܾ Double sumDistance; @@ -51,41 +51,72 @@ public double calSumDistance() { return sumDistance; } - + /** * ѡǰһ + * * @param city - * ѡij + * ѡij */ - public void goToNextCity(String city){ + public void goToNextCity(String city) { this.currentPath.add(city); this.currentPos = city; this.nonVisitedCitys.remove(city); this.visitedCitys.add(city); } - + /** * жǷѾ»ص + * * @return */ - public boolean isBack(){ + public boolean isBack() { boolean isBack = false; String startPos; String endPos; - - if(currentPath.size() 
== 0){ + + if (currentPath.size() == 0) { return isBack; } - + startPos = currentPath.get(0); - endPos = currentPath.get(currentPath.size()-1); - if(currentPath.size() > 1 && startPos.equals(endPos)){ + endPos = currentPath.get(currentPath.size() - 1); + if (currentPath.size() > 1 && startPos.equals(endPos)) { isBack = true; } - + return isBack; } + /** + * жڱε߹·Ƿӳij + * + * @param cityI + * I + * @param cityJ + * J + * @return + */ + public boolean pathContained(String cityI, String cityJ) { + String lastCity; + String currentCity; + boolean isContained = false; + + for (int i = 0; i < currentPath.size() - 1; i++) { + lastCity = currentPath.get(i); + currentCity = currentPath.get(i + 1); + + // ijһ·ʼĩλһ£Ϊо˳ + if ((lastCity.equals(cityI) && currentCity.equals(cityJ)) + || (lastCity.equals(cityJ) && currentCity.equals(cityI))) { + isContained = true; + break; + } + } + + return isContained; + } + @Override public int compareTo(Ant o) { // TODO Auto-generated method stub From e0cf53e4928eae1ac307b46a25050878452ab26b Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Tue, 28 Apr 2015 21:17:29 +0800 Subject: [PATCH 34/58] =?UTF-8?q?=E5=9C=BA=E6=99=AF=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E7=B1=BB=E6=B7=BB=E5=8A=A0=E8=BF=AD=E4=BB=A3=E6=AC=A1=E6=95=B0?= =?UTF-8?q?=E5=8F=98=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 场景测试类添加迭代次数变量 --- Others/DataMining_ACO/Client.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Others/DataMining_ACO/Client.java b/Others/DataMining_ACO/Client.java index c335ec0..0e9ede9 100644 --- a/Others/DataMining_ACO/Client.java +++ b/Others/DataMining_ACO/Client.java @@ -11,6 +11,8 @@ public static void main(String[] args){ String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt"; // int antNum; + //Ⱥ㷨 + int loopCount; //Ʋ double alpha; double beita; @@ -22,8 +24,9 @@ public static void main(String[] args){ beita = 1; p = 0.5; Q = 5; + loopCount = 5; ACOTool 
tool = new ACOTool(filePath, antNum, alpha, beita, p, Q); - tool.antStartSearching(); + tool.antStartSearching(loopCount); } } From d5aafc29d9a36826f7c285d11472f063faaa561e Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Tue, 28 Apr 2015 21:32:30 +0800 Subject: [PATCH 35/58] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACO蚁群算法介绍补充 --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 46cf9b9..afb7794 100644 --- a/README.md +++ b/README.md @@ -83,3 +83,6 @@ K-Dimension Tree。多维空间划分树,数据在多维空间进行划分与 * ### MS-Apriori 基于多支持度的Apriori算法。是Apriori算法的升级算法,弥补了原先Apriori算法的不足,还增加了支持度差别限制以及支持度计数统计方面的优化,无须再次重新扫描整个数据集,产生关联规则的时候可以根据子集的关系避免一些置信度的计算。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/45082337) + +* ### ACO +蚁群算法。蚁群算法又称为蚂蚁算法。同GA遗传算法类似,也是运用了大自然规律的算法,用于在图中寻找最优路径的概率型算法。灵感来源于蚂蚁在寻找食物时会散播信息素的发现路径行为。 From 1190fc358aa63bde947a4c669a00855f6fdbbdb1 Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Fri, 1 May 2015 09:53:38 +0800 Subject: [PATCH 36/58] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACO蚁群算法介绍补充 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index afb7794..02100cd 100644 --- a/README.md +++ b/README.md @@ -85,4 +85,4 @@ K-Dimension Tree。多维空间划分树,数据在多维空间进行划分与 基于多支持度的Apriori算法。是Apriori算法的升级算法,弥补了原先Apriori算法的不足,还增加了支持度差别限制以及支持度计数统计方面的优化,无须再次重新扫描整个数据集,产生关联规则的时候可以根据子集的关系避免一些置信度的计算。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/45082337) * ### ACO -蚁群算法。蚁群算法又称为蚂蚁算法。同GA遗传算法类似,也是运用了大自然规律的算法,用于在图中寻找最优路径的概率型算法。灵感来源于蚂蚁在寻找食物时会散播信息素的发现路径行为。 +蚁群算法。蚁群算法又称为蚂蚁算法。同GA遗传算法类似,也是运用了大自然规律的算法,用于在图中寻找最优路径的概率型算法。灵感来源于蚂蚁在寻找食物时会散播信息素的发现路径行为。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/45395491) From 03c577f92919f0b8edad469b294f82b8b3f2f6c8 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 
28 Jun 2015 20:33:42 +0800 Subject: [PATCH 37/58] =?UTF-8?q?=E8=B4=9D=E5=8F=B6=E6=96=AF=E7=BD=91?= =?UTF-8?q?=E7=BB=9C=E5=85=B3=E8=81=94=E5=B1=9E=E6=80=A7=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=AF=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 贝叶斯网络关联属性数据对 --- Others/DataMining_BayesNetwork/attach.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Others/DataMining_BayesNetwork/attach.txt diff --git a/Others/DataMining_BayesNetwork/attach.txt b/Others/DataMining_BayesNetwork/attach.txt new file mode 100644 index 0000000..bd4bdb6 --- /dev/null +++ b/Others/DataMining_BayesNetwork/attach.txt @@ -0,0 +1,4 @@ +B A +E A +A M +A J \ No newline at end of file From eb24cbd42f09fc3fe57f1cc1638970fa5a96896b Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 28 Jun 2015 20:34:05 +0800 Subject: [PATCH 38/58] =?UTF-8?q?=E8=B4=9D=E5=8F=B6=E6=96=AF=E7=BD=91?= =?UTF-8?q?=E7=BB=9C=E7=AE=97=E6=B3=95=E5=B7=A5=E5=85=B7=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 贝叶斯网络算法工具类 --- .../BayesNetWorkTool.java | 326 ++++++++++++++++++ 1 file changed, 326 insertions(+) create mode 100644 Others/DataMining_BayesNetwork/BayesNetWorkTool.java diff --git a/Others/DataMining_BayesNetwork/BayesNetWorkTool.java b/Others/DataMining_BayesNetwork/BayesNetWorkTool.java new file mode 100644 index 0000000..c0bef2e --- /dev/null +++ b/Others/DataMining_BayesNetwork/BayesNetWorkTool.java @@ -0,0 +1,326 @@ +package DataMining_BayesNetwork; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; + +/** + * Ҷ˹㷨 + * + * @author lyq + * + */ +public class BayesNetWorkTool { + // ϸʷֲļַ + private String dataFilePath; + // ¼ļַ + private String attachFilePath; + // + private int columns; + // ʷֲ + private String[][] totalData; + // ݶ + private ArrayList attachData; + // ڵб + 
private ArrayList nodes; + // ֮ĶӦϵ + private HashMap attr2Column; + + public BayesNetWorkTool(String dataFilePath, String attachFilePath) { + this.dataFilePath = dataFilePath; + this.attachFilePath = attachFilePath; + + initDatas(); + } + + /** + * ʼݺ͸ʷֲ + */ + private void initDatas() { + String[] columnValues; + String[] array; + ArrayList datas; + ArrayList adatas; + + // ļжȡ + datas = readDataFile(dataFilePath); + adatas = readDataFile(attachFilePath); + + columnValues = datas.get(0).split(" "); + // ȡУֵͼ + this.attr2Column = new HashMap<>(); + for (int i = 0; i < columnValues.length; i++) { + this.attr2Column.put(columnValues[i], i); + } + + this.columns = columnValues.length; + this.totalData = new String[datas.size()][columns]; + for (int i = 0; i < datas.size(); i++) { + this.totalData[i] = datas.get(i).split(" "); + } + + this.attachData = new ArrayList<>(); + // ݶ + for (String str : adatas) { + array = str.split(" "); + this.attachData.add(array); + } + + // 챴Ҷ˹ṹͼ + constructDAG(); + } + + /** + * ļжȡ + */ + private ArrayList readDataFile(String filePath) { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + while ((str = in.readLine()) != null) { + dataArray.add(str); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + return dataArray; + } + + /** + * ݹݹ챴Ҷ˹޻ͼ + */ + private void constructDAG() { + // ڵڱʶ + boolean srcExist; + boolean desExist; + String name1; + String name2; + Node srcNode; + Node desNode; + + this.nodes = new ArrayList<>(); + for (String[] array : this.attachData) { + srcExist = false; + desExist = false; + + name1 = array[0]; + name2 = array[1]; + + // ½ڵ + srcNode = new Node(name1); + desNode = new Node(name2); + + for (Node temp : this.nodes) { + // ҵͬڵ㣬ȡ + if (srcNode.isEqual(temp)) { + srcExist = true; + srcNode = temp; + } else if (desNode.isEqual(temp)) { + desExist = true; + desNode = temp; + 
} + + // 2ڵ㶼ҵѭ + if (srcExist && desExist) { + break; + } + } + + // 2ڵ + srcNode.connectNode(desNode); + + // ݱʶжǷҪб + if (!srcExist) { + this.nodes.add(srcNode); + } + + if (!desExist) { + this.nodes.add(desNode); + } + } + } + + /** + * ѯ + * + * @param attrValues + * ֵ + * @return + */ + private double queryConditionPro(ArrayList attrValues) { + // жǷֵ + boolean hasPrior; + // жǷֵ + boolean hasBack; + int priorIndex; + int attrIndex; + double backPro; + double totalPro; + double pro; + double currentPro; + // + String[] priorValue; + String[] tempData; + + pro = 0; + totalPro = 0; + backPro = 0; + attrValues.get(0); + priorValue = attrValues.get(0); + // õ + attrValues.remove(0); + + // ȡԵ + priorIndex = this.attr2Column.get(priorValue[0]); + // һе + for (int i = 1; i < this.totalData.length; i++) { + tempData = this.totalData[i]; + + hasPrior = false; + hasBack = true; + + // ǰеĸ + currentPro = Double.parseDouble(tempData[this.columns - 1]); + // жǷ + if (tempData[priorIndex].equals(priorValue[1])) { + hasPrior = true; + } + + for (String[] array : attrValues) { + attrIndex = this.attr2Column.get(array[0]); + + // жֵǷ + if (!tempData[attrIndex].equals(array[1])) { + hasBack = false; + break; + } + } + + // мͳƣֱԵֵͬʱĸ + if (hasBack) { + backPro += currentPro; + if (hasPrior) { + totalPro += currentPro; + } + } else if (hasPrior && attrValues.size() == 0) { + // ֻΪʵļ + totalPro += currentPro; + backPro = 1.0; + } + } + + // ܵĸ=/ֻʱ + pro = totalPro / backPro; + + return pro; + } + + /** + * ݱҶ˹ + * + * @param queryStr + * ѯ + * @return + */ + public double calProByNetWork(String queryStr) { + double temp; + double pro; + String[] array; + // ֵ + String[] preValue; + // ֵ + String[] backValue; + // ͺֵֵĻ + ArrayList attrValues; + + // жǷṹ + if (!satisfiedNewWork(queryStr)) { + return -1; + } + + pro = 1; + // ѯķֽ + array = queryStr.split(","); + + // ʵijֵڵһ¼ + attrValues = new ArrayList<>(); + attrValues.add(array[0].split("=")); + pro = 
queryConditionPro(attrValues); + + for (int i = 0; i < array.length - 1; i++) { + attrValues.clear(); + + // ±Сǰں + backValue = array[i].split("="); + preValue = array[i + 1].split("="); + attrValues.add(preValue); + attrValues.add(backValue); + + // ĸֵ + temp = queryConditionPro(attrValues); + // л + pro *= temp; + } + + return pro; + } + + /** + * ֤¼IJѯϵǷ㱴Ҷ˹ + * + * @param queryStr + * ѯַ + * @return + */ + private boolean satisfiedNewWork(String queryStr) { + String attrName; + String[] array; + boolean isExist; + boolean isSatisfied; + // ǰڵ + Node currentNode; + // ѡڵб + ArrayList nodeList; + + isSatisfied = true; + currentNode = null; + // ѯַķֽ + array = queryStr.split(","); + nodeList = this.nodes; + + for (String s : array) { + // ʼʱĬԶӦĽڵ㲻 + isExist = false; + // õ¼ + attrName = s.split("=")[0]; + + for (Node n : nodeList) { + if (n.name.equals(attrName)) { + isExist = true; + + currentNode = n; + // һֵĺѡڵΪǰڵĺӽڵ + nodeList = currentNode.childNodes; + + break; + } + } + + // δҵĽڵ㣬˵ṹѭ + if (!isExist) { + isSatisfied = false; + break; + } + } + + return isSatisfied; + } +} From 2aa4cfb81e68a5a1cd224f6d246c56a86b746afd Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 28 Jun 2015 20:34:20 +0800 Subject: [PATCH 39/58] =?UTF-8?q?=E8=B4=9D=E5=8F=B6=E6=96=AF=E7=BD=91?= =?UTF-8?q?=E7=BB=9C=E7=AE=97=E6=B3=95=E6=B5=8B=E8=AF=95=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 贝叶斯网络算法测试类 --- Others/DataMining_BayesNetwork/Client.java | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 Others/DataMining_BayesNetwork/Client.java diff --git a/Others/DataMining_BayesNetwork/Client.java b/Others/DataMining_BayesNetwork/Client.java new file mode 100644 index 0000000..98706c4 --- /dev/null +++ b/Others/DataMining_BayesNetwork/Client.java @@ -0,0 +1,32 @@ +package DataMining_BayesNetwork; + +import java.text.MessageFormat; + +/** + * Ҷ˹糡 + * + * @author lyq + * + */ +public class 
Client { + public static void main(String[] args) { + String dataFilePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt"; + String attachFilePath = "C:\\Users\\lyq\\Desktop\\icon\\attach.txt"; + // ѯ + String queryStr; + // + double result; + + // ѯ¼ǵˣˣ½ӵMaryĵ绰 + queryStr = "E=y,A=y,M=y"; + BayesNetWorkTool tool = new BayesNetWorkTool(dataFilePath, + attachFilePath); + result = tool.calProByNetWork(queryStr); + + if (result == -1) { + System.out.println("¼㱴Ҷ˹Ľṹ޷"); + } else { + System.out.println(String.format("¼%sĸΪ%s", queryStr, result)); + } + } +} From 0822040b0d3bf21c6e7a0439cef6720be35650a6 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 28 Jun 2015 20:34:37 +0800 Subject: [PATCH 40/58] =?UTF-8?q?=E8=B4=9D=E5=8F=B6=E6=96=AF=E7=BD=91?= =?UTF-8?q?=E7=BB=9C=E5=9B=BE=E7=9A=84=E8=8A=82=E7=82=B9=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 贝叶斯网络图的节点类 --- Others/DataMining_BayesNetwork/Node.java | 58 ++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 Others/DataMining_BayesNetwork/Node.java diff --git a/Others/DataMining_BayesNetwork/Node.java b/Others/DataMining_BayesNetwork/Node.java new file mode 100644 index 0000000..bb2a07d --- /dev/null +++ b/Others/DataMining_BayesNetwork/Node.java @@ -0,0 +1,58 @@ +package DataMining_BayesNetwork; + +import java.util.ArrayList; + +/** + * Ҷ˹ڵ + * + * @author lyq + * + */ +public class Node { + // ڵ + String name; + // ڵĸ׽ڵ㣬Ҳνڵ㣬ܶ + ArrayList parentNodes; + // ڵӽڵ㣬Ҳνڵ㣬ܶ + ArrayList childNodes; + + public Node(String name) { + this.name = name; + + // ʼ + this.parentNodes = new ArrayList<>(); + this.childNodes = new ArrayList<>(); + } + + /** + * ڵӵĿĽڵ + * + * @param node + * νڵ + */ + public void connectNode(Node node) { + // νڵڵĺӽڵ + this.childNodes.add(node); + // ڵ뵽νڵĸڵ + node.parentNodes.add(this); + } + + /** + * жĿڵǷͬҪȽǷͬ + * + * @param node + * Ŀ + * @return + */ + public boolean isEqual(Node node) { + boolean isEqual; 
+ + isEqual = false; + // ڵͬΪ + if (this.name.equals(node.name)) { + isEqual = true; + } + + return isEqual; + } +} From 760a6747de219b2815379d8e900ce9765f9bc595 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 28 Jun 2015 20:34:55 +0800 Subject: [PATCH 41/58] =?UTF-8?q?=E6=B5=8B=E8=AF=95=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E9=9B=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 测试数据集 --- Others/DataMining_BayesNetwork/input.txt | 33 ++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 Others/DataMining_BayesNetwork/input.txt diff --git a/Others/DataMining_BayesNetwork/input.txt b/Others/DataMining_BayesNetwork/input.txt new file mode 100644 index 0000000..ed01889 --- /dev/null +++ b/Others/DataMining_BayesNetwork/input.txt @@ -0,0 +1,33 @@ +B E A M J P +y y y y y 0.00012 +y y y y n 0.000051 +y y y n y 0.000013 +y y y n n 0.0000057 +y y n y y 0.000000005 +y y n y n 0.00000049 +y y n n y 0.000000095 +y y n n n 0.0000094 +y n y y y 0.0058 +y n y y n 0.0025 +y n y n y 0.00065 +y n y n n 0.00028 +y n n y y 0.00000029 +y n n y n 0.000029 +y n n n y 0.0000056 +y n n n n 0.00055 +n y y y y 0.0036 +n y y y n 0.0016 +n y y n y 0.0004 +n y y n n 0.00017 +n y n y y 0.000007 +n y n y n 0.00069 +n y n n y 0.00013 +n y n n n 0.013 +n n y y y 0.00061 +n n y y n 0.00026 +n n y n y 0.000068 +n n y n n 0.000029 +n n n y y 0.00048 +n n n y n 0.048 +n n n n y 0.0092 +n n n n n 0.91 \ No newline at end of file From bf1d07783481ba29c5eda3135e15f649add6654a Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 28 Jun 2015 20:41:33 +0800 Subject: [PATCH 42/58] =?UTF-8?q?=E8=B4=9D=E5=8F=B6=E6=96=AF=E7=BD=91?= =?UTF-8?q?=E7=BB=9C=E7=AE=97=E6=B3=95=E5=A2=9E=E5=8A=A0=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E9=9B=86=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 贝叶斯网络算法增加测试数据集说明 --- 
Others/DataMining_BayesNetwork/BayesNetWorkTool.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Others/DataMining_BayesNetwork/BayesNetWorkTool.java b/Others/DataMining_BayesNetwork/BayesNetWorkTool.java index c0bef2e..cbf99ae 100644 --- a/Others/DataMining_BayesNetwork/BayesNetWorkTool.java +++ b/Others/DataMining_BayesNetwork/BayesNetWorkTool.java @@ -50,9 +50,11 @@ private void initDatas() { adatas = readDataFile(attachFilePath); columnValues = datas.get(0).split(" "); - // ȡУֵͼ + // Ըƴ¼B()E()A().M(ӵMĵ绰)JͬM˼, + // ֵy,nyesno this.attr2Column = new HashMap<>(); for (int i = 0; i < columnValues.length; i++) { + // ȡУֵͼ this.attr2Column.put(columnValues[i], i); } From 4b27b21f591e9a5844c343aa71b7e2099ce0249c Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Sun, 28 Jun 2015 20:51:01 +0800 Subject: [PATCH 43/58] =?UTF-8?q?=E6=96=87=E6=A1=A3=E4=B8=AD=E6=9B=B4?= =?UTF-8?q?=E6=96=B0=E8=B4=9D=E5=8F=B6=E6=96=AF=E7=BD=91=E7=BB=9C=E7=AE=97?= =?UTF-8?q?=E6=B3=95=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 文档中更新贝叶斯网络算法说明 --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 02100cd..d97ca46 100644 --- a/README.md +++ b/README.md @@ -86,3 +86,6 @@ K-Dimension Tree。多维空间划分树,数据在多维空间进行划分与 * ### ACO 蚁群算法。蚁群算法又称为蚂蚁算法。同GA遗传算法类似,也是运用了大自然规律的算法,用于在图中寻找最优路径的概率型算法。灵感来源于蚂蚁在寻找食物时会散播信息素的发现路径行为。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/45395491) + +* ### BayesNetwork +贝叶斯网络算法。利用了贝叶斯网络的DAG有向无环图,允许各个事件保留一定的依赖关系,从而能求出更加精准的概率。 From d9bb91b4bdfefb2f845520d81e1a961438129640 Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Sun, 28 Jun 2015 21:19:36 +0800 Subject: [PATCH 44/58] =?UTF-8?q?=E5=A2=9E=E5=8A=A018=E5=A4=A7=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E6=8C=96=E6=8E=98=E7=AE=97=E6=B3=95=E7=9B=AE=E5=BD=95?= =?UTF-8?q?=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
增加18大数据挖掘算法目录表 --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index d97ca46..4c0f849 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,27 @@ # 数据挖掘算法 +## 算法目录 +包名 | 目录名 | 算法名 | +-----| ------ |--------| +AssociationAnalysis | DataMining_Apriori | Apriori-关联规则挖掘算法 +AssociationAnalysis | DataMining_FPTree | FPTree-频繁模式树算法 +BaggingAndBoosting | DataMining_AdaBoost | AdaBoost-装袋提升算法 +Classification | DataMining_CART | CART-分类回归树算法 +Classification | DataMining_ID3 | ID3-决策树分类算法 +Classification | DataMining_KNN | KNN-k最近邻算法工具类 +Classification | DataMining_NaiveBayes | NaiveBayes-朴素贝叶斯算法 +Clustering | DataMining_BIRCH | BIRCH-层次聚类算法 +Clustering | DataMining_KMeans | KMeans-K均值算法 +GraphMining | DataMining_GSpan | GSpan-频繁子图挖掘算法 +IntegratedMining | DataMining_CBA | CBA-基于关联规则的分类算法 +LinkMining | DataMining_HITS | HITS-链接分析算法 +LinkMining | DataMining_PageRank | PageRank-网页重要性/排名算法 +RoughSets | DataMining_RoughSets | RoughSets-粗糙集属性约简算法 +SequentialPatterns | DataMining_GSP | GSP-序列模式分析算法 +SequentialPatterns | DataMining_PrefixSpan | PrefixSpan-序列模式分析算法 +StatisticalLearning | DataMining_EM | EM-期望最大化算法 +StatisticalLearning | DataMining_SVM | SVM-支持向量机算法 + ##18大经典DM算法 18大数据挖掘的经典算法以及代码实现,涉及到了决策分类,聚类,链接挖掘,关联挖掘,模式挖掘等等方面,后面都是相应算法的博文链接,希望能够帮助大家学。 目前追加了其他的一些经典的DM算法,在others的包中涉及聚类,分类,图算法,搜索算等等,没有具体分类。 From ceba5c25ae75249db80e8cd6c7b9201f1a52d422 Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Sun, 28 Jun 2015 21:31:53 +0800 Subject: [PATCH 45/58] =?UTF-8?q?=E6=B7=BB=E5=8A=A0others=E7=9B=AE?= =?UTF-8?q?=E5=BD=95=E4=B8=8B=E7=9A=84=E7=AE=97=E6=B3=95=E7=9B=AE=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加others目录下的算法目录 --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 4c0f849..d38a80d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # 数据挖掘算法 ## 算法目录 +#### 18大DM算法 包名 | 目录名 | 算法名 | -----| ------ 
|--------| AssociationAnalysis | DataMining_Apriori | Apriori-关联规则挖掘算法 @@ -21,6 +22,19 @@ SequentialPatterns | DataMining_GSP | GSP-序列模式分析算法 SequentialPatterns | DataMining_PrefixSpan | PrefixSpan-序列模式分析算法 StatisticalLearning | DataMining_EM | EM-期望最大化算法 StatisticalLearning | DataMining_SVM | SVM-支持向量机算法 +#### 其他经典DM算法 +包名 | 目录名 | 算法名 | +-----| ------ |--------| +Others | DataMining_ACO | ACO-蚁群算法 +Others | DataMining_BayesNetwork | BayesNetwork-贝叶斯网络算法 +Others | DataMining_CABDDCC | CABDDCC-基于连通图的分裂聚类算法 +Others | DataMining_Chameleon | Chameleon-两阶段合并聚类算法 +Others | DataMining_DBSCAN | DBSCAN-基于密度的聚类算法 +Others | DataMining_GA | GA-遗传算法 +Others | DataMining_GA_Maze | GA_Maze-遗传算法在走迷宫游戏中的应用算法 +Others | DataMining_KDTree | KDTree-k维空间关键数据检索算法工具类 +Others | DataMining_MSApriori | MSApriori-基于多支持度的Apriori算法 +Others | DataMining_RandomForest | RandomForest-随机森林算法 ##18大经典DM算法 18大数据挖掘的经典算法以及代码实现,涉及到了决策分类,聚类,链接挖掘,关联挖掘,模式挖掘等等方面,后面都是相应算法的博文链接,希望能够帮助大家学。 From 3fcfe465dd2d3b030cf7baf650b72214286b0222 Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Mon, 29 Jun 2015 19:31:32 +0800 Subject: [PATCH 46/58] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更新说明文档 --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d38a80d..64801c5 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Others | DataMining_KDTree | KDTree-k维空间关键数据检索算法工具类 Others | DataMining_MSApriori | MSApriori-基于多支持度的Apriori算法 Others | DataMining_RandomForest | RandomForest-随机森林算法 -##18大经典DM算法 +## 18大经典DM算法 18大数据挖掘的经典算法以及代码实现,涉及到了决策分类,聚类,链接挖掘,关联挖掘,模式挖掘等等方面,后面都是相应算法的博文链接,希望能够帮助大家学。 目前追加了其他的一些经典的DM算法,在others的包中涉及聚类,分类,图算法,搜索算等等,没有具体分类。 @@ -124,4 +124,10 @@ K-Dimension Tree。多维空间划分树,数据在多维空间进行划分与 
蚁群算法。蚁群算法又称为蚂蚁算法。同GA遗传算法类似,也是运用了大自然规律的算法,用于在图中寻找最优路径的概率型算法。灵感来源于蚂蚁在寻找食物时会散播信息素的发现路径行为。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/45395491) * ### BayesNetwork -贝叶斯网络算法。利用了贝叶斯网络的DAG有向无环图,允许各个事件保留一定的依赖关系,从而能求出更加精准的概率。 +贝叶斯网络算法。弥补了朴素贝叶斯算法中必须要事件独立性的缺点,利用了贝叶斯网络的DAG有向无环图,允许各个事件保留一定的依赖关系,从而能得到精准的分类效果。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/46683729) + +## 算法使用方法 +在每个算法中给出了3大类型,主算法程序,调用程序,输入数据,调用方法如下: +* 将需要测试的数据转化成与给定输入数据相同的格式 +* 然后以Client类的测试程序调用方式进行使用。 +* 也可以自行修改算法程序,来适用于自己的使用场景 From c6719605b4224cba55ff53e63f2d97eea73a21bf Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 5 Jul 2015 14:51:59 +0800 Subject: [PATCH 47/58] =?UTF-8?q?=E6=A0=91=E5=9E=8B=E6=9C=B4=E7=B4=A0?= =?UTF-8?q?=E8=B4=9D=E5=8F=B6=E6=96=AF=E7=AE=97=E6=B3=95=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E9=9B=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 树型朴素贝叶斯算法测试数据集 --- Others/DataMining_TAN/input.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 Others/DataMining_TAN/input.txt diff --git a/Others/DataMining_TAN/input.txt b/Others/DataMining_TAN/input.txt new file mode 100644 index 0000000..aea7074 --- /dev/null +++ b/Others/DataMining_TAN/input.txt @@ -0,0 +1,15 @@ +OutLook Temperature Humidity Wind PlayTennis +Sunny Hot High Weak No +Sunny Hot High Strong No +Overcast Hot High Weak Yes +Rainy Mild High Weak Yes +Rainy Cool Normal Weak Yes +Rainy Cool Normal Strong No +Overcast Cool Normal Strong Yes +Sunny Mild High Weak No +Sunny Cool Normal Weak Yes +Rainy Mild Normal Weak Yes +Sunny Mild Normal Strong Yes +Overcast Mild High Strong Yes +Overcast Hot Normal Weak Yes +Rainy Mild High Strong No \ No newline at end of file From 1eb1d5c48343ae9bf122beff81fc65a071ea5221 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 5 Jul 2015 14:52:23 +0800 Subject: [PATCH 48/58] =?UTF-8?q?=E6=A0=91=E5=9E=8B=E6=9C=B4=E7=B4=A0?=
=?UTF-8?q?=E8=B4=9D=E5=8F=B6=E6=96=AF=E7=AE=97=E6=B3=95=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 树型朴素贝叶斯算法测试类 --- Others/DataMining_TAN/Client.java | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 Others/DataMining_TAN/Client.java diff --git a/Others/DataMining_TAN/Client.java b/Others/DataMining_TAN/Client.java new file mode 100644 index 0000000..bd104bc --- /dev/null +++ b/Others/DataMining_TAN/Client.java @@ -0,0 +1,36 @@ +package DataMining_TAN; + +/** + * TANرҶ˹㷨 + * + * @author lyq + * + */ +public class Client { + public static void main(String[] args) { + String filePath = "C:\\Users\\lyq\\Desktop\\icon\\input.txt"; + // ѯ + String queryStr; + // 1 + double classResult1; + // 2 + double classResult2; + + TANTool tool = new TANTool(filePath); + queryStr = "OutLook=Sunny,Temperature=Hot,Humidity=High,Wind=Weak,PlayTennis=No"; + classResult1 = tool.calHappenedPro(queryStr); + + queryStr = "OutLook=Sunny,Temperature=Hot,Humidity=High,Wind=Weak,PlayTennis=Yes"; + classResult2 = tool.calHappenedPro(queryStr); + + System.out.println(String.format("Ϊ%sõĸΪ%s", "PlayTennis=No", + classResult1)); + System.out.println(String.format("Ϊ%sõĸΪ%s", "PlayTennis=Yes", + classResult2)); + if (classResult1 > classResult2) { + System.out.println("ΪPlayTennis=No"); + } else { + System.out.println("ΪPlayTennis=Yes"); + } + } +} From a4899019ba66224aa6a87d00035e4af05effa129 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 5 Jul 2015 14:52:44 +0800 Subject: [PATCH 49/58] =?UTF-8?q?=E8=B4=9D=E5=8F=B6=E6=96=AF=E7=BD=91?= =?UTF-8?q?=E7=BB=9C=E8=8A=82=E7=82=B9=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 贝叶斯网络节点类 --- Others/DataMining_TAN/Node.java | 63 +++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 Others/DataMining_TAN/Node.java diff --git 
a/Others/DataMining_TAN/Node.java b/Others/DataMining_TAN/Node.java new file mode 100644 index 0000000..f3a3b51 --- /dev/null +++ b/Others/DataMining_TAN/Node.java @@ -0,0 +1,63 @@ +package DataMining_TAN; + +import java.util.ArrayList; + +/** + * Ҷ˹ڵ + * + * @author lyq + * + */ +public class Node { + //ڵΨһidڵӷȷ + int id; + // ڵ + String name; + // ýڵĽڵ + ArrayList connectedNodes; + + public Node(int id, String name) { + this.id = id; + this.name = name; + + // ʼ + this.connectedNodes = new ArrayList<>(); + } + + /** + * ڵӵĿĽڵ + * + * @param node + * νڵ + */ + public void connectNode(Node node) { + // + if(this.id == node.id){ + return; + } + + // ڵڵĽڵб + this.connectedNodes.add(node); + // ڵ뵽Ŀڵб + node.connectedNodes.add(this); + } + + /** + * жĿڵǷͬҪȽǷͬ + * + * @param node + * Ŀ + * @return + */ + public boolean isEqual(Node node) { + boolean isEqual; + + isEqual = false; + // ڵͬΪ + if (this.id == node.id) { + isEqual = true; + } + + return isEqual; + } +} From 744395415c608884513010d486cb0bd391a0c00a Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 5 Jul 2015 14:53:00 +0800 Subject: [PATCH 50/58] =?UTF-8?q?=E5=B1=9E=E6=80=A7=E8=8A=82=E7=82=B9?= =?UTF-8?q?=E4=BA=92=E4=BF=A1=E6=81=AF=E5=80=BC=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 属性节点互信息值类 --- Others/DataMining_TAN/AttrMutualInfo.java | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 Others/DataMining_TAN/AttrMutualInfo.java diff --git a/Others/DataMining_TAN/AttrMutualInfo.java b/Others/DataMining_TAN/AttrMutualInfo.java new file mode 100644 index 0000000..6caf12d --- /dev/null +++ b/Others/DataMining_TAN/AttrMutualInfo.java @@ -0,0 +1,28 @@ +package DataMining_TAN; + +/** + * ֮ĻϢֵʾ֮ĹԴС + * @author lyq + * + */ +public class AttrMutualInfo implements Comparable{ + //Ϣֵ + Double value; + //ֵ + Node[] nodeArray; + + public AttrMutualInfo(double value, Node node1, Node node2){ + this.value = value; + + 
this.nodeArray = new Node[2]; + this.nodeArray[0] = node1; + this.nodeArray[1] = node2; + } + + @Override + public int compareTo(AttrMutualInfo o) { + // TODO Auto-generated method stub + return o.value.compareTo(this.value); + } + +} From 242ee0fa25b821429927b6e799f5ff799e6ebbac Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 5 Jul 2015 14:53:26 +0800 Subject: [PATCH 51/58] =?UTF-8?q?=E6=A0=91=E5=9E=8B=E6=9C=B4=E7=B4=A0?= =?UTF-8?q?=E8=B4=9D=E5=8F=B6=E6=96=AF=E7=AE=97=E6=B3=95=E5=B7=A5=E5=85=B7?= =?UTF-8?q?=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 树型朴素贝叶斯算法工具类 --- Others/DataMining_TAN/TANTool.java | 571 +++++++++++++++++++++++++++++ 1 file changed, 571 insertions(+) create mode 100644 Others/DataMining_TAN/TANTool.java diff --git a/Others/DataMining_TAN/TANTool.java b/Others/DataMining_TAN/TANTool.java new file mode 100644 index 0000000..56e90a6 --- /dev/null +++ b/Others/DataMining_TAN/TANTool.java @@ -0,0 +1,571 @@ +package DataMining_TAN; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; + +/** + * TANرҶ˹㷨 + * + * @author lyq + * + */ +public class TANTool { + // ݼַ + private String filePath; + // ݼ,һ + private int attrNum; + // + private String classAttrName; + // + private String[] attrNames; + // Ҷ˹ߵķڵֵΪڵid,i->j + private int[][] edges; + // ±ӳ + private HashMap attr2Column; + // ԣԶȡֵӳ + private HashMap> attr2Values; + // Ҷ˹ܽڵб + private ArrayList totalNodes; + // ܵIJ + private ArrayList totalDatas; + + public TANTool(String filePath) { + this.filePath = filePath; + + readDataFile(); + } + + /** + * ļжȡ + */ + private void readDataFile() { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + String[] array; + + while 
((str = in.readLine()) != null) { + array = str.split(" "); + dataArray.add(array); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + this.totalDatas = dataArray; + this.attrNames = this.totalDatas.get(0); + this.attrNum = this.attrNames.length; + this.classAttrName = this.attrNames[attrNum - 1]; + + Node node; + this.edges = new int[attrNum][attrNum]; + this.totalNodes = new ArrayList<>(); + this.attr2Column = new HashMap<>(); + this.attr2Values = new HashMap<>(); + + // ԽڵidСΪ0 + node = new Node(0, attrNames[attrNum - 1]); + this.totalNodes.add(node); + for (int i = 0; i < attrNames.length; i++) { + if (i < attrNum - 1) { + // Ҷ˹ڵ㣬ÿһڵ + node = new Node(i + 1, attrNames[i]); + this.totalNodes.add(node); + } + + // Ե±ӳ + this.attr2Column.put(attrNames[i], i); + } + + String[] temp; + ArrayList values; + // ֵԵӳƥ + for (int i = 1; i < this.totalDatas.size(); i++) { + temp = this.totalDatas.get(i); + + for (int j = 0; j < temp.length; j++) { + // жmapǷ + if (this.attr2Values.containsKey(attrNames[j])) { + values = this.attr2Values.get(attrNames[j]); + } else { + values = new ArrayList<>(); + } + + if (!values.contains(temp[j])) { + // µֵ + values.add(temp[j]); + } + + this.attr2Values.put(attrNames[j], values); + } + } + } + + /** + * ϢȶԹȨؿ,صһڵΪڵ + * + * @param iArray + */ + private Node constructWeightTree(ArrayList iArray) { + Node node1; + Node node2; + Node root; + ArrayList existNodes; + + existNodes = new ArrayList<>(); + + for (Node[] i : iArray) { + node1 = i[0]; + node2 = i[1]; + + // 2ڵ + node1.connectNode(node2); + // ֻ· + addIfNotExist(node1, existNodes); + addIfNotExist(node2, existNodes); + + if (existNodes.size() == attrNum - 1) { + break; + } + } + + // صһΪڵ + root = existNodes.get(0); + return root; + } + + /** + * Ϊͽṹȷߵķ򣬷ΪԸڵ㷽ָԽڵ㷽 + * + * @param root + * ǰĽڵ + */ + private void confirmGraphDirection(Node currentNode) { + int i; + int j; + ArrayList connectedNodes; + + connectedNodes = currentNode.connectedNodes; + + i = 
currentNode.id; + for (Node n : connectedNodes) { + j = n.id; + + // жӴ2ڵķǷȷ + if (edges[i][j] == 0 && edges[j][i] == 0) { + // ûȷƶΪi->j + edges[i][j] = 1; + + // ݹ + confirmGraphDirection(n); + } + } + } + + /** + * ΪԽڵӷԽڵΪڵ + * + * @param parentNode + * ڵ + * @param nodeList + * ӽڵб + */ + private void addParentNode() { + // Խڵ + Node parentNode; + + parentNode = null; + for (Node n : this.totalNodes) { + if (n.id == 0) { + parentNode = n; + break; + } + } + + for (Node child : this.totalNodes) { + parentNode.connectNode(child); + + if (child.id != 0) { + // ȷӷ + this.edges[0][child.id] = 1; + } + } + } + + /** + * ڽڵ㼯ӽڵ + * + * @param node + * ӽڵ + * @param existNodes + * ѴڵĽڵб + * @return + */ + public boolean addIfNotExist(Node node, ArrayList existNodes) { + boolean canAdd; + + canAdd = true; + for (Node n : existNodes) { + // ڵбѾнڵ㣬ʧ + if (n.isEqual(node)) { + canAdd = false; + break; + } + } + + if (canAdd) { + existNodes.add(node); + } + + return canAdd; + } + + /** + * ڵ + * + * @param node + * nodeĺ + * @param queryParam + * ѯԲ + * @return + */ + private double calConditionPro(Node node, HashMap queryParam) { + int id; + double pro; + String value; + String[] attrValue; + + ArrayList priorAttrInfos; + ArrayList backAttrInfos; + ArrayList parentNodes; + + pro = 1; + id = node.id; + parentNodes = new ArrayList<>(); + priorAttrInfos = new ArrayList<>(); + backAttrInfos = new ArrayList<>(); + + for (int i = 0; i < this.edges.length; i++) { + // ѰҸڵid + if (this.edges[i][id] == 1) { + for (Node temp : this.totalNodes) { + // ѰĿڵid + if (temp.id == i) { + parentNodes.add(temp); + break; + } + } + } + } + + // ȡԵֵ, + value = queryParam.get(node.name); + attrValue = new String[2]; + attrValue[0] = node.name; + attrValue[1] = value; + priorAttrInfos.add(attrValue); + + // һӺ + for (Node p : parentNodes) { + value = queryParam.get(p.name); + attrValue = new String[2]; + attrValue[0] = p.name; + attrValue[1] = value; + + backAttrInfos.add(attrValue); + } + + pro = 
queryConditionPro(priorAttrInfos, backAttrInfos); + + return pro; + } + + /** + * ѯ + * + * @param attrValues + * ֵ + * @return + */ + private double queryConditionPro(ArrayList priorValues, + ArrayList backValues) { + // жǷֵ + boolean hasPrior; + // жǷֵ + boolean hasBack; + int attrIndex; + double backPro; + double totalPro; + double pro; + String[] tempData; + + pro = 0; + totalPro = 0; + backPro = 0; + + // һе + for (int i = 1; i < this.totalDatas.size(); i++) { + tempData = this.totalDatas.get(i); + + hasPrior = true; + hasBack = true; + + // жǷ + for (String[] array : priorValues) { + attrIndex = this.attr2Column.get(array[0]); + + // жֵǷ + if (!tempData[attrIndex].equals(array[1])) { + hasPrior = false; + break; + } + } + + // жǷ + for (String[] array : backValues) { + attrIndex = this.attr2Column.get(array[0]); + + // жֵǷ + if (!tempData[attrIndex].equals(array[1])) { + hasBack = false; + break; + } + } + + // мͳƣֱԵֵͬʱĸ + if (hasBack) { + backPro++; + if (hasPrior) { + totalPro++; + } + } else if (hasPrior && backValues.size() == 0) { + // ֻΪʵļ + totalPro++; + backPro = 1.0; + } + } + + if (backPro == 0) { + pro = 0; + } else { + // ܵĸ=/ֻʱ + pro = totalPro / backPro; + } + + return pro; + } + + /** + * ѯ㷢 + * + * @param queryParam + * + * @return + */ + public double calHappenedPro(String queryParam) { + double result; + double temp; + // ֵ + String classAttrValue; + String[] array; + String[] array2; + HashMap params; + + result = 1; + params = new HashMap<>(); + + // вѯַIJֽ + array = queryParam.split(","); + for (String s : array) { + array2 = s.split("="); + params.put(array2[0], array2[1]); + } + + classAttrValue = params.get(classAttrName); + // Ҷ˹ṹ + constructBayesNetWork(classAttrValue); + + for (Node n : this.totalNodes) { + temp = calConditionPro(n, params); + + // Ϊ˱Ϊ0󣬽΢ + if (temp == 0) { + temp = 0.001; + } + + // ϸʹʽг˻ + result *= temp; + } + + return result; + } + + /** + * ͱҶ˹ṹ + * + * @param value + * ֵ + */ + private void 
constructBayesNetWork(String value) { + Node rootNode; + ArrayList mInfoArray; + // Ϣȶ + ArrayList iArray; + + iArray = null; + rootNode = null; + + // ÿ¹Ҷ˹ṹʱԭеӽṹ + for (Node n : this.totalNodes) { + n.connectedNodes.clear(); + } + this.edges = new int[attrNum][attrNum]; + + // ӻϢȡֵ + iArray = new ArrayList<>(); + mInfoArray = calAttrMutualInfoArray(value); + for (AttrMutualInfo v : mInfoArray) { + iArray.add(v.nodeArray); + } + + // Ȩؿ + rootNode = constructWeightTree(iArray); + // Ϊͼȷߵķ + confirmGraphDirection(rootNode); + // ΪÿԽڵӷԸڵ + addParentNode(); + } + + /** + * ֵ֮ĻϢֵ + * + * @param value + * ֵ + * @return + */ + private ArrayList calAttrMutualInfoArray(String value) { + double iValue; + Node node1; + Node node2; + AttrMutualInfo mInfo; + ArrayList mInfoArray; + + mInfoArray = new ArrayList<>(); + + for (int i = 0; i < this.totalNodes.size() - 1; i++) { + node1 = this.totalNodes.get(i); + // Խڵ + if (node1.id == 0) { + continue; + } + + for (int j = i + 1; j < this.totalNodes.size(); j++) { + node2 = this.totalNodes.get(j); + // Խڵ + if (node2.id == 0) { + continue; + } + + // 2Խڵ֮ĻϢֵ + iValue = calMutualInfoValue(node1, node2, value); + mInfo = new AttrMutualInfo(iValue, node1, node2); + mInfoArray.add(mInfo); + } + } + + // нУûϢֵߵڹ + Collections.sort(mInfoArray); + + return mInfoArray; + } + + /** + * 2ԽڵĻϢֵ + * + * @param node1 + * ڵ1 + * @param node2 + * ڵ2 + * @param vlaue + * ֵ + */ + private double calMutualInfoValue(Node node1, Node node2, String value) { + double iValue; + double temp; + // ֲͬĺ + double pXiXj; + double pXi; + double pXj; + String[] array1; + String[] array2; + ArrayList attrValues1; + ArrayList attrValues2; + ArrayList priorValues; + // ʣֵ + ArrayList backValues; + + array1 = new String[2]; + array2 = new String[2]; + priorValues = new ArrayList<>(); + backValues = new ArrayList<>(); + + iValue = 0; + array1[0] = classAttrName; + array1[1] = value; + // Զ + backValues.add(array1); + + // ȡڵԵֵ + attrValues1 = 
this.attr2Values.get(node1.name); + attrValues2 = this.attr2Values.get(node2.name); + + for (String v1 : attrValues1) { + for (String v2 : attrValues2) { + priorValues.clear(); + + array1 = new String[2]; + array1[0] = node1.name; + array1[1] = v1; + priorValues.add(array1); + + array2 = new String[2]; + array2[0] = node2.name; + array2[1] = v2; + priorValues.add(array2); + + // 3µĸ + pXiXj = queryConditionPro(priorValues, backValues); + + priorValues.clear(); + priorValues.add(array1); + pXi = queryConditionPro(priorValues, backValues); + + priorValues.clear(); + priorValues.add(array2); + pXj = queryConditionPro(priorValues, backValues); + + // һΪ0ֱӸֵΪ0 + if (pXiXj == 0 || pXi == 0 || pXj == 0) { + temp = 0; + } else { + // ùʽԴֵϵĸ + temp = pXiXj * Math.log(pXiXj / (pXi * pXj)) / Math.log(2); + } + + // кֵϵۼӼΪԵĻϢֵ + iValue += temp; + } + } + + return iValue; + } +} From 13f6c78111595688c107e269e05d7301d778782d Mon Sep 17 00:00:00 2001 From: lyq <675250079@qq.com> Date: Sun, 5 Jul 2015 15:03:39 +0800 Subject: [PATCH 52/58] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更新说明文档 --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 64801c5..2e87d47 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ Others | DataMining_GA_Maze | GA_Maze-遗传算法在走迷宫游戏中的应用 Others | DataMining_KDTree | KDTree-k维空间关键数据检索算法工具类 Others | DataMining_MSApriori | MSApriori-基于多支持度的Apriori算法 Others | DataMining_RandomForest | RandomForest-随机森林算法 +Others | DataMining_TAN |TAN-树型朴素贝叶斯算法 ## 18大经典DM算法 18大数据挖掘的经典算法以及代码实现,涉及到了决策分类,聚类,链接挖掘,关联挖掘,模式挖掘等等方面,后面都是相应算法的博文链接,希望能够帮助大家学。 @@ -124,7 +125,10 @@ K-Dimension Tree。多维空间划分树,数据在多维空间进行划分与 蚁群算法。蚁群算法又称为蚂蚁算法。同GA遗传算法类似,也是运用了大自然规律的算法,用于在图中寻找最优路径的概率型算法。灵感来源于蚂蚁在寻找食物时会散播信息素的发现路径行为。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/45395491) * ### BayesNetwork 
-贝叶斯网络算法。弥补了朴素贝叶斯算法中必须要事件独立性的缺点,利用了贝叶斯网络的DAG有向无环图,允许各个事件保留一定的依赖关系,从而能得到精准的分类效果。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/46683729) +贝叶斯网络算法。弥补了朴素贝叶斯算法中必须要事件独立性的缺点,利用了贝叶斯网络的DAG有向无环图,允许各个事件保留一定的依赖关系,网络结构中的每个节点代表一种属性,边代表相应的条件概率值,通过计算从而能得到精准的分类效果。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/46683729) + +* ### TAN +树型朴素贝叶斯算法。此算法又被称为加强版朴素贝叶斯算法。在满足原有朴素贝叶斯条件的基础上,他允许部条件属性直接的关联性。形成树型的结构。 ## 算法使用方法 在每个算法中给出了3大类型,主算法程序,调用程序,输入数据,调用方法如下: From 20302fd5aa232c454811b81f211457ae7553dbdc Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 2 Aug 2015 14:07:21 +0800 Subject: [PATCH 53/58] =?UTF-8?q?=E7=BB=B4=E7=89=B9=E6=AF=94=E7=AE=97?= =?UTF-8?q?=E6=B3=95=E5=AE=8F=E5=AE=9A=E4=B9=89=E5=8F=98=E9=87=8F=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 维特比算法宏定义变量类 --- Others/DataMining_Viterbi/BaseNames.java | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 Others/DataMining_Viterbi/BaseNames.java diff --git a/Others/DataMining_Viterbi/BaseNames.java b/Others/DataMining_Viterbi/BaseNames.java new file mode 100644 index 0000000..cca0aaa --- /dev/null +++ b/Others/DataMining_Viterbi/BaseNames.java @@ -0,0 +1,24 @@ +package DataMining_Viterbi; + +/** + * Index constants for the Viterbi weather/humidity example; each group is numbered sequentially from 0. + * @author lyq + * + */ +public class BaseNames { + // Day indices + public static final int DAY1 = 0; + public static final int DAY2 = 1; + public static final int DAY3 = 2; + + // Weather state indices + public static final int WEATHER_SUNNY = 0; + public static final int WEATHER_CLOUDY = 1; + public static final int WEATHER_RAINY = 2; + + // Humidity observation indices (Dry, Dryish, Damp, Soggy); each value must be distinct + public static final int HUMIDITY_DRY = 0; + public static final int HUMIDITY_DRYISH = 1; + public static final int HUMIDITY_DAMP = 2; + public static final int HUMIDITY_SOGGY = 3; +} From 2cef388cf1a0cd5c7e0d3610278c437bd4def155 Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 2 Aug 2015 14:07:40 +0800 Subject: [PATCH 54/58]
=?UTF-8?q?=E7=BB=B4=E7=89=B9=E6=AF=94=E7=AE=97?= =?UTF-8?q?=E6=B3=95=E5=9C=BA=E6=99=AF=E6=B5=8B=E8=AF=95=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 维特比算法场景测试类 --- Others/DataMining_Viterbi/Client.java | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 Others/DataMining_Viterbi/Client.java diff --git a/Others/DataMining_Viterbi/Client.java b/Others/DataMining_Viterbi/Client.java new file mode 100644 index 0000000..577eabd --- /dev/null +++ b/Others/DataMining_Viterbi/Client.java @@ -0,0 +1,31 @@ +package DataMining_Viterbi; + +/** + * Scenario test driver for the Viterbi algorithm tool + * + * @author lyq + * + */ +public class Client { + public static void main(String[] args) { + // Path of the state-transition matrix file + String stmFilePath; + // Path of the confusion matrix file + String cfFilePath; + // Sequence of observed states + String[] observeStates; + // Initial state probabilities + double[] initStatePro; + ViterbiTool tool; + + stmFilePath = "C:\\Users\\lyq\\Desktop\\icon\\stmatrix.txt"; + cfFilePath = "C:\\Users\\lyq\\Desktop\\icon\\humidity-matrix.txt"; + + initStatePro = new double[] { 0.63, 0.17, 0.20 }; + observeStates = new String[] { "Dry", "Damp", "Soggy" }; + + tool = new ViterbiTool(stmFilePath, cfFilePath, initStatePro, + observeStates); + tool.calHMMObserve(); + } +} From d8b5c81fbdaedac4dfcca174b35b525fd87a1e3f Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 2 Aug 2015 14:07:55 +0800 Subject: [PATCH 55/58] =?UTF-8?q?=E7=BB=B4=E7=89=B9=E6=AF=94=E7=AE=97?= =?UTF-8?q?=E6=B3=95=E5=B7=A5=E5=85=B7=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 维特比算法工具类 --- Others/DataMining_Viterbi/ViterbiTool.java | 240 +++++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 Others/DataMining_Viterbi/ViterbiTool.java diff --git a/Others/DataMining_Viterbi/ViterbiTool.java b/Others/DataMining_Viterbi/ViterbiTool.java new file mode 100644 index 0000000..6f1ade6 --- /dev/null +++ b/Others/DataMining_Viterbi/ViterbiTool.java @@ -0,0
+1,240 @@ +package DataMining_Viterbi; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +/** + * άر㷨 + * + * @author lyq + * + */ +public class ViterbiTool { + // ״̬תƸʾļַ + private String stmFilePath; + // ļַ + private String confusionFilePath; + // ʼ״̬ + private double[] initStatePro; + // ۲쵽״̬ + public String[] observeStates; + // ״̬תƾֵ + private double[][] stMatrix; + // ֵ + private double[][] confusionMatrix; + // µDZֵ + private double[][] potentialValues; + // DZ + private ArrayList potentialAttrs; + // ֵӳͼ + private HashMap name2Index; + // ֵӳͼ + private HashMap index2name; + + public ViterbiTool(String stmFilePath, String confusionFilePath, + double[] initStatePro, String[] observeStates) { + this.stmFilePath = stmFilePath; + this.confusionFilePath = confusionFilePath; + this.initStatePro = initStatePro; + this.observeStates = observeStates; + + initOperation(); + } + + /** + * ʼݲ + */ + private void initOperation() { + double[] temp; + int index; + ArrayList smtDatas; + ArrayList cfDatas; + + smtDatas = readDataFile(stmFilePath); + cfDatas = readDataFile(confusionFilePath); + + index = 0; + this.stMatrix = new double[smtDatas.size()][]; + for (String[] array : smtDatas) { + temp = new double[array.length]; + for (int i = 0; i < array.length; i++) { + try { + temp[i] = Double.parseDouble(array[i]); + } catch (NumberFormatException e) { + temp[i] = -1; + } + } + + // תֵ + this.stMatrix[index] = temp; + index++; + } + + index = 0; + this.confusionMatrix = new double[cfDatas.size()][]; + for (String[] array : cfDatas) { + temp = new double[array.length]; + for (int i = 0; i < array.length; i++) { + try { + temp[i] = Double.parseDouble(array[i]); + } catch (NumberFormatException e) { + temp[i] = -1; + } + } + + // תֵ + this.confusionMatrix[index] = temp; + index++; + } + + this.potentialAttrs = new ArrayList<>(); + // 
DZ + for (String s : smtDatas.get(0)) { + this.potentialAttrs.add(s); + } + // ȥЧ + potentialAttrs.remove(0); + + this.name2Index = new HashMap<>(); + this.index2name = new HashMap<>(); + + // ±ӳϵ + for (int i = 1; i < smtDatas.get(0).length; i++) { + this.name2Index.put(smtDatas.get(0)[i], i); + // ±굽Ƶӳ + this.index2name.put(i, smtDatas.get(0)[i]); + } + + for (int i = 1; i < cfDatas.get(0).length; i++) { + this.name2Index.put(cfDatas.get(0)[i], i); + } + } + + /** + * ļжȡ + */ + private ArrayList readDataFile(String filePath) { + File file = new File(filePath); + ArrayList dataArray = new ArrayList(); + + try { + BufferedReader in = new BufferedReader(new FileReader(file)); + String str; + String[] tempArray; + while ((str = in.readLine()) != null) { + tempArray = str.split(" "); + dataArray.add(tempArray); + } + in.close(); + } catch (IOException e) { + e.getStackTrace(); + } + + return dataArray; + } + + /** + * ݹ۲صʾ + */ + private void calPotencialProMatrix() { + String curObserveState; + // ۲DZ± + int osIndex; + int psIndex; + double temp; + double maxPro; + // ֵӰظ + double confusionPro; + + this.potentialValues = new double[observeStates.length][potentialAttrs + .size() + 1]; + for (int i = 0; i < this.observeStates.length; i++) { + curObserveState = this.observeStates[i]; + osIndex = this.name2Index.get(curObserveState); + maxPro = -1; + + // Ϊǵһ۲ûǰӰ죬ݳʼ״̬ + if (i == 0) { + for (String attr : this.potentialAttrs) { + psIndex = this.name2Index.get(attr); + confusionPro = this.confusionMatrix[psIndex][osIndex]; + + temp = this.initStatePro[psIndex - 1] * confusionPro; + this.potentialValues[BaseNames.DAY1][psIndex] = temp; + } + } else { + // DZǰһӰ죬ԼǰĻӰ + for (String toDayAttr : this.potentialAttrs) { + psIndex = this.name2Index.get(toDayAttr); + confusionPro = this.confusionMatrix[psIndex][osIndex]; + + int index; + maxPro = -1; + // ͨĸʼ + for (String yAttr : this.potentialAttrs) { + index = this.name2Index.get(yAttr); + temp = this.potentialValues[i - 
1][index] + * this.stMatrix[index][psIndex]; + + // õDZ + if (temp > maxPro) { + maxPro = temp; + } + } + + this.potentialValues[i][psIndex] = maxPro * confusionPro; + } + } + } + } + + /** + * ͬʱֵDZֵ + */ + private void outputResultAttr() { + double maxPro; + int maxIndex; + ArrayList psValues; + + psValues = new ArrayList<>(); + for (int i = 0; i < this.potentialValues.length; i++) { + maxPro = -1; + maxIndex = 0; + + for (int j = 0; j < potentialValues[i].length; j++) { + if (this.potentialValues[i][j] > maxPro) { + maxPro = potentialValues[i][j]; + maxIndex = j; + } + } + + // ȡ±ӦDZ + psValues.add(this.index2name.get(maxIndex)); + } + + System.out.println("۲Ϊ"); + for (String s : this.observeStates) { + System.out.print(s + ", "); + } + System.out.println(); + + System.out.println("DZΪ"); + for (String s : psValues) { + System.out.print(s + ", "); + } + System.out.println(); + } + + /** + * ݹ۲ԣõDZϢ + */ + public void calHMMObserve() { + calPotencialProMatrix(); + outputResultAttr(); + } +} From a90e091230525a67577d9f271e9b33fb8f6aa0ba Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 2 Aug 2015 14:08:09 +0800 Subject: [PATCH 56/58] =?UTF-8?q?=E8=BD=AC=E7=A7=BB=E6=A6=82=E7=8E=87?= =?UTF-8?q?=E7=9F=A9=E9=98=B5=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 转移概率矩阵数据 --- Others/DataMining_Viterbi/stmatrix.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Others/DataMining_Viterbi/stmatrix.txt diff --git a/Others/DataMining_Viterbi/stmatrix.txt b/Others/DataMining_Viterbi/stmatrix.txt new file mode 100644 index 0000000..af66956 --- /dev/null +++ b/Others/DataMining_Viterbi/stmatrix.txt @@ -0,0 +1,4 @@ +# Sunny Cloudy Rainy +Sunny 0.5 0.375 0.125 +Cloudy 0.25 0.125 0.625 +Rainy 0.25 0.375 0.375 \ No newline at end of file From 12c5ec84285f65add63f1790b7244cc26e8c41ae Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 2 Aug 2015 14:08:28 +0800 Subject: 
[PATCH 57/58] =?UTF-8?q?=E6=B7=B7=E6=B7=86=E7=9F=A9=E9=98=B5?= =?UTF-8?q?=E6=A6=82=E7=8E=87=E7=9F=A9=E9=98=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 混淆矩阵概率矩阵 --- Others/DataMining_Viterbi/humidity-matrix.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Others/DataMining_Viterbi/humidity-matrix.txt diff --git a/Others/DataMining_Viterbi/humidity-matrix.txt b/Others/DataMining_Viterbi/humidity-matrix.txt new file mode 100644 index 0000000..ff41df6 --- /dev/null +++ b/Others/DataMining_Viterbi/humidity-matrix.txt @@ -0,0 +1,4 @@ +# Dry Dryish Damp Soggy +Sunny 0.6 0.2 0.15 0.05 +Cloudy 0.25 0.25 0.25 0.25 +Rainy 0.05 0.10 0.35 0.50 \ No newline at end of file From 6f5caa12cae4535d936a9b6d533e18640881966c Mon Sep 17 00:00:00 2001 From: linyiqun <675250079@qq.com> Date: Sun, 2 Aug 2015 14:36:11 +0800 Subject: [PATCH 58/58] =?UTF-8?q?=E8=AF=B4=E6=98=8E=E6=96=87=E6=A1=A3?= =?UTF-8?q?=E6=9B=B4=E6=96=B0Viterbi=E7=AE=97=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 说明文档更新Viterbi算法 --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2e87d47..7dd82f0 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ SequentialPatterns | DataMining_GSP | GSP-序列模式分析算法 SequentialPatterns | DataMining_PrefixSpan | PrefixSpan-序列模式分析算法 StatisticalLearning | DataMining_EM | EM-期望最大化算法 StatisticalLearning | DataMining_SVM | SVM-支持向量机算法 + #### 其他经典DM算法 包名 | 目录名 | 算法名 | -----| ------ |--------| @@ -35,7 +36,8 @@ Others | DataMining_GA_Maze | GA_Maze-遗传算法在走迷宫游戏中的应用 Others | DataMining_KDTree | KDTree-k维空间关键数据检索算法工具类 Others | DataMining_MSApriori | MSApriori-基于多支持度的Apriori算法 Others | DataMining_RandomForest | RandomForest-随机森林算法 -Others | DataMining_TAN |TAN-树型朴素贝叶斯算法 +Others | DataMining_TAN | TAN-树型朴素贝叶斯算法 +Others | DataMining_Viterbi | Viterbi-维特比算法 ## 18大经典DM算法 
18大数据挖掘的经典算法以及代码实现,涉及到了决策分类,聚类,链接挖掘,关联挖掘,模式挖掘等等方面,后面都是相应算法的博文链接,希望能够帮助大家学。 @@ -128,7 +130,10 @@ K-Dimension Tree。多维空间划分树,数据在多维空间进行划分与 贝叶斯网络算法。弥补了朴素贝叶斯算法中必须要事件独立性的缺点,利用了贝叶斯网络的DAG有向无环图,允许各个事件保留一定的依赖关系,网络结构中的每个节点代表一种属性,边代表相应的条件概率值,通过计算从而能得到精准的分类效果。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/46683729) * ### TAN -树型朴素贝叶斯算法。此算法又被称为加强版朴素贝叶斯算法。在满足原有朴素贝叶斯条件的基础上,他允许部条件属性直接的关联性。形成树型的结构。 +树型朴素贝叶斯算法。此算法又被称为加强版朴素贝叶斯算法。在满足原有朴素贝叶斯条件的基础上,它允许部分条件属性之间存在关联性,从而形成树型的结构。[详细介绍链接](http://blog.csdn.net/androidlushangderen/article/details/46763427) + +* ### Viterbi +维特比算法。给定一个隐马尔科夫模型以及一个观察序列,求出潜在的状态序列信息,每个潜在状态信息又会受到前一个状态信息的影响。 ## 算法使用方法 在每个算法中给出了3大类型,主算法程序,调用程序,输入数据,调用方法如下: