diff --git a/.gitignore b/.gitignore index 69fa6ec5..885fe8a7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,7 @@ -*.pyc -.DS_Store -target -bin -build -.gradle -*.iml -*.ipr -*.iws -*.log -.classpath -.project -.settings -.idea \ No newline at end of file +__pycache__ +*.pdf +*.json +*.pkl +*.csv +*.edgelist +*.labels \ No newline at end of file diff --git a/README.md b/README.md index a6c4e85d..972feead 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,49 @@ # node2vec -This repository provides a reference implementation of *node2vec* as described in the paper:
-> node2vec: Scalable Feature Learning for Networks.
-> Aditya Grover and Jure Leskovec.
-> Knowledge Discovery and Data Mining, 2016.
-> +This repository provides an implementation of *node2vec* extended with restart probabilities and ensembles:
+The extensions were added by Koen Bouwman and Jerry Schonenberg.
+*node2vec* was introduced by Aditya Grover and Jure Leskovec.
-The *node2vec* algorithm learns continuous representations for nodes in any (un)directed, (un)weighted graph. Please check the [project page](https://snap.stanford.edu/node2vec/) for more details. ### Basic Usage #### Example -To run *node2vec* on Zachary's karate club network, execute the following command from the project home directory:
- ``python src/main.py --input graph/karate.edgelist --output emb/karate.emd`` +To run *node2vec* on the email-Eu-core dataset, execute the following command from the project home directory:
+ ``python src/main.py --input email-Eu-core.edgelist --labels email-Eu-core.labels --output results-email-Eu-core`` #### Options You can check out the other options available to use with *node2vec* using:
``python src/main.py --help`` +#### Options added with the added functionality +We have added the following parameters to configure the added functionality: + - To configure the Bayesian optimisation: + * ``--train_set`` to specify the proportion of the dataset used for optimisation + * ``--bayesian_opt`` to enable Bayesian optimisation + * ``--iter_bayesian`` to specify the number of iterations for Bayesian optimisation + * ``--scoring`` to specify how to evaluate each iteration of Bayesian optimisation + * ``--cross_validation`` to specify the size of cross validation + * ``--replications`` to specify the number of replications to evaluate each hyperparameter configuration + - To configure the restart method: + * ``--restarts`` to toggle the restart functionality + * ``--tau`` to set the $\tau$ parameter + * ``--omega`` to set the $\omega$ parameter + * ``--epsilon`` to set the $\varepsilon$ parameter + * ``--s`` to set the $s$ parameter + - To configure the ensemble method: + * ``--partitions`` to define how many ensembles you want + * ``--p`` now also supports a sequence of floats + * ``--q`` now also supports a sequence of floats + +#### Post processing +To find the $\lambda$ and/or $p,q$-lists to use for partitions you can use ``post_process.py`` + +#### Example post processing +To run *post_process* on the email-Eu-core dataset, execute the following command from the project home directory:
+ ``python src/post_process.py --dir results-email-Eu-core --partitions 4 --read --write`` +To learn about the options for *post_process*, execute the following command from the project home directory:
+ ``python src/post_process.py --help`` + #### Input The supported input format is an edgelist: @@ -26,17 +52,15 @@ The supported input format is an edgelist: The graph is assumed to be undirected and unweighted by default. These options can be changed by setting the appropriate flags. #### Output -The output file has *n+1* lines for a graph with *n* vertices. -The first line has the following format: - - num_of_nodes dim_of_representation - -The next *n* lines are as follows: - - node_id dim1 dim2 ... dimd - -where dim1, ... , dimd is the *d*-dimensional representation learned by *node2vec*. - +The output directory contains the following: + - ``cl_args.json``: a file with the settings of all the callable arguments + - The ``eval`` directory, which contains the following: + * directories for each replication with an ``embeddings.pkl`` file containing the vector embedding of the input graph for that replication + * ``results.csv``: a file with the results of the classifier over all replications + * ``best_settings.json``: a file that contains the best settings for each callable argument + - If the program was called with the ``--bayesian_opt`` flag, the following will also be in the output directory: + * ``BO_opt*.pdf``: a plot of the Bayesian optimisation + * ``opt_results.pkl``: the scores of each configuration of the Bayesian optimisation run ### Citing If you find *node2vec* useful for your research, please consider citing the following paper: @@ -47,9 +71,3 @@ If you find *node2vec* useful for your research, please consider citing the foll year = {2016} } - -### Miscellaneous - -Please send any questions you might have about the code and/or the algorithm to . - -*Note:* This is only a reference implementation of the *node2vec* algorithm and could benefit from several performance enhancement schemes, some of which are discussed in the paper. 
diff --git a/emb/karate.emb b/emb/karate.emb deleted file mode 100644 index 8f7edb45..00000000 --- a/emb/karate.emb +++ /dev/null @@ -1,35 +0,0 @@ -34 128 -1 -0.014876 -0.229356 0.023806 0.067841 0.090910 -0.144880 0.150130 0.075787 0.046873 -0.025290 -0.095061 0.084067 0.022556 -0.197516 0.051331 -0.241755 0.004764 -0.232580 -0.031161 -0.013199 0.233664 -0.011230 0.140365 0.096059 0.068695 -0.165448 -0.100326 -0.012106 0.011358 0.170880 -0.012065 0.044922 -0.139547 -0.037438 -0.095815 -0.138876 0.183298 0.116599 -0.055275 -0.080746 -0.100899 0.053135 0.141539 0.179031 -0.131575 0.127407 0.099880 -0.064466 -0.029267 0.157667 -0.028006 0.174557 -0.022628 0.148424 -0.174256 -0.179070 -0.059111 0.088243 0.086486 -0.033525 0.117324 -0.099924 -0.026303 0.151631 -0.063171 -0.079035 -0.073884 0.059158 0.136396 -0.156904 -0.089332 0.054849 -0.108355 -0.044886 0.131026 -0.063695 0.086190 0.014780 -0.002258 -0.120929 -0.115947 -0.029460 0.023848 -0.102442 -0.111003 0.058831 0.121057 0.008298 -0.056379 0.072060 0.048521 -0.006848 0.087435 -0.054377 -0.153402 0.207793 -0.070880 -0.107374 0.120661 -0.008031 -0.145239 0.157040 -0.122576 -0.109220 -0.100591 0.017339 0.173152 0.001466 0.015540 -0.049476 0.057062 0.019142 -0.027390 0.016272 -0.091249 -0.008336 -0.042350 0.178391 -0.066354 -0.029698 -0.038339 -0.048144 0.134590 -0.234350 -0.217189 0.074112 0.057659 0.201265 -34 -0.043236 -0.184561 0.086898 0.155603 0.126673 -0.103197 0.194487 -0.020264 -0.096339 0.018373 -0.113667 0.012346 0.015797 -0.198612 -0.094771 -0.205460 0.049062 -0.081919 0.027739 0.088356 0.151644 -0.009231 0.201849 0.050836 -0.023565 -0.149574 -0.049034 -0.044401 -0.037658 0.224190 -0.010381 0.071883 -0.170195 0.034827 -0.167335 -0.075119 0.218483 0.090103 -0.035649 -0.134331 -0.168675 0.210952 0.128298 0.087990 -0.063460 0.092851 0.023417 -0.128993 -0.074394 0.129745 0.093197 0.249170 -0.001558 0.100948 -0.161581 -0.110734 -0.069279 0.041938 0.008046 0.054898 0.023925 -0.086794 -0.147930 0.155933 0.051813 
-0.310828 -0.066930 -0.007734 0.062363 -0.038964 0.059932 0.004334 -0.072056 0.082316 -0.050440 -0.114736 0.039152 0.079540 0.074609 0.144051 -0.082203 -0.145513 0.004876 -0.252469 0.005756 0.245110 -0.101092 0.065562 -0.171087 0.005519 -0.005527 0.094499 0.054681 -0.044853 -0.037731 0.285537 -0.169364 0.082962 0.097452 -0.184904 -0.246170 0.201754 -0.079283 -0.180332 -0.009282 0.074115 0.221057 0.033268 0.021976 0.014238 0.086000 -0.090141 -0.070816 -0.046511 -0.025651 -0.059833 -0.086892 0.139139 -0.074664 0.109995 -0.096493 -0.132756 0.014005 -0.184265 0.008678 0.209333 0.024216 0.046068 -33 -0.037151 -0.193182 0.077220 0.145653 0.123061 -0.110840 0.185281 -0.003266 -0.074293 0.013228 -0.108248 0.023448 0.017491 -0.203065 -0.068931 -0.211348 0.036744 -0.099373 0.013532 0.072291 0.169995 -0.009909 0.193386 0.056882 -0.015445 -0.150086 -0.060747 -0.037020 -0.029384 0.216726 -0.015124 0.072654 -0.162907 0.029320 -0.163866 -0.086833 0.208782 0.090374 -0.040612 -0.127639 -0.159738 0.193453 0.128516 0.092407 -0.070034 0.102198 0.026696 -0.120459 -0.073511 0.130051 0.074555 0.242593 0.000266 0.103154 -0.166959 -0.113425 -0.069956 0.046228 0.019883 0.047479 0.043045 -0.096340 -0.130470 0.151841 0.041735 -0.279075 -0.063606 0.000315 0.068115 -0.050509 0.045616 0.007611 -0.069524 0.064300 -0.031539 -0.102848 0.052158 0.066086 0.064956 0.109075 -0.078587 -0.132586 0.004139 -0.232968 -0.010310 0.221729 -0.075292 0.054786 -0.151444 0.012827 0.007983 0.083758 0.057488 -0.049456 -0.048113 0.276285 -0.158635 0.053341 0.102118 -0.161438 -0.231241 0.187751 -0.082484 -0.163907 -0.014804 0.060307 0.209479 0.030831 0.023088 0.002885 0.075017 -0.076178 -0.068085 -0.047820 -0.030464 -0.050482 -0.082474 0.133255 -0.076936 0.086339 -0.085110 -0.118404 0.027944 -0.186448 -0.009368 0.183909 0.023062 0.057413 -3 -0.028561 -0.205411 0.056815 0.105204 0.107464 -0.127154 0.173082 0.029493 -0.026830 -0.005606 -0.101965 0.047164 0.018824 -0.210376 -0.017246 -0.231535 0.027110 -0.170617 
-0.010041 0.031033 0.197806 -0.006828 0.168036 0.082497 0.027082 -0.157874 -0.080517 -0.019565 -0.000477 0.203291 -0.015622 0.058068 -0.156901 0.000501 -0.131919 -0.116712 0.199355 0.106443 -0.044448 -0.103124 -0.131104 0.115538 0.131251 0.140924 -0.098906 0.113108 0.063865 -0.097658 -0.058951 0.142671 0.027667 0.202568 -0.018042 0.131458 -0.174052 -0.145032 -0.066079 0.062356 0.048391 0.014275 0.072580 -0.089248 -0.084891 0.152485 -0.006358 -0.189735 -0.068464 0.033716 0.102633 -0.109285 -0.020809 0.035645 -0.092487 0.013135 0.052743 -0.089519 0.070987 0.045890 0.035846 -0.001593 -0.098935 -0.083680 0.012137 -0.175651 -0.054943 0.149670 0.019547 0.031285 -0.114596 0.042073 0.024935 0.042274 0.071669 -0.057264 -0.105576 0.250713 -0.121465 -0.015699 0.109578 -0.090905 -0.194813 0.173052 -0.108581 -0.135182 -0.051883 0.047640 0.190100 0.018619 0.022392 -0.026230 0.077392 -0.031160 -0.044318 -0.011937 -0.059609 -0.030396 -0.066347 0.157975 -0.064017 0.032325 -0.066449 -0.089884 0.073737 -0.207898 -0.113919 0.137631 0.044525 0.131904 -2 -0.014195 -0.206891 0.039293 0.085593 0.095233 -0.124536 0.156750 0.051983 0.001926 -0.015428 -0.094487 0.068283 0.022594 -0.197800 0.013198 -0.227624 0.008343 -0.180030 -0.019804 0.011482 0.205034 -0.006173 0.155285 0.082322 0.037030 -0.151400 -0.087726 -0.019770 0.005120 0.180771 -0.010223 0.048888 -0.152389 -0.009914 -0.112808 -0.117258 0.191241 0.099122 -0.047605 -0.088254 -0.111873 0.091852 0.139276 0.156382 -0.111017 0.116605 0.073830 -0.084164 -0.043307 0.137957 0.005370 0.179475 -0.012714 0.137657 -0.170627 -0.155057 -0.059229 0.074431 0.059581 -0.005058 0.088855 -0.090385 -0.056987 0.144462 -0.028853 -0.128125 -0.068681 0.038364 0.110911 -0.127508 -0.048368 0.043526 -0.091543 -0.015820 0.086858 -0.069705 0.077332 0.030236 0.014120 -0.050888 -0.104370 -0.061443 0.009650 -0.136330 -0.077768 0.111878 0.061145 0.022060 -0.081506 0.053774 0.040389 0.023848 0.076200 -0.059771 -0.129641 0.226712 -0.095737 -0.059849 0.105248 -0.044966 
-0.162578 0.155537 -0.106955 -0.124103 -0.077882 0.022313 0.180785 0.018175 0.012676 -0.037498 0.061517 -0.005846 -0.034860 -0.004758 -0.074445 -0.020110 -0.054517 0.165222 -0.071541 0.005364 -0.052736 -0.073605 0.103373 -0.216421 -0.150806 0.105295 0.055725 0.159106 -4 -0.014641 -0.203711 0.034961 0.085020 0.095704 -0.131868 0.150660 0.056755 0.011886 -0.016651 -0.089766 0.067894 0.024813 -0.194787 0.020632 -0.224535 0.010860 -0.193962 -0.023700 0.008739 0.211455 -0.009271 0.145376 0.082361 0.047274 -0.151999 -0.089413 -0.014303 0.007932 0.174257 -0.014606 0.045983 -0.143194 -0.019511 -0.106699 -0.121798 0.188384 0.107257 -0.049663 -0.084457 -0.107101 0.077382 0.134446 0.157648 -0.108230 0.113253 0.079461 -0.079183 -0.043650 0.138382 0.001375 0.174463 -0.015088 0.134528 -0.166295 -0.153995 -0.057372 0.071027 0.069189 -0.016081 0.099031 -0.091601 -0.048610 0.142226 -0.029463 -0.114172 -0.072477 0.043485 0.114788 -0.127927 -0.054399 0.046114 -0.091841 -0.023762 0.094924 -0.068927 0.077757 0.029653 0.009168 -0.056918 -0.098812 -0.055162 0.015343 -0.122034 -0.078487 0.094937 0.074184 0.015670 -0.075195 0.058859 0.034632 0.014311 0.071581 -0.055793 -0.125368 0.216919 -0.088715 -0.066234 0.104393 -0.041869 -0.155419 0.150385 -0.102527 -0.107840 -0.081059 0.026027 0.165621 0.010435 0.014196 -0.037497 0.064977 -0.004085 -0.032967 0.003786 -0.069743 -0.013293 -0.051573 0.158915 -0.062837 -0.001615 -0.044520 -0.061123 0.099564 -0.209667 -0.159169 0.095354 0.052871 0.161271 -32 -0.028685 -0.213070 0.047301 0.108818 0.109415 -0.128147 0.176653 0.027303 -0.025159 -0.004165 -0.105345 0.053321 0.018802 -0.204727 -0.014004 -0.228356 0.026948 -0.172561 -0.012788 0.035630 0.203829 -0.004683 0.168709 0.078730 0.022190 -0.164596 -0.086578 -0.023510 -0.008559 0.203468 -0.010573 0.059740 -0.148701 -0.003012 -0.134304 -0.112708 0.203686 0.109460 -0.050573 -0.109072 -0.136512 0.126445 0.128359 0.133714 -0.103106 0.115873 0.066037 -0.090537 -0.055603 0.139785 0.027898 0.210594 -0.017086 
0.130375 -0.174136 -0.149086 -0.063542 0.062951 0.050607 0.008530 0.075894 -0.092578 -0.084852 0.157976 -0.014030 -0.184613 -0.072039 0.032966 0.101139 -0.104970 -0.026077 0.038771 -0.092542 0.011978 0.047402 -0.082716 0.062619 0.046412 0.032246 0.000488 -0.098291 -0.080592 0.019979 -0.166354 -0.054311 0.148484 0.019857 0.032152 -0.108305 0.035798 0.024141 0.041786 0.075691 -0.055285 -0.095965 0.244279 -0.114245 -0.016270 0.102579 -0.095522 -0.187384 0.167964 -0.106744 -0.134279 -0.050422 0.048754 0.190782 0.013687 0.018321 -0.018169 0.071399 -0.032380 -0.043966 -0.010604 -0.056195 -0.028461 -0.064954 0.154093 -0.059146 0.030605 -0.064815 -0.091060 0.073829 -0.205478 -0.104267 0.131276 0.034881 0.128345 -24 -0.029315 -0.189772 0.068621 0.134244 0.113263 -0.111526 0.179366 0.006049 -0.055758 0.008379 -0.101671 0.030253 0.020045 -0.200031 -0.051633 -0.212913 0.031469 -0.122085 0.007588 0.056852 0.173856 -0.008787 0.179263 0.067520 0.002459 -0.148246 -0.065914 -0.037638 -0.025133 0.208168 -0.016622 0.063915 -0.162850 0.012209 -0.148385 -0.094801 0.212578 0.094363 -0.044566 -0.125276 -0.148900 0.172375 0.128087 0.110670 -0.080469 0.106983 0.040901 -0.105512 -0.063393 0.132020 0.064159 0.230891 -0.009803 0.113915 -0.160158 -0.123251 -0.063602 0.050361 0.029889 0.029696 0.051962 -0.091280 -0.110055 0.153433 0.019818 -0.245125 -0.068114 0.014500 0.076046 -0.066689 0.022533 0.020194 -0.083352 0.046498 -0.001833 -0.098371 0.049496 0.056079 0.051472 0.077022 -0.081356 -0.108193 0.010358 -0.201614 -0.025917 0.187518 -0.045225 0.052643 -0.135496 0.023657 0.010510 0.068199 0.063236 -0.049041 -0.063124 0.265099 -0.134215 0.031199 0.096965 -0.135953 -0.212055 0.182376 -0.084096 -0.150302 -0.031544 0.058467 0.196833 0.024433 0.022537 0.000038 0.078107 -0.060734 -0.059215 -0.031952 -0.043136 -0.043312 -0.075321 0.136955 -0.064975 0.072500 -0.075805 -0.107042 0.043740 -0.190984 -0.039007 0.166575 0.030490 0.083785 -14 -0.027129 -0.198493 0.061423 0.105227 0.107909 -0.116048 0.165620 
0.021164 -0.039371 0.000798 -0.105297 0.046454 0.020127 -0.201215 -0.025640 -0.218301 0.026254 -0.145145 -0.003993 0.036285 0.189837 -0.001548 0.169266 0.074046 0.012566 -0.145471 -0.070417 -0.019415 -0.011722 0.199796 -0.013031 0.061321 -0.155937 0.010501 -0.129864 -0.105259 0.194787 0.093700 -0.043040 -0.106005 -0.136448 0.129663 0.130622 0.132160 -0.088718 0.109683 0.051199 -0.096384 -0.058666 0.135832 0.033188 0.202058 -0.010150 0.124697 -0.166948 -0.134049 -0.066910 0.061093 0.035829 0.016422 0.067523 -0.093323 -0.088033 0.141622 0.001714 -0.188714 -0.062100 0.020227 0.088135 -0.091097 -0.010059 0.031509 -0.082984 0.023107 0.036634 -0.082443 0.064717 0.047751 0.036376 0.020414 -0.091274 -0.092302 0.010257 -0.170598 -0.046212 0.162372 -0.004781 0.033150 -0.109946 0.033680 0.021587 0.046615 0.063440 -0.057116 -0.089360 0.243070 -0.123019 0.000268 0.100286 -0.097457 -0.190594 0.162703 -0.092669 -0.130177 -0.047080 0.045170 0.186444 0.027099 0.014230 -0.019465 0.063650 -0.042349 -0.049969 -0.016807 -0.047529 -0.034889 -0.069347 0.142300 -0.068355 0.042539 -0.065908 -0.092345 0.059285 -0.191807 -0.077834 0.134570 0.036721 0.108272 -30 -0.029805 -0.195961 0.067188 0.138667 0.112269 -0.110647 0.186154 -0.000130 -0.064409 0.011206 -0.110666 0.029180 0.017283 -0.202088 -0.059766 -0.216968 0.035102 -0.117452 0.007007 0.065951 0.173340 -0.011271 0.187594 0.059655 -0.000878 -0.153483 -0.060203 -0.036643 -0.028904 0.212611 -0.014515 0.062677 -0.160968 0.015566 -0.153099 -0.088347 0.209994 0.097673 -0.040055 -0.127585 -0.150775 0.176175 0.128623 0.109289 -0.077416 0.104198 0.038445 -0.114737 -0.066683 0.134714 0.065543 0.233909 -0.003867 0.105055 -0.159656 -0.122573 -0.070121 0.050437 0.028871 0.034093 0.049062 -0.093662 -0.113433 0.153901 0.031128 -0.253415 -0.068951 0.007318 0.077145 -0.065763 0.028671 0.015899 -0.078109 0.049580 -0.008314 -0.102949 0.051671 0.064192 0.055459 0.087741 -0.088441 -0.117364 0.004683 -0.212010 -0.019745 0.199981 -0.053953 0.052947 -0.143431 
0.021250 0.012143 0.067632 0.060727 -0.045437 -0.058646 0.265112 -0.144213 0.040904 0.099171 -0.143674 -0.210708 0.183055 -0.085558 -0.160468 -0.027837 0.057454 0.201077 0.025756 0.017544 -0.000789 0.073068 -0.067587 -0.063775 -0.036334 -0.034737 -0.045930 -0.079362 0.137832 -0.069735 0.079231 -0.082655 -0.107603 0.038808 -0.186957 -0.032019 0.173386 0.033657 0.076414 -6 0.002078 -0.268463 -0.003595 0.061690 0.088494 -0.169720 0.158606 0.118915 0.104116 -0.043795 -0.095630 0.123569 0.022248 -0.218749 0.115644 -0.277518 -0.014723 -0.313166 -0.060689 -0.047875 0.288100 -0.020190 0.132303 0.118331 0.117887 -0.200003 -0.132013 -0.017603 0.019659 0.177378 -0.006264 0.034392 -0.144590 -0.073253 -0.077074 -0.172127 0.207134 0.149657 -0.083728 -0.071973 -0.090280 0.020017 0.167903 0.226158 -0.175951 0.159062 0.152982 -0.053364 -0.014949 0.189750 -0.065455 0.174992 -0.030634 0.172406 -0.186966 -0.233308 -0.063218 0.106419 0.135712 -0.083162 0.167874 -0.111255 0.019922 0.172214 -0.106326 -0.006885 -0.089321 0.084059 0.181457 -0.217192 -0.148822 0.081893 -0.144617 -0.089499 0.217508 -0.047376 0.106679 -0.000777 -0.035308 -0.219362 -0.139127 0.023513 0.039169 -0.045416 -0.159940 -0.008134 0.218900 -0.008446 -0.026698 0.104784 0.072880 -0.043530 0.111360 -0.052411 -0.210585 0.205173 -0.034588 -0.189699 0.137564 0.055408 -0.113309 0.151570 -0.151678 -0.094919 -0.158202 -0.006576 0.163549 -0.015275 0.019636 -0.075174 0.055843 0.066792 -0.019707 0.035878 -0.116778 0.019480 -0.031952 0.205222 -0.063552 -0.076776 -0.017083 -0.022952 0.203400 -0.281455 -0.323782 0.030069 0.078347 0.272427 -28 -0.031933 -0.197667 0.064274 0.115887 0.109673 -0.112831 0.172487 0.015226 -0.045440 0.000922 -0.100285 0.041999 0.019360 -0.195612 -0.036887 -0.218494 0.025783 -0.140635 0.001926 0.047809 0.179597 -0.007532 0.172192 0.069171 0.007049 -0.146625 -0.074777 -0.025957 -0.010022 0.198732 -0.015366 0.065234 -0.151395 0.012260 -0.137601 -0.103174 0.199766 0.096171 -0.039490 -0.110485 -0.138451 0.140136 
0.128419 0.120470 -0.087535 0.103411 0.041446 -0.101929 -0.063897 0.129588 0.039896 0.211609 -0.006230 0.115810 -0.161193 -0.129435 -0.065928 0.050153 0.031238 0.023657 0.060655 -0.092088 -0.094960 0.147079 0.012121 -0.212376 -0.067530 0.022987 0.080594 -0.086674 0.001573 0.025931 -0.081351 0.024362 0.018821 -0.089987 0.060000 0.054546 0.038437 0.043187 -0.088256 -0.103463 0.010522 -0.187656 -0.037527 0.173049 -0.014719 0.041709 -0.121271 0.031382 0.019447 0.054744 0.061988 -0.056360 -0.078127 0.246409 -0.128369 0.010664 0.099073 -0.115142 -0.199420 0.175094 -0.094355 -0.143670 -0.035543 0.051635 0.192875 0.027025 0.020782 -0.017472 0.072300 -0.045097 -0.055823 -0.026098 -0.047309 -0.039429 -0.071946 0.141525 -0.068355 0.048830 -0.069811 -0.097455 0.053574 -0.189748 -0.071610 0.145047 0.036588 0.100697 -9 -0.028741 -0.211076 0.054543 0.113454 0.112192 -0.125957 0.179462 0.027106 -0.028922 -0.004419 -0.107562 0.053037 0.021575 -0.217922 -0.024299 -0.237413 0.024300 -0.160490 -0.008231 0.041769 0.199263 -0.003726 0.183270 0.076422 0.018500 -0.159869 -0.083470 -0.031711 -0.011421 0.213216 -0.012059 0.066545 -0.164235 0.000059 -0.135374 -0.114367 0.211365 0.103426 -0.050803 -0.107760 -0.144874 0.131460 0.138487 0.143084 -0.099800 0.116327 0.059543 -0.104551 -0.061709 0.139687 0.031561 0.216445 -0.014551 0.132551 -0.174128 -0.147784 -0.069049 0.061083 0.044929 0.013560 0.073751 -0.095200 -0.094416 0.159048 0.001244 -0.203643 -0.068519 0.027594 0.098585 -0.101190 -0.011652 0.036458 -0.086973 0.017561 0.045833 -0.092934 0.063652 0.055411 0.034048 0.017822 -0.095928 -0.093888 0.008178 -0.187700 -0.052890 0.162931 0.005551 0.035677 -0.122494 0.039770 0.026802 0.046470 0.076745 -0.054114 -0.100411 0.253836 -0.121971 -0.008027 0.105489 -0.097454 -0.201514 0.171386 -0.106460 -0.139697 -0.050451 0.044910 0.193437 0.025752 0.016820 -0.024537 0.076425 -0.037152 -0.045500 -0.020131 -0.057735 -0.035467 -0.071962 0.158357 -0.070841 0.044458 -0.070259 -0.096717 0.068233 -0.206222 
-0.099504 0.146867 0.042443 0.123567 -8 -0.017865 -0.211108 0.047087 0.089959 0.095930 -0.129200 0.162559 0.046570 -0.004375 -0.008637 -0.098684 0.061683 0.021941 -0.204175 0.012324 -0.233112 0.015407 -0.184187 -0.015316 0.016029 0.207185 -0.006227 0.154479 0.078815 0.037447 -0.147990 -0.084808 -0.012550 0.005331 0.188156 -0.016680 0.052601 -0.150952 -0.007804 -0.117620 -0.122416 0.198871 0.103815 -0.043373 -0.089026 -0.118659 0.092678 0.133247 0.160776 -0.106863 0.122023 0.070368 -0.087064 -0.050659 0.143238 0.005149 0.183299 -0.011017 0.141231 -0.177581 -0.152867 -0.065982 0.068455 0.058193 -0.006387 0.090205 -0.095814 -0.063365 0.149157 -0.020920 -0.143041 -0.067499 0.037596 0.111694 -0.126333 -0.047938 0.041035 -0.095825 -0.012171 0.082612 -0.074008 0.073319 0.033771 0.017426 -0.041337 -0.098758 -0.070711 0.009757 -0.141484 -0.076250 0.121256 0.057527 0.022052 -0.087179 0.051636 0.038312 0.022068 0.071565 -0.062129 -0.121745 0.227285 -0.099703 -0.054338 0.100271 -0.059531 -0.161960 0.151497 -0.109298 -0.116800 -0.066790 0.026844 0.177323 0.011312 0.014639 -0.038243 0.063205 -0.009735 -0.033455 0.000628 -0.073667 -0.017436 -0.058714 0.157400 -0.066595 0.007332 -0.054345 -0.075661 0.096529 -0.204896 -0.138533 0.104377 0.054124 0.149600 -7 0.004536 -0.254080 -0.005178 0.057268 0.083031 -0.160793 0.152251 0.109620 0.097156 -0.041831 -0.095120 0.116381 0.023259 -0.211618 0.107966 -0.263637 -0.010155 -0.292228 -0.062469 -0.041445 0.275748 -0.017947 0.131967 0.117549 0.107937 -0.190531 -0.131160 -0.015198 0.016388 0.167363 -0.002318 0.040055 -0.140953 -0.069910 -0.079856 -0.160896 0.193364 0.132179 -0.076877 -0.067679 -0.087655 0.021391 0.153731 0.214042 -0.160299 0.151596 0.140628 -0.047208 -0.016141 0.176074 -0.061066 0.166253 -0.026844 0.168321 -0.184576 -0.212191 -0.057258 0.100167 0.125962 -0.066334 0.153393 -0.106310 0.015351 0.163925 -0.100897 -0.008578 -0.081105 0.081465 0.172725 -0.203431 -0.141983 0.080377 -0.138018 -0.082646 0.200963 -0.042303 0.096555 
-0.001276 -0.029828 -0.206510 -0.126383 0.019984 0.033506 -0.044342 -0.150970 0.000560 0.194671 -0.005433 -0.028069 0.098008 0.067910 -0.039512 0.106683 -0.054327 -0.194675 0.199312 -0.033676 -0.172281 0.121926 0.047836 -0.112250 0.145785 -0.146147 -0.089646 -0.142902 -0.010297 0.153126 -0.016220 0.020992 -0.066057 0.052915 0.063302 -0.019462 0.027044 -0.111650 0.011834 -0.033692 0.189723 -0.062758 -0.066532 -0.020178 -0.020366 0.188008 -0.256947 -0.294357 0.027996 0.071474 0.257231 -11 -0.011104 -0.238662 0.017651 0.075369 0.092326 -0.150237 0.153379 0.086072 0.059588 -0.030736 -0.095822 0.094182 0.019264 -0.201241 0.068715 -0.252125 -0.001463 -0.247265 -0.041062 -0.019052 0.248472 -0.015812 0.142417 0.101158 0.078408 -0.176218 -0.110534 -0.015693 0.006325 0.172081 -0.007481 0.042705 -0.139252 -0.048455 -0.091122 -0.140135 0.198064 0.126991 -0.064129 -0.073303 -0.096557 0.056212 0.147887 0.185500 -0.139213 0.137412 0.115886 -0.059514 -0.031032 0.163974 -0.028957 0.175716 -0.019992 0.148652 -0.174109 -0.192077 -0.060479 0.089457 0.094409 -0.044620 0.125362 -0.103718 -0.012229 0.158348 -0.069101 -0.065849 -0.080717 0.060038 0.149384 -0.170787 -0.098626 0.059951 -0.119967 -0.054091 0.151870 -0.056308 0.087860 0.008745 -0.011980 -0.135536 -0.113264 -0.013711 0.025772 -0.088830 -0.123062 0.048005 0.141393 0.006782 -0.051220 0.078137 0.050331 -0.016018 0.092582 -0.048781 -0.161419 0.206455 -0.058673 -0.120110 0.115591 0.007645 -0.128734 0.152522 -0.128197 -0.101811 -0.113094 0.007255 0.165345 -0.009922 0.018239 -0.054996 0.055853 0.032588 -0.030402 0.013102 -0.091771 -0.001437 -0.045370 0.179492 -0.062742 -0.036980 -0.031203 -0.044651 0.151537 -0.240748 -0.230398 0.058123 0.057474 0.209633 -26 -0.023020 -0.198035 0.052784 0.110189 0.107711 -0.121333 0.171473 0.021895 -0.032644 -0.004038 -0.101837 0.043534 0.021820 -0.197700 -0.022046 -0.215946 0.026357 -0.152653 -0.007251 0.039833 0.193019 -0.007593 0.170483 0.074832 0.018060 -0.156527 -0.080434 -0.030780 -0.016583 
0.199059 -0.008473 0.058927 -0.147682 -0.004470 -0.132989 -0.106190 0.197362 0.098274 -0.048066 -0.107485 -0.132600 0.128865 0.123421 0.123282 -0.093707 0.113453 0.053405 -0.093328 -0.056185 0.139182 0.038118 0.209039 -0.012684 0.115021 -0.161031 -0.138554 -0.060245 0.059080 0.041824 0.012730 0.069979 -0.091850 -0.083236 0.151016 -0.000600 -0.187181 -0.064777 0.020779 0.092040 -0.088948 -0.010133 0.029883 -0.087822 0.018203 0.037744 -0.086909 0.054650 0.046275 0.033388 0.017380 -0.092644 -0.080701 0.014382 -0.165403 -0.043736 0.150511 -0.000230 0.035232 -0.110608 0.033784 0.020769 0.043880 0.070284 -0.044217 -0.083884 0.235181 -0.114249 -0.003872 0.101984 -0.097426 -0.181580 0.163574 -0.092290 -0.129696 -0.045518 0.044875 0.183125 0.017928 0.022999 -0.011724 0.063166 -0.037035 -0.048019 -0.016431 -0.049066 -0.030906 -0.062725 0.139018 -0.059069 0.036730 -0.063343 -0.088477 0.062320 -0.195873 -0.085636 0.136442 0.033857 0.105298 -5 -0.004594 -0.243196 0.005785 0.065577 0.090126 -0.153967 0.154449 0.088095 0.073435 -0.034911 -0.090112 0.105713 0.019047 -0.202371 0.080980 -0.256649 -0.007039 -0.264985 -0.047738 -0.023556 0.256849 -0.011095 0.131734 0.108428 0.090315 -0.180211 -0.114032 -0.017658 0.012227 0.170835 -0.010182 0.037963 -0.142614 -0.050532 -0.080946 -0.151261 0.194782 0.130602 -0.067567 -0.069304 -0.087982 0.037704 0.145634 0.197020 -0.148580 0.139400 0.123792 -0.058108 -0.025629 0.162819 -0.041917 0.169209 -0.018981 0.148528 -0.171418 -0.200696 -0.057636 0.091955 0.108952 -0.060386 0.133370 -0.104544 -0.001118 0.159968 -0.081236 -0.038192 -0.073945 0.064121 0.156134 -0.179419 -0.115688 0.067210 -0.123707 -0.069146 0.169208 -0.053796 0.087754 0.002529 -0.021773 -0.162555 -0.120582 -0.002197 0.026520 -0.068466 -0.132423 0.023636 0.163203 0.004269 -0.038202 0.083627 0.058034 -0.025884 0.094432 -0.049474 -0.176491 0.197759 -0.052375 -0.139631 0.121160 0.028213 -0.123731 0.142645 -0.131270 -0.097683 -0.127133 0.000257 0.157086 -0.010924 0.011315 -0.059939 
0.049982 0.040890 -0.020221 0.020389 -0.098609 0.009737 -0.034123 0.182243 -0.056289 -0.046906 -0.025897 -0.031945 0.160402 -0.243433 -0.254963 0.048785 0.060837 0.227462 -20 -0.018263 -0.203119 0.044032 0.096708 0.093959 -0.128945 0.162148 0.043488 -0.007986 -0.009540 -0.096424 0.056310 0.026605 -0.195246 0.002651 -0.219970 0.010107 -0.169680 -0.017086 0.016119 0.202129 -0.011583 0.155043 0.076787 0.035868 -0.145195 -0.074411 -0.020829 0.000230 0.177906 -0.018463 0.050058 -0.145409 -0.009500 -0.112073 -0.111484 0.195414 0.098849 -0.049618 -0.097075 -0.119716 0.099080 0.128173 0.144992 -0.099394 0.113066 0.072445 -0.084550 -0.045729 0.134319 0.009104 0.186786 -0.007951 0.121093 -0.163520 -0.145787 -0.061469 0.061240 0.054717 -0.003879 0.090869 -0.089566 -0.062849 0.143736 -0.016331 -0.146340 -0.067350 0.033961 0.102259 -0.106254 -0.036726 0.037258 -0.089962 -0.005363 0.069195 -0.070990 0.071186 0.033198 0.022662 -0.022449 -0.093300 -0.071684 0.012518 -0.146311 -0.064636 0.120725 0.039939 0.025594 -0.091103 0.050819 0.034654 0.027948 0.068914 -0.054067 -0.108076 0.222217 -0.094144 -0.042908 0.096560 -0.056561 -0.162761 0.147145 -0.100650 -0.112914 -0.066399 0.030850 0.167822 0.012409 0.018386 -0.025553 0.059479 -0.014568 -0.037243 -0.002874 -0.064027 -0.021660 -0.052850 0.151709 -0.062739 0.015709 -0.051125 -0.065736 0.086524 -0.197463 -0.121859 0.108248 0.041986 0.136731 -29 -0.029217 -0.192326 0.054517 0.112169 0.109076 -0.116282 0.168307 0.012174 -0.039994 0.001809 -0.098568 0.043412 0.016036 -0.194299 -0.030963 -0.214521 0.024745 -0.142613 0.001282 0.044642 0.182777 -0.008179 0.165616 0.068198 0.007098 -0.151797 -0.073324 -0.030752 -0.013393 0.199615 -0.007267 0.059101 -0.145442 0.004420 -0.136301 -0.096027 0.197822 0.096257 -0.041719 -0.111380 -0.130995 0.141976 0.118482 0.111795 -0.082441 0.109905 0.046891 -0.095926 -0.059965 0.130532 0.041412 0.206435 -0.013023 0.116034 -0.163667 -0.124636 -0.056119 0.056049 0.037650 0.022077 0.060789 -0.090658 -0.095441 
0.145759 0.002065 -0.204009 -0.069661 0.018610 0.086614 -0.079012 -0.004151 0.026184 -0.085070 0.029903 0.024578 -0.082741 0.056755 0.050838 0.039933 0.035330 -0.089804 -0.088362 0.014101 -0.179165 -0.036571 0.161763 -0.008797 0.033754 -0.115875 0.024550 0.017247 0.047270 0.064382 -0.047846 -0.073251 0.242764 -0.122172 0.010656 0.094378 -0.111195 -0.190514 0.167477 -0.090849 -0.132405 -0.037286 0.048096 0.180554 0.017383 0.021162 -0.010220 0.072079 -0.041447 -0.053255 -0.023962 -0.050160 -0.038334 -0.069232 0.134364 -0.059308 0.047420 -0.068231 -0.094575 0.052337 -0.190503 -0.066888 0.144212 0.031240 0.094808 -25 -0.035282 -0.206988 0.058312 0.120805 0.111692 -0.122058 0.177212 0.011913 -0.038791 0.000927 -0.108893 0.039183 0.017992 -0.200517 -0.036279 -0.230321 0.033777 -0.150954 0.003472 0.053558 0.195164 -0.010270 0.183469 0.076504 0.012123 -0.165208 -0.076565 -0.032481 -0.022911 0.213152 -0.007764 0.065099 -0.152898 0.001475 -0.147454 -0.099935 0.198991 0.104754 -0.047584 -0.118899 -0.147034 0.153900 0.125362 0.112688 -0.085567 0.114880 0.050960 -0.097374 -0.057590 0.140256 0.042312 0.225302 -0.010500 0.123003 -0.172205 -0.137021 -0.060565 0.052122 0.037197 0.026949 0.059353 -0.090034 -0.097365 0.150803 0.002949 -0.221907 -0.071696 0.022900 0.083551 -0.083490 0.000902 0.022435 -0.083410 0.035583 0.017835 -0.096991 0.054303 0.057029 0.046537 0.039250 -0.092136 -0.098973 0.014844 -0.194283 -0.038230 0.170998 -0.015587 0.034901 -0.129690 0.023305 0.011655 0.053895 0.073493 -0.053029 -0.081261 0.257914 -0.129617 0.012464 0.105111 -0.118070 -0.207143 0.181815 -0.098697 -0.145136 -0.036175 0.052328 0.198538 0.023094 0.025383 -0.012243 0.072062 -0.047573 -0.054932 -0.028930 -0.052588 -0.039808 -0.068874 0.144699 -0.067824 0.051959 -0.071644 -0.097496 0.054996 -0.205749 -0.075276 0.158864 0.029401 0.100663 -31 -0.024721 -0.196744 0.058380 0.113582 0.105830 -0.121297 0.167197 0.019532 -0.034359 0.000843 -0.097467 0.050559 0.023386 -0.199146 -0.025068 -0.221343 0.019521 
-0.144300 -0.002430 0.040385 0.189624 -0.010693 0.165790 0.073749 0.016852 -0.147830 -0.069561 -0.021950 -0.011725 0.194129 -0.016407 0.062914 -0.153591 0.008004 -0.129214 -0.098710 0.194018 0.095890 -0.038745 -0.108933 -0.131078 0.135748 0.126969 0.121269 -0.088126 0.106741 0.049527 -0.099419 -0.056658 0.134461 0.031972 0.206586 -0.006793 0.116740 -0.162558 -0.134426 -0.061724 0.054186 0.037368 0.011571 0.070313 -0.091875 -0.092170 0.139323 0.001843 -0.195481 -0.071051 0.018516 0.087994 -0.088950 -0.006768 0.022944 -0.078728 0.019804 0.034330 -0.084489 0.066822 0.045952 0.039824 0.025499 -0.089017 -0.093305 0.008225 -0.178437 -0.045249 0.156472 -0.003366 0.031600 -0.112111 0.031002 0.017235 0.050212 0.068844 -0.052336 -0.091898 0.240929 -0.123371 -0.004094 0.099499 -0.095697 -0.192394 0.166625 -0.092753 -0.139534 -0.047868 0.042766 0.186704 0.021796 0.018163 -0.016969 0.065114 -0.043754 -0.051935 -0.024208 -0.049579 -0.030092 -0.069234 0.149659 -0.071942 0.045299 -0.067596 -0.091622 0.060503 -0.197020 -0.083051 0.139514 0.033104 0.107531 -22 -0.013324 -0.230635 0.030984 0.088591 0.098398 -0.146396 0.163462 0.062319 0.023642 -0.020648 -0.099384 0.080721 0.024131 -0.211143 0.037357 -0.247551 0.004881 -0.220844 -0.026082 0.005365 0.235157 -0.011633 0.157542 0.093512 0.056209 -0.167003 -0.100317 -0.023583 0.006637 0.191703 -0.011954 0.047766 -0.152447 -0.027922 -0.108449 -0.136209 0.200194 0.117877 -0.060827 -0.089997 -0.109545 0.076386 0.139793 0.174219 -0.123702 0.133703 0.092394 -0.075265 -0.038618 0.153266 -0.011535 0.186323 -0.021598 0.141207 -0.172176 -0.175036 -0.060644 0.081859 0.082517 -0.028496 0.111178 -0.097627 -0.040972 0.160566 -0.045475 -0.110720 -0.077981 0.050738 0.134783 -0.140888 -0.071112 0.053822 -0.108662 -0.030518 0.117828 -0.066935 0.077736 0.026950 0.008733 -0.083885 -0.111567 -0.043562 0.023202 -0.124658 -0.096351 0.084337 0.097076 0.012733 -0.073115 0.064600 0.044382 0.006441 0.085194 -0.057568 -0.141985 0.225707 -0.083323 -0.081665 0.116622 
-0.032448 -0.156502 0.157640 -0.117900 -0.110662 -0.090711 0.023899 0.173482 0.005736 0.018212 -0.042824 0.065584 0.007001 -0.030511 0.008229 -0.081783 -0.010709 -0.054062 0.172114 -0.066413 -0.011092 -0.042839 -0.063262 0.123231 -0.230025 -0.185480 0.091342 0.055049 0.181622 -18 -0.015417 -0.214277 0.033568 0.083404 0.097872 -0.136150 0.154404 0.056222 0.014330 -0.011620 -0.096729 0.072457 0.026223 -0.203257 0.023977 -0.237472 0.008288 -0.197167 -0.026483 0.010702 0.221724 -0.008589 0.155733 0.087548 0.049251 -0.153534 -0.090079 -0.016234 0.008239 0.184684 -0.013617 0.052859 -0.144751 -0.016284 -0.107096 -0.126905 0.189289 0.107405 -0.049989 -0.084131 -0.112761 0.084732 0.130879 0.158727 -0.113563 0.124991 0.077743 -0.078021 -0.041654 0.141569 -0.004045 0.177146 -0.013704 0.141408 -0.172169 -0.155901 -0.059869 0.069247 0.066292 -0.015319 0.100727 -0.096667 -0.051426 0.146661 -0.030962 -0.120108 -0.069545 0.041643 0.121472 -0.126646 -0.059835 0.045643 -0.093986 -0.024604 0.099842 -0.067088 0.080691 0.030676 0.013505 -0.064096 -0.106151 -0.051701 0.018067 -0.132257 -0.087097 0.100177 0.077086 0.015532 -0.081299 0.060097 0.039849 0.012022 0.080452 -0.060635 -0.132832 0.221661 -0.089939 -0.063792 0.107578 -0.042526 -0.156943 0.152924 -0.109913 -0.109055 -0.076827 0.024759 0.166726 0.006231 0.015524 -0.042275 0.060000 -0.004546 -0.034933 0.000960 -0.073690 -0.017648 -0.052686 0.157787 -0.060782 -0.004237 -0.047121 -0.067493 0.103352 -0.211196 -0.156758 0.091279 0.046849 0.158989 -15 -0.038742 -0.181860 0.076422 0.131426 0.114511 -0.098082 0.178220 -0.010870 -0.073351 0.011362 -0.102118 0.022538 0.017705 -0.190122 -0.066676 -0.203583 0.031577 -0.098463 0.013143 0.065663 0.161812 -0.006469 0.180472 0.057628 -0.016408 -0.140837 -0.057834 -0.035330 -0.027581 0.201601 -0.016786 0.068714 -0.151775 0.025442 -0.151069 -0.079852 0.197741 0.085267 -0.030836 -0.120840 -0.152323 0.180546 0.119559 0.091588 -0.062273 0.098330 0.021480 -0.112452 -0.068518 0.119750 0.070742 0.224954 
-0.001131 0.101519 -0.157838 -0.112446 -0.068340 0.037492 0.016034 0.044963 0.041362 -0.083752 -0.120886 0.138655 0.039096 -0.254089 -0.061011 0.000680 0.064335 -0.045800 0.042723 0.005416 -0.063940 0.055067 -0.029680 -0.099408 0.048734 0.063396 0.055471 0.101279 -0.075155 -0.121488 0.004649 -0.219147 -0.012046 0.203272 -0.072077 0.048097 -0.141964 0.013599 0.007170 0.079798 0.055884 -0.049460 -0.051760 0.258512 -0.143937 0.057159 0.090866 -0.145522 -0.209132 0.174501 -0.077477 -0.152952 -0.012560 0.055284 0.195636 0.030398 0.019197 -0.000547 0.072016 -0.072628 -0.064853 -0.043871 -0.032922 -0.043684 -0.075295 0.130494 -0.071538 0.086941 -0.077534 -0.114188 0.026202 -0.169823 -0.011775 0.173016 0.026535 0.057350 -21 -0.037420 -0.185872 0.075212 0.132879 0.113678 -0.109162 0.182616 -0.006157 -0.069429 0.015099 -0.106679 0.026768 0.019289 -0.203249 -0.063559 -0.210844 0.034637 -0.106484 0.014566 0.065066 0.167636 -0.008758 0.178195 0.061537 -0.008662 -0.145704 -0.056369 -0.033000 -0.026404 0.205464 -0.012827 0.065584 -0.158924 0.023812 -0.155812 -0.089556 0.206468 0.094386 -0.034654 -0.124362 -0.155135 0.181627 0.122971 0.098689 -0.074483 0.101350 0.031976 -0.111959 -0.065660 0.129340 0.071359 0.233446 0.000091 0.104223 -0.162610 -0.115789 -0.067045 0.041616 0.017170 0.040659 0.043462 -0.090928 -0.119716 0.149512 0.037119 -0.255346 -0.065734 0.000813 0.065450 -0.061481 0.036765 0.007069 -0.071992 0.055703 -0.015099 -0.099670 0.046978 0.058795 0.057399 0.096904 -0.081081 -0.120390 0.004858 -0.215712 -0.017138 0.200160 -0.064753 0.048738 -0.137154 0.013772 0.011125 0.075829 0.053434 -0.044530 -0.054077 0.259626 -0.147324 0.047909 0.094240 -0.146305 -0.214405 0.176736 -0.080233 -0.151088 -0.017518 0.060732 0.197316 0.025517 0.018441 -0.000798 0.078102 -0.066112 -0.064402 -0.035222 -0.035596 -0.042058 -0.081538 0.131786 -0.067695 0.083589 -0.080054 -0.110993 0.029966 -0.183058 -0.017387 0.173030 0.031126 0.062610 -17 -0.003820 -0.258748 0.007511 0.073799 0.087261 
-0.166561 0.164332 0.103193 0.086301 -0.032979 -0.095967 0.114296 0.023835 -0.209231 0.087164 -0.264820 -0.005851 -0.289037 -0.053388 -0.027257 0.271622 -0.022929 0.143493 0.109600 0.104106 -0.188538 -0.123153 -0.017891 0.014887 0.181230 -0.006468 0.039014 -0.144970 -0.062753 -0.089660 -0.163364 0.210163 0.142884 -0.075443 -0.073802 -0.096341 0.035099 0.155266 0.212611 -0.160332 0.146925 0.138555 -0.055615 -0.024053 0.181979 -0.048630 0.178089 -0.024028 0.157596 -0.180789 -0.216782 -0.066465 0.103743 0.122200 -0.068508 0.147465 -0.110282 0.003898 0.167303 -0.087383 -0.037417 -0.086446 0.072099 0.169359 -0.193109 -0.129178 0.071290 -0.134761 -0.075162 0.187776 -0.051138 0.093057 0.001767 -0.021234 -0.181341 -0.128279 0.001945 0.036843 -0.066448 -0.146718 0.022767 0.183774 -0.000486 -0.038667 0.091760 0.056919 -0.029739 0.102607 -0.047634 -0.190349 0.213284 -0.046043 -0.152270 0.132497 0.031042 -0.125947 0.157345 -0.144651 -0.101663 -0.137798 0.000075 0.170512 -0.016977 0.015676 -0.058873 0.058126 0.048742 -0.019961 0.026767 -0.105519 0.007691 -0.039213 0.200967 -0.064217 -0.058417 -0.023250 -0.029369 0.180964 -0.262813 -0.285498 0.048213 0.068696 0.249874 -27 -0.032797 -0.195288 0.069216 0.133867 0.116239 -0.108610 0.185740 0.001955 -0.069550 0.011745 -0.109189 0.025742 0.016789 -0.195331 -0.063692 -0.214459 0.037900 -0.112968 0.012206 0.062131 0.171920 -0.008059 0.188124 0.063971 -0.005678 -0.152287 -0.063619 -0.034477 -0.029278 0.212291 -0.014158 0.063290 -0.162670 0.017202 -0.150986 -0.090824 0.207567 0.092173 -0.041477 -0.128388 -0.156671 0.178513 0.125848 0.103988 -0.072492 0.104946 0.038800 -0.114130 -0.067412 0.133543 0.070577 0.235690 -0.006427 0.108785 -0.161936 -0.123446 -0.068509 0.048673 0.026509 0.039035 0.043219 -0.087330 -0.122572 0.151673 0.033088 -0.258004 -0.068483 0.008963 0.071063 -0.058590 0.029735 0.015208 -0.073341 0.049656 -0.017644 -0.104967 0.052495 0.060227 0.055990 0.094341 -0.085367 -0.121196 0.006500 -0.214212 -0.020933 0.204051 
-0.055618 0.050813 -0.148297 0.018130 0.005314 0.070086 0.059821 -0.045203 -0.059863 0.264393 -0.141945 0.042087 0.099247 -0.147625 -0.218392 0.180336 -0.078466 -0.159995 -0.019518 0.060393 0.201583 0.023242 0.017542 0.004161 0.076312 -0.067760 -0.062782 -0.038626 -0.033126 -0.048242 -0.077472 0.137043 -0.071898 0.078713 -0.083230 -0.106774 0.034610 -0.186021 -0.027404 0.176257 0.028692 0.068829 -13 -0.016251 -0.202210 0.044203 0.088466 0.096507 -0.126337 0.159309 0.039759 -0.004149 -0.007473 -0.097593 0.056936 0.024332 -0.200536 0.006949 -0.221349 0.011997 -0.176938 -0.019105 0.014213 0.206004 -0.006592 0.149878 0.080568 0.030047 -0.150166 -0.079204 -0.015752 0.001669 0.182771 -0.015711 0.054895 -0.149152 -0.011902 -0.116221 -0.118940 0.195229 0.105479 -0.047902 -0.094361 -0.116620 0.097874 0.131618 0.149932 -0.103497 0.118331 0.071413 -0.082989 -0.049628 0.140691 0.008150 0.189791 -0.012064 0.134192 -0.171061 -0.151393 -0.064362 0.064645 0.060668 -0.005979 0.090131 -0.090428 -0.066909 0.141823 -0.018341 -0.146798 -0.070384 0.033815 0.111008 -0.118907 -0.042392 0.039118 -0.091291 -0.008561 0.075513 -0.077024 0.069740 0.033468 0.017223 -0.035632 -0.096566 -0.069470 0.013172 -0.144055 -0.072647 0.118751 0.048097 0.022301 -0.091841 0.053665 0.037036 0.025523 0.069075 -0.060058 -0.112545 0.225052 -0.098872 -0.048194 0.099967 -0.061093 -0.164970 0.155288 -0.104344 -0.117836 -0.068449 0.027251 0.178255 0.012349 0.016290 -0.034276 0.068646 -0.016171 -0.038488 -0.000464 -0.068722 -0.024287 -0.060751 0.158546 -0.063426 0.011159 -0.052163 -0.070405 0.093148 -0.201658 -0.132882 0.112448 0.048381 0.147612 -16 -0.035511 -0.202433 0.074765 0.137003 0.120944 -0.118830 0.192556 0.003611 -0.063796 0.008194 -0.113915 0.035100 0.017987 -0.209628 -0.054792 -0.230371 0.031344 -0.130028 0.005399 0.060351 0.183567 -0.007812 0.195586 0.067352 -0.000746 -0.156934 -0.066758 -0.034735 -0.023160 0.217018 -0.017857 0.071274 -0.169265 0.016625 -0.155325 -0.095593 0.218590 0.094721 -0.042875 
-0.128845 -0.157651 0.177348 0.135764 0.114344 -0.078313 0.107115 0.041332 -0.114620 -0.072006 0.140880 0.065547 0.235994 -0.004975 0.119182 -0.171357 -0.127930 -0.069664 0.051912 0.027303 0.033553 0.053918 -0.097968 -0.121946 0.152773 0.029148 -0.257393 -0.069424 0.013343 0.078061 -0.073751 0.027466 0.012917 -0.077428 0.046133 -0.000511 -0.104481 0.061024 0.061056 0.054253 0.080574 -0.086068 -0.123117 0.007125 -0.222340 -0.023653 0.207997 -0.047278 0.051753 -0.143597 0.023012 0.008929 0.073471 0.060931 -0.052019 -0.070416 0.277388 -0.146257 0.033883 0.101535 -0.145533 -0.219929 0.189848 -0.087398 -0.162705 -0.030509 0.055889 0.206549 0.028086 0.023714 -0.004239 0.078097 -0.061218 -0.065298 -0.034074 -0.040147 -0.044006 -0.080637 0.149864 -0.073541 0.072015 -0.083348 -0.112196 0.039528 -0.199341 -0.039605 0.177897 0.033195 0.086814 -10 -0.027585 -0.194088 0.059197 0.119915 0.110972 -0.109276 0.174774 0.014288 -0.042095 -0.000858 -0.100241 0.042340 0.015622 -0.194950 -0.039811 -0.211997 0.029892 -0.130081 -0.000935 0.049677 0.180622 -0.008247 0.175306 0.071121 0.006256 -0.149930 -0.067700 -0.032686 -0.017300 0.199301 -0.015064 0.058100 -0.155358 0.013960 -0.142041 -0.098388 0.204942 0.091471 -0.045583 -0.114575 -0.142506 0.146433 0.133451 0.124666 -0.082104 0.110766 0.052044 -0.105706 -0.057250 0.130125 0.046488 0.215138 -0.005023 0.115444 -0.165828 -0.127673 -0.063402 0.051871 0.031160 0.026004 0.060362 -0.090928 -0.104118 0.150557 0.012237 -0.215296 -0.068170 0.014316 0.081505 -0.084544 0.007917 0.021841 -0.077922 0.032457 0.022610 -0.088440 0.057599 0.052640 0.040163 0.045724 -0.092811 -0.102962 0.008316 -0.186414 -0.038811 0.175640 -0.023215 0.038689 -0.124989 0.026973 0.020717 0.056677 0.068148 -0.047932 -0.078206 0.253664 -0.129277 0.009820 0.098881 -0.111326 -0.200409 0.169981 -0.092098 -0.142026 -0.041227 0.047309 0.194378 0.028011 0.015665 -0.009721 0.075129 -0.051944 -0.050750 -0.027879 -0.043975 -0.038653 -0.069770 0.145200 -0.069432 0.052571 -0.072166 
-0.100100 0.053520 -0.189645 -0.060939 0.148898 0.033137 0.097645 -19 -0.027732 -0.191410 0.071374 0.126421 0.111070 -0.107876 0.176676 0.002585 -0.056623 0.007753 -0.107521 0.034261 0.017425 -0.196124 -0.054218 -0.209829 0.028584 -0.119391 0.010084 0.061913 0.173410 -0.011680 0.179995 0.065147 -0.001625 -0.142977 -0.063433 -0.032499 -0.024087 0.202601 -0.014654 0.065021 -0.157166 0.014068 -0.147521 -0.090713 0.201155 0.093299 -0.036933 -0.116991 -0.146786 0.164469 0.126893 0.108293 -0.072268 0.104702 0.041552 -0.111925 -0.064965 0.130148 0.057931 0.221071 -0.006557 0.107943 -0.158583 -0.118838 -0.068929 0.049770 0.029246 0.031526 0.049580 -0.091647 -0.108593 0.149711 0.025498 -0.241462 -0.062069 0.007796 0.072132 -0.069099 0.023806 0.015895 -0.073660 0.045420 -0.004704 -0.098964 0.055777 0.060156 0.047455 0.072451 -0.083713 -0.113153 0.005848 -0.206370 -0.026630 0.191575 -0.042831 0.048624 -0.137622 0.019955 0.012079 0.069761 0.063439 -0.046717 -0.067139 0.259465 -0.137931 0.031655 0.095115 -0.133943 -0.204654 0.176064 -0.087317 -0.151846 -0.029655 0.053095 0.194311 0.026293 0.017557 -0.005392 0.070930 -0.060431 -0.056958 -0.035492 -0.035889 -0.039722 -0.070587 0.140291 -0.072385 0.073148 -0.077481 -0.105852 0.037980 -0.184668 -0.039701 0.162597 0.027359 0.074613 -23 -0.033210 -0.184668 0.068293 0.134566 0.114270 -0.104607 0.178711 0.001390 -0.065522 0.009054 -0.106138 0.027556 0.018583 -0.199027 -0.063989 -0.206200 0.033015 -0.105313 0.014056 0.067949 0.166519 -0.009827 0.178455 0.063617 -0.006858 -0.146158 -0.062987 -0.037576 -0.029466 0.209047 -0.015356 0.066887 -0.160449 0.021807 -0.155659 -0.090378 0.207961 0.088324 -0.040872 -0.123138 -0.151890 0.176836 0.126912 0.103274 -0.069964 0.100173 0.030907 -0.109336 -0.069626 0.127748 0.067974 0.227998 -0.000047 0.103285 -0.161713 -0.120993 -0.068281 0.045139 0.020741 0.034411 0.045739 -0.092077 -0.115655 0.148847 0.035084 -0.252089 -0.062115 0.008133 0.069460 -0.054618 0.033052 0.012278 -0.067598 0.053203 -0.019130 
-0.097868 0.052464 0.061815 0.059491 0.093377 -0.076428 -0.121617 0.008539 -0.212696 -0.018011 0.203398 -0.061353 0.051035 -0.142295 0.014292 0.010089 0.074233 0.053472 -0.047926 -0.059945 0.256730 -0.142284 0.041962 0.092629 -0.139135 -0.213449 0.175437 -0.076828 -0.156503 -0.021848 0.053856 0.192573 0.025691 0.021557 0.001070 0.069397 -0.069436 -0.063409 -0.040655 -0.036039 -0.045850 -0.079422 0.133649 -0.068391 0.078443 -0.077553 -0.106411 0.028381 -0.181549 -0.019292 0.166210 0.027056 0.064981 -12 -0.021273 -0.227668 0.038558 0.090407 0.099022 -0.140312 0.164310 0.052033 0.009098 -0.013410 -0.097127 0.077289 0.022158 -0.207640 0.021235 -0.240299 0.006905 -0.202974 -0.020711 0.006990 0.226572 -0.006636 0.162136 0.086941 0.045041 -0.167648 -0.092316 -0.020622 0.001998 0.192480 -0.011041 0.053884 -0.157959 -0.018872 -0.112976 -0.125813 0.202160 0.115976 -0.054787 -0.090532 -0.118860 0.091723 0.145099 0.166208 -0.120442 0.130763 0.087454 -0.084446 -0.048270 0.149799 -0.000930 0.194755 -0.016566 0.135677 -0.178469 -0.171928 -0.064194 0.077802 0.071331 -0.014707 0.104369 -0.097693 -0.055641 0.159833 -0.037280 -0.129457 -0.070626 0.042838 0.127276 -0.129173 -0.052652 0.049975 -0.101368 -0.018337 0.098129 -0.073238 0.081095 0.033745 0.013358 -0.056144 -0.107931 -0.054709 0.014202 -0.137441 -0.089349 0.108359 0.072167 0.020375 -0.085731 0.056013 0.039300 0.019503 0.078827 -0.056725 -0.129791 0.227478 -0.089512 -0.069089 0.113283 -0.044508 -0.168694 0.157203 -0.113424 -0.119602 -0.082808 0.021959 0.180182 0.008560 0.020195 -0.041412 0.060062 -0.004268 -0.036427 0.002955 -0.074725 -0.013607 -0.056984 0.172375 -0.070957 -0.001239 -0.051512 -0.070750 0.111761 -0.219853 -0.165831 0.102570 0.048231 0.167540 diff --git a/graph/karate.edgelist b/graph/karate.edgelist deleted file mode 100644 index fa6fc750..00000000 --- a/graph/karate.edgelist +++ /dev/null @@ -1,78 +0,0 @@ -1 32 -1 22 -1 20 -1 18 -1 14 -1 13 -1 12 -1 11 -1 9 -1 8 -1 7 -1 6 -1 5 -1 4 -1 3 -1 2 -2 31 -2 22 -2 20 
-2 18 -2 14 -2 8 -2 4 -2 3 -3 14 -3 9 -3 10 -3 33 -3 29 -3 28 -3 8 -3 4 -4 14 -4 13 -4 8 -5 11 -5 7 -6 17 -6 11 -6 7 -7 17 -9 34 -9 33 -9 33 -10 34 -14 34 -15 34 -15 33 -16 34 -16 33 -19 34 -19 33 -20 34 -21 34 -21 33 -23 34 -23 33 -24 30 -24 34 -24 33 -24 28 -24 26 -25 32 -25 28 -25 26 -26 32 -27 34 -27 30 -28 34 -29 34 -29 32 -30 34 -30 33 -31 34 -31 33 -32 34 -32 33 -33 34 \ No newline at end of file diff --git a/node2vec_spark/README.md b/node2vec_spark/README.md deleted file mode 100644 index 43f6684f..00000000 --- a/node2vec_spark/README.md +++ /dev/null @@ -1,139 +0,0 @@ -# node2vec on spark - -This library is a implementation using scala for running on spark of *node2vec* as described in the paper: -> node2vec: Scalable Feature Learning for Networks. -> Aditya Grover and Jure Leskovec. -> Knowledge Discovery and Data Mining, 2016. -> - -The *node2vec* algorithm learns continuous representations for nodes in any (un)directed, (un)weighted graph. Please check the [project page](https://snap.stanford.edu/node2vec/) for more details. - - -### Building node2vec_spark -**In order to build node2vec_spark, use the following:** - -``` -$ git clone https://github.com/Skarface-/node2vec.git -$ mvn clean package -``` - -**and requires:**
-Maven 3.0.5 or newer
-Java 7+
-Scala 2.10 or newer. - -This will produce jar file in "node2vec_spark/target/" - -### Examples -This library has two functions: *randomwalk* and *embedding*.
-These were described in these papers [node2vec: Scalable Feature Learning for Networks](http://arxiv.org/abs/1607.00653) and [Efficient Estimation of Word Representations in Vector Space](https://arxiv.org/abs/1301.3781). - -### Random walk -Example: - - ./spark-submit --class com.navercorp.Main \ - ./node2vec_spark/target/node2vec-0.0.1-SNAPSHOT.jar \ - --cmd randomwalk --p 100.0 --q 100.0 --walkLength 40 \ - --input --output - -#### Options -Invoke a command without arguments to list available arguments and their default values: - -``` ---cmd COMMAND - Functions: randomwalk or embedding. If you want to execute all functions "randomwalk" and "embedding" sequentially input "node2vec". Default "node2vec" ---input [INPUT] - Input edgelist path. The supported input format is an edgelist: "node1_id_int node2_id_int " ---output [OUTPUT] - Random paths path. ---walkLength WALK_LENGTH - Length of walk per source. Default is 80. ---numWalks NUM_WALKS - Number of walks per source. Default is 10. ---p P - Return hyperparaemter. Default is 1.0. ---q Q - Inout hyperparameter. Default is 1.0. ---weighted Boolean - Specifying (un)weighted. Default is true. ---directed Boolean - Specifying (un)directed. Default is false. ---degree UPPER_BOUND_OF_NUMBER_OF_NEIGHBORS - Specifying upper bound of number of neighbors. Default is 30. ---indexed Boolean - Specifying whether nodes in edgelist are indexed or not. Default is true. -``` - -* If "indexed" is set to false, *node2vec_spark* index nodes in input edgelist, example:
- **unindexed edgelist:**
- node1 node2 1.0
- node2 node7 1.0
- - **indexed:**
- 1 2 1.0
- 2 3 1.0
- - 1 node1
- 2 node2
- 3 node7 - -#### Input -The supported input format is an edgelist: - - node1_id_int node2_id_int - or - node1_str node2_str , Please set the option "indexed" to false - - -#### Output -The output file (number of nodes)*numWalks random paths as follows: - - src_node_id_int node1_id_int node2_id_int ... noden_id_int - - -### Embedding random paths -Example: - - ./spark-submit --class com.navercorp.Main \ - ./node2vec_spark/target/node2vec-0.0.1-SNAPSHOT.jar \ - --cmd embedding --dim 50 --iter 20 \ - --input --nodePath --output - -#### Options -Invoke a command without arguments to list available arguments and their default values: - -``` ---cmd COMMAND - embedding. If you want to execute sequentially all functions: "randomwalk" and "embedding", input "node2vec". default "node2vec" ---input [INPUT] - Input random paths. The supported input format is an random paths: "src_node_id_int node1_id_int ... noden_id_int" ---output [OUTPUT] - word2vec model(.bin) and embeddings(.emb). ---nodePath [NODE\_PATH] - Input node2index path. The supported input format: "node1_str node1_id_int" ---iter ITERATION - Number of epochs in SGD. Default 10. ---dim DIMENSION - Number of dimensions. Default is 128. ---window WINDOW_SIZE - Context size for optimization. Default is 10. - -``` - -#### Input -The supported input format is an random paths: - - src_node_id_int node1_id_int ... noden_id_int - -#### Output -The output files are **embeddings and word2vec model.** The embeddings file has the following format: - - node1_str dim1 dim2 ... dimd - -where dim1, ... , dimd is the d-dimensional representation learned by word2vec. - -the output file *word2vec model* has the spark word2vec model format. please reference to https://spark.apache.org/docs/1.5.2/mllib-feature-extraction.html#word2vec - -## References -1. [node2vec: Scalable Feature Learning for Networks](http://arxiv.org/abs/1607.00653) -2. 
[Efficient Estimation of Word Representations in Vector Space](https://arxiv.org/abs/1301.3781) \ No newline at end of file diff --git a/node2vec_spark/pom.xml b/node2vec_spark/pom.xml deleted file mode 100644 index b9585761..00000000 --- a/node2vec_spark/pom.xml +++ /dev/null @@ -1,129 +0,0 @@ - - - - 4.0.0 - - com.navercorp - node2vec - jar - 0.0.1-SNAPSHOT - - node2vec_spark - http://snap.stanford.edu/node2vec/ - - - UTF-8 - bin - 2.4.3 - 1.4.0 - 1.7 - 2.10 - - - - - - org.scala-tools - maven-scala-plugin - 2.15.2 - - - - compile - - - - - - org.apache.maven.plugins - maven-dependency-plugin - 2.4 - - - copy-dependencies - package - - copy-dependencies - - - ${project.build.directory}/lib - - - - - - org.apache.maven.plugins - maven-shade-plugin - 1.6 - - - package - - shade - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 2.3.2 - - 1.7 - 1.7 - UTF-8 - - - - org.apache.maven.plugins - maven-surefire-plugin - - false - - - - - - - - org.apache.hadoop - hadoop-hdfs - 2.7.1 - - - org.scala-lang - scala-library - ${scala.binary.version}.5 - provided - - - org.apache.spark - spark-core_${scala.binary.version} - 1.6.1 - provided - - - org.apache.spark - spark-mllib_${scala.binary.version} - 1.6.1 - provided - - - com.github.scopt - scopt_${scala.binary.version} - 3.3.0 - - - org.scala-lang - scala-library - - - - - com.google.guava - guava - 19.0 - - - - diff --git a/node2vec_spark/src/main/resources/log4j2.properties b/node2vec_spark/src/main/resources/log4j2.properties deleted file mode 100644 index d941e1b9..00000000 --- a/node2vec_spark/src/main/resources/log4j2.properties +++ /dev/null @@ -1,9 +0,0 @@ - -appender.out.type = Console -appender.out.name = out -appender.out.layout.type = PatternLayout -appender.out.layout.pattern = [%30.30t] %-30.30c{1} %-5p %m%n -logger.springframework.name = org.springframework -logger.springframework.level = WARN -rootLogger.level = INFO -rootLogger.appenderRef.out.ref = out diff --git 
a/node2vec_spark/src/main/scala/com/navercorp/Main.scala b/node2vec_spark/src/main/scala/com/navercorp/Main.scala deleted file mode 100644 index f3494e54..00000000 --- a/node2vec_spark/src/main/scala/com/navercorp/Main.scala +++ /dev/null @@ -1,119 +0,0 @@ -package com.navercorp - -import java.io.Serializable -import org.apache.spark.{SparkContext, SparkConf} -import scopt.OptionParser -import com.navercorp.lib.AbstractParams - -object Main { - object Command extends Enumeration { - type Command = Value - val node2vec, randomwalk, embedding = Value - } - import Command._ - - case class Params(iter: Int = 10, - lr: Double = 0.025, - numPartition: Int = 10, - dim: Int = 128, - window: Int = 10, - walkLength: Int = 80, - numWalks: Int = 10, - p: Double = 1.0, - q: Double = 1.0, - weighted: Boolean = true, - directed: Boolean = false, - degree: Int = 30, - indexed: Boolean = true, - nodePath: String = null, - input: String = null, - output: String = null, - cmd: Command = Command.node2vec) extends AbstractParams[Params] with Serializable - val defaultParams = Params() - - val parser = new OptionParser[Params]("Node2Vec_Spark") { - head("Main") - opt[Int]("walkLength") - .text(s"walkLength: ${defaultParams.walkLength}") - .action((x, c) => c.copy(walkLength = x)) - opt[Int]("numWalks") - .text(s"numWalks: ${defaultParams.numWalks}") - .action((x, c) => c.copy(numWalks = x)) - opt[Double]("p") - .text(s"return parameter p: ${defaultParams.p}") - .action((x, c) => c.copy(p = x)) - opt[Double]("q") - .text(s"in-out parameter q: ${defaultParams.q}") - .action((x, c) => c.copy(q = x)) - opt[Boolean]("weighted") - .text(s"weighted: ${defaultParams.weighted}") - .action((x, c) => c.copy(weighted = x)) - opt[Boolean]("directed") - .text(s"directed: ${defaultParams.directed}") - .action((x, c) => c.copy(directed = x)) - opt[Int]("degree") - .text(s"degree: ${defaultParams.degree}") - .action((x, c) => c.copy(degree = x)) - opt[Boolean]("indexed") - .text(s"Whether nodes are 
indexed or not: ${defaultParams.indexed}") - .action((x, c) => c.copy(indexed = x)) - opt[String]("nodePath") - .text("Input node2index file path: empty") - .action((x, c) => c.copy(nodePath = x)) - opt[String]("input") - .required() - .text("Input edge file path: empty") - .action((x, c) => c.copy(input = x)) - opt[String]("output") - .required() - .text("Output path: empty") - .action((x, c) => c.copy(output = x)) - opt[String]("cmd") - .required() - .text(s"command: ${defaultParams.cmd.toString}") - .action((x, c) => c.copy(cmd = Command.withName(x))) - note( - """ - |For example, the following command runs this app on a synthetic dataset: - | - | bin/spark-submit --class com.nhn.sunny.vegapunk.ml.model.Node2vec \ - """.stripMargin + - s"| --lr ${defaultParams.lr}" + - s"| --iter ${defaultParams.iter}" + - s"| --numPartition ${defaultParams.numPartition}" + - s"| --dim ${defaultParams.dim}" + - s"| --window ${defaultParams.window}" + - s"| --input " + - s"| --node " + - s"| --output " - ) - } - - def main(args: Array[String]) = { - parser.parse(args, defaultParams).map { param => - val conf = new SparkConf().setAppName("Node2Vec") - val context: SparkContext = new SparkContext(conf) - - Node2vec.setup(context, param) - - param.cmd match { - case Command.node2vec => Node2vec.load() - .initTransitionProb() - .randomWalk() - .embedding() - .save() - case Command.randomwalk => Node2vec.load() - .initTransitionProb() - .randomWalk() - .saveRandomPath() - case Command.embedding => { - val randomPaths = Word2vec.setup(context, param).read(param.input) - Word2vec.fit(randomPaths).save(param.output) - Node2vec.loadNode2Id(param.nodePath).saveVectors() - } - } - } getOrElse { - sys.exit(1) - } - } -} diff --git a/node2vec_spark/src/main/scala/com/navercorp/Node2vec.scala b/node2vec_spark/src/main/scala/com/navercorp/Node2vec.scala deleted file mode 100644 index 07ec21a1..00000000 --- a/node2vec_spark/src/main/scala/com/navercorp/Node2vec.scala +++ /dev/null @@ -1,281 +0,0 
@@ -package com.navercorp - - -import java.io.Serializable -import scala.util.Try -import scala.collection.mutable.ArrayBuffer -import org.slf4j.{Logger, LoggerFactory} -import org.apache.spark.SparkContext -import org.apache.spark.rdd.RDD -import org.apache.spark.graphx.{EdgeTriplet, Graph, _} -import com.navercorp.graph.{GraphOps, EdgeAttr, NodeAttr} - -object Node2vec extends Serializable { - lazy val logger: Logger = LoggerFactory.getLogger(getClass.getName); - - var context: SparkContext = null - var config: Main.Params = null - var node2id: RDD[(String, Long)] = null - var indexedEdges: RDD[Edge[EdgeAttr]] = _ - var indexedNodes: RDD[(VertexId, NodeAttr)] = _ - var graph: Graph[NodeAttr, EdgeAttr] = _ - var randomWalkPaths: RDD[(Long, ArrayBuffer[Long])] = null - - def setup(context: SparkContext, param: Main.Params): this.type = { - this.context = context - this.config = param - - this - } - - def load(): this.type = { - val bcMaxDegree = context.broadcast(config.degree) - val bcEdgeCreator = config.directed match { - case true => context.broadcast(GraphOps.createDirectedEdge) - case false => context.broadcast(GraphOps.createUndirectedEdge) - } - - val inputTriplets: RDD[(Long, Long, Double)] = config.indexed match { - case true => readIndexedGraph(config.input) - case false => indexingGraph(config.input) - } - - indexedNodes = inputTriplets.flatMap { case (srcId, dstId, weight) => - bcEdgeCreator.value.apply(srcId, dstId, weight) - }.reduceByKey(_++_).map { case (nodeId, neighbors: Array[(VertexId, Double)]) => - var neighbors_ = neighbors - if (neighbors_.length > bcMaxDegree.value) { - neighbors_ = neighbors.sortWith{ case (left, right) => left._2 > right._2 }.slice(0, bcMaxDegree.value) - } - - (nodeId, NodeAttr(neighbors = neighbors_.distinct)) - }.repartition(200).cache - - indexedEdges = indexedNodes.flatMap { case (srcId, clickNode) => - clickNode.neighbors.map { case (dstId, weight) => - Edge(srcId, dstId, EdgeAttr()) - } - }.repartition(200).cache 
- - this - } - - def initTransitionProb(): this.type = { - val bcP = context.broadcast(config.p) - val bcQ = context.broadcast(config.q) - - graph = Graph(indexedNodes, indexedEdges) - .mapVertices[NodeAttr] { case (vertexId, clickNode) => - val (j, q) = GraphOps.setupAlias(clickNode.neighbors) - val nextNodeIndex = GraphOps.drawAlias(j, q) - clickNode.path = Array(vertexId, clickNode.neighbors(nextNodeIndex)._1) - - clickNode - } - .mapTriplets { edgeTriplet: EdgeTriplet[NodeAttr, EdgeAttr] => - val (j, q) = GraphOps.setupEdgeAlias(bcP.value, bcQ.value)(edgeTriplet.srcId, edgeTriplet.srcAttr.neighbors, edgeTriplet.dstAttr.neighbors) - edgeTriplet.attr.J = j - edgeTriplet.attr.q = q - edgeTriplet.attr.dstNeighbors = edgeTriplet.dstAttr.neighbors.map(_._1) - - edgeTriplet.attr - }.cache - - this - } - - def randomWalk(): this.type = { - val edge2attr = graph.triplets.map { edgeTriplet => - (s"${edgeTriplet.srcId}${edgeTriplet.dstId}", edgeTriplet.attr) - }.repartition(200).cache - edge2attr.first - - for (iter <- 0 until config.numWalks) { - var prevWalk: RDD[(Long, ArrayBuffer[Long])] = null - var randomWalk = graph.vertices.map { case (nodeId, clickNode) => - val pathBuffer = new ArrayBuffer[Long]() - pathBuffer.append(clickNode.path:_*) - (nodeId, pathBuffer) - }.cache - var activeWalks = randomWalk.first - graph.unpersist(blocking = false) - graph.edges.unpersist(blocking = false) - for (walkCount <- 0 until config.walkLength) { - prevWalk = randomWalk - randomWalk = randomWalk.map { case (srcNodeId, pathBuffer) => - val prevNodeId = pathBuffer(pathBuffer.length - 2) - val currentNodeId = pathBuffer.last - - (s"$prevNodeId$currentNodeId", (srcNodeId, pathBuffer)) - }.join(edge2attr).map { case (edge, ((srcNodeId, pathBuffer), attr)) => - try { - val nextNodeIndex = GraphOps.drawAlias(attr.J, attr.q) - val nextNodeId = attr.dstNeighbors(nextNodeIndex) - pathBuffer.append(nextNodeId) - - (srcNodeId, pathBuffer) - } catch { - case e: Exception => throw new 
RuntimeException(e.getMessage) - } - }.cache - - activeWalks = randomWalk.first() - prevWalk.unpersist(blocking=false) - } - - - if (randomWalkPaths != null) { - val prevRandomWalkPaths = randomWalkPaths - randomWalkPaths = randomWalkPaths.union(randomWalk).cache() - randomWalkPaths.first - prevRandomWalkPaths.unpersist(blocking = false) - } else { - randomWalkPaths = randomWalk - } - } - - this - } - - def embedding(): this.type = { - val randomPaths = randomWalkPaths.map { case (vertexId, pathBuffer) => - Try(pathBuffer.map(_.toString).toIterable).getOrElse(null) - }.filter(_!=null) - - Word2vec.setup(context, config).fit(randomPaths) - - this - } - - def save(): this.type = { - this.saveRandomPath() - .saveModel() - .saveVectors() - } - - def saveRandomPath(): this.type = { - randomWalkPaths - .map { case (vertexId, pathBuffer) => - Try(pathBuffer.mkString("\t")).getOrElse(null) - } - .filter(x => x != null && x.replaceAll("\\s", "").length > 0) - .repartition(200) - .saveAsTextFile(config.output) - - this - } - - def saveModel(): this.type = { - Word2vec.save(config.output) - - this - } - - def saveVectors(): this.type = { - val node2vector = context.parallelize(Word2vec.getVectors.toList) - .map { case (nodeId, vector) => - (nodeId.toLong, vector.mkString(",")) - } - - if (this.node2id != null) { - val id2Node = this.node2id.map{ case (strNode, index) => - (index, strNode) - } - - node2vector.join(id2Node) - .map { case (nodeId, (vector, name)) => s"$name\t$vector" } - .repartition(200) - .saveAsTextFile(s"${config.output}.emb") - } else { - node2vector.map { case (nodeId, vector) => s"$nodeId\t$vector" } - .repartition(200) - .saveAsTextFile(s"${config.output}.emb") - } - - this - } - - def cleanup(): this.type = { - node2id.unpersist(blocking = false) - indexedEdges.unpersist(blocking = false) - indexedNodes.unpersist(blocking = false) - graph.unpersist(blocking = false) - randomWalkPaths.unpersist(blocking = false) - - this - } - - def 
loadNode2Id(node2idPath: String): this.type = { - try { - this.node2id = context.textFile(config.nodePath).map { node2index => - val Array(strNode, index) = node2index.split("\\s") - (strNode, index.toLong) - } - } catch { - case e: Exception => logger.info("Failed to read node2index file.") - this.node2id = null - } - - this - } - - def readIndexedGraph(tripletPath: String) = { - val bcWeighted = context.broadcast(config.weighted) - - val rawTriplets = context.textFile(tripletPath) - if (config.nodePath == null) { - this.node2id = createNode2Id(rawTriplets.map { triplet => - val parts = triplet.split("\\s") - (parts.head, parts(1), -1) - }) - } else { - loadNode2Id(config.nodePath) - } - - rawTriplets.map { triplet => - val parts = triplet.split("\\s") - val weight = bcWeighted.value match { - case true => Try(parts.last.toDouble).getOrElse(1.0) - case false => 1.0 - } - - (parts.head.toLong, parts(1).toLong, weight) - } - } - - - def indexingGraph(rawTripletPath: String): RDD[(Long, Long, Double)] = { - val rawEdges = context.textFile(rawTripletPath).map { triplet => - val parts = triplet.split("\\s") - - Try { - (parts.head, parts(1), Try(parts.last.toDouble).getOrElse(1.0)) - }.getOrElse(null) - }.filter(_!=null) - - this.node2id = createNode2Id(rawEdges) - - rawEdges.map { case (src, dst, weight) => - (src, (dst, weight)) - }.join(node2id).map { case (src, (edge: (String, Double), srcIndex: Long)) => - try { - val (dst: String, weight: Double) = edge - (dst, (srcIndex, weight)) - } catch { - case e: Exception => null - } - }.filter(_!=null).join(node2id).map { case (dst, (edge: (Long, Double), dstIndex: Long)) => - try { - val (srcIndex, weight) = edge - (srcIndex, dstIndex, weight) - } catch { - case e: Exception => null - } - }.filter(_!=null) - } - - def createNode2Id[T <: Any](triplets: RDD[(String, String, T)]) = triplets.flatMap { case (src, dst, weight) => - Try(Array(src, dst)).getOrElse(Array.empty[String]) - }.distinct().zipWithIndex() - -} diff 
--git a/node2vec_spark/src/main/scala/com/navercorp/Word2vec.scala b/node2vec_spark/src/main/scala/com/navercorp/Word2vec.scala deleted file mode 100644 index aa209cf6..00000000 --- a/node2vec_spark/src/main/scala/com/navercorp/Word2vec.scala +++ /dev/null @@ -1,55 +0,0 @@ -package com.navercorp - -import org.apache.spark.SparkContext -import org.apache.spark.mllib.feature.{Word2Vec, Word2VecModel} -import org.apache.spark.rdd.RDD - -object Word2vec extends Serializable { - var context: SparkContext = null - var word2vec = new Word2Vec() - var model: Word2VecModel = null - - def setup(context: SparkContext, param: Main.Params): this.type = { - this.context = context - /** - * model = sg - * update = hs - */ - word2vec.setLearningRate(param.lr) - .setNumIterations(param.iter) - .setNumPartitions(param.numPartition) - .setMinCount(0) - .setVectorSize(param.dim) - - val word2vecWindowField = word2vec.getClass.getDeclaredField("org$apache$spark$mllib$feature$Word2Vec$$window") - word2vecWindowField.setAccessible(true) - word2vecWindowField.setInt(word2vec, param.window) - - this - } - - def read(path: String): RDD[Iterable[String]] = { - context.textFile(path).repartition(200).map(_.split("\\s").toSeq) - } - - def fit(input: RDD[Iterable[String]]): this.type = { - model = word2vec.fit(input) - - this - } - - def save(outputPath: String): this.type = { - model.save(context, s"$outputPath.bin") - this - } - - def load(path: String): this.type = { - model = Word2VecModel.load(context, path) - - this - } - - def getVectors = this.model.getVectors - -} - diff --git a/node2vec_spark/src/main/scala/com/navercorp/graph/GraphOps.scala b/node2vec_spark/src/main/scala/com/navercorp/graph/GraphOps.scala deleted file mode 100644 index 960fa8c9..00000000 --- a/node2vec_spark/src/main/scala/com/navercorp/graph/GraphOps.scala +++ /dev/null @@ -1,69 +0,0 @@ -package com.navercorp.graph - -import scala.collection.mutable.ArrayBuffer - -object GraphOps { - def setupAlias(nodeWeights: 
Array[(Long, Double)]): (Array[Int], Array[Double]) = { - val K = nodeWeights.length - val J = Array.fill(K)(0) - val q = Array.fill(K)(0.0) - - val smaller = new ArrayBuffer[Int]() - val larger = new ArrayBuffer[Int]() - - val sum = nodeWeights.map(_._2).sum - nodeWeights.zipWithIndex.foreach { case ((nodeId, weight), i) => - q(i) = K * weight / sum - if (q(i) < 1.0) { - smaller.append(i) - } else { - larger.append(i) - } - } - - while (smaller.nonEmpty && larger.nonEmpty) { - val small = smaller.remove(smaller.length - 1) - val large = larger.remove(larger.length - 1) - - J(small) = large - q(large) = q(large) + q(small) - 1.0 - if (q(large) < 1.0) smaller.append(large) - else larger.append(large) - } - - (J, q) - } - - def setupEdgeAlias(p: Double = 1.0, q: Double = 1.0)(srcId: Long, srcNeighbors: Array[(Long, Double)], dstNeighbors: Array[(Long, Double)]): (Array[Int], Array[Double]) = { - val neighbors_ = dstNeighbors.map { case (dstNeighborId, weight) => - var unnormProb = weight / q - if (srcId == dstNeighborId) unnormProb = weight / p - else if (srcNeighbors.exists(_._1 == dstNeighborId)) unnormProb = weight - - (dstNeighborId, unnormProb) - } - - setupAlias(neighbors_) - } - - def drawAlias(J: Array[Int], q: Array[Double]): Int = { - val K = J.length - val kk = math.floor(math.random * K).toInt - - if (math.random < q(kk)) kk - else J(kk) - } - - lazy val createUndirectedEdge = (srcId: Long, dstId: Long, weight: Double) => { - Array( - (srcId, Array((dstId, weight))), - (dstId, Array((srcId, weight))) - ) - } - - lazy val createDirectedEdge = (srcId: Long, dstId: Long, weight: Double) => { - Array( - (srcId, Array((dstId, weight))) - ) - } -} diff --git a/node2vec_spark/src/main/scala/com/navercorp/graph/package.scala b/node2vec_spark/src/main/scala/com/navercorp/graph/package.scala deleted file mode 100644 index 1b83969c..00000000 --- a/node2vec_spark/src/main/scala/com/navercorp/graph/package.scala +++ /dev/null @@ -1,12 +0,0 @@ -package com.navercorp - 
-import java.io.Serializable - -package object graph { - case class NodeAttr(var neighbors: Array[(Long, Double)] = Array.empty[(Long, Double)], - var path: Array[Long] = Array.empty[Long]) extends Serializable - - case class EdgeAttr(var dstNeighbors: Array[Long] = Array.empty[Long], - var J: Array[Int] = Array.empty[Int], - var q: Array[Double] = Array.empty[Double]) extends Serializable -} diff --git a/node2vec_spark/src/main/scala/com/navercorp/lib/AbstractParams.scala b/node2vec_spark/src/main/scala/com/navercorp/lib/AbstractParams.scala deleted file mode 100644 index 0790ab9c..00000000 --- a/node2vec_spark/src/main/scala/com/navercorp/lib/AbstractParams.scala +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.navercorp.lib - -import scala.reflect.runtime.universe._ - -/** - * Abstract class for parameter case classes. - * This overrides the [[toString]] method to print all case class fields by name and value. - * @tparam T Concrete parameter class. 
- */ -abstract class AbstractParams[T: TypeTag] { - - private def tag: TypeTag[T] = typeTag[T] - - /** - * Finds all case class fields in concrete class instance, and outputs them in JSON-style format: - * { - * [field name]:\t[field value]\n - * [field name]:\t[field value]\n - * ... - * } - */ - override def toString: String = { - val tpe = tag.tpe - val allAccessors = tpe.declarations.collect { - case m: MethodSymbol if m.isCaseAccessor => m - } - val mirror = runtimeMirror(getClass.getClassLoader) - val instanceMirror = mirror.reflect(this) - allAccessors.map { f => - val paramName = f.name.toString - val fieldMirror = instanceMirror.reflectField(f) - val paramValue = fieldMirror.get - s" $paramName:\t$paramValue" - }.mkString("{\n", ",\n", "\n}") - } -} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index faa710ca..319e7392 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,167 @@ -networkx==1.11 -numpy==1.11.2 -gensim==0.13.3 +absl-py==1.0.0 +argon2-cffi==21.1.0 +astunparse==1.6.3 +attrs==21.2.0 +backcall==0.2.0 +beautifulsoup4==4.10.0 +bleach==4.1.0 +blis==0.7.4 +cachetools==4.2.4 +catalogue==2.0.6 +cbor==1.0.0 +cffi==1.14.6 +charset-normalizer==2.1.1 +chest==0.2.3 +click==7.1.2 +cwl-eval==1.0.10 +cycler==0.10.0 +cymem==2.0.5 +Cython==0.29.28 +debugpy==1.4.3 +decorator==5.1.0 +defusedxml==0.7.1 +deprecation==2.1.0 +dill==0.3.4 +entrypoints==0.3 +filelock==3.4.2 +flatbuffers==2.0 +fonttools==4.28.1 +gast==0.4.0 +gensim==4.2.0 +google-auth==2.3.3 +google-auth-oauthlib==0.4.6 +google-pasta==0.2.0 +grpcio==1.42.0 +h5py==3.6.0 +HeapDict==1.0.1 +huggingface-hub==0.2.1 +idna==3.4 +ijson==3.1.4 +imageio-ffmpeg==0.4.5 +importlib-metadata==4.8.2 +ipykernel==6.4.1 +ipython==7.27.0 +ipython-genutils==0.2.0 +ir-datasets==0.5.0 +ir-measures==0.2.3 +jedi==0.18.0 +Jinja2==3.0.1 +joblib==1.0.1 +jsonschema==4.0.1 +jupyter-client==7.0.3 +jupyter-core==4.8.1 +jupyterlab-pygments==0.1.2 +keras==2.7.0 +Keras-Preprocessing==1.1.2 
+kiwisolver==1.3.2 +libclang==12.0.0 +lxml==4.6.3 +lz4==4.0.0 +Markdown==3.3.6 +MarkupSafe==2.0.1 +matchpy==0.5.5 +matplotlib==3.5.0 +matplotlib-inline==0.1.3 +mistune==0.8.4 +more-itertools==8.12.0 +multiset==2.1.1 +murmurhash==1.0.5 +nbclient==0.5.4 +nbconvert==6.2.0 +nbformat==5.1.3 +NERDA==1.0.0 +nest-asyncio==1.5.1 +networkx==2.8.8 +nltk==3.6.5 +notebook==6.4.4 +nptyping==1.4.4 +numpy==1.21.2 +oauthlib==3.2.2 +opt-einsum==3.3.0 +packaging==21.3 +pandas==1.3.3 +pandocfilters==1.5.0 +parso==0.8.2 +pathy==0.6.0 +patsy==0.5.2 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==8.4.0 +preshed==3.0.5 +progressbar==2.5 +prometheus-client==0.11.0 +prompt-toolkit==3.0.20 +protobuf==3.19.1 +ptyprocess==0.7.0 +pyaml==21.10.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pyautocorpus==0.1.8 +pyconll==3.1.0 +pycparser==2.20 +pydantic==1.8.2 +Pygments==2.10.0 +pyjnius==1.3.0 +pyndeval==0.0.2 +pyparsing==2.4.7 +pyrsistent==0.18.0 +python-crfsuite==0.9.7 +python-dateutil==2.8.2 +python-terrier==0.8.0 +pytrec-eval-terrier==0.5.1 +PyYAML==6.0 +pyzmq==22.3.0 +regex==2021.10.23 +requests==2.28.1 +requests-oauthlib==1.3.0 +rsa==4.8 +sacremoses==0.0.46 +scikit-learn==0.23.2 +scikit-optimize==0.9.0 +scipy==1.7.1 +seaborn==0.11.2 +Send2Trash==1.8.0 +setuptools-scm==6.3.2 +six==1.16.0 +sklearn==0.0 +sklearn-crfsuite==0.3.6 +smart-open==5.2.1 +soupsieve==2.3.1 +spacy==3.1.2 +spacy-legacy==3.0.8 +srsly==2.4.1 +statsmodels==0.13.2 +tabulate==0.8.9 +tensorboard==2.7.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorflow==2.7.0 +tensorflow-estimator==2.7.0 +tensorflow-io-gcs-filesystem==0.22.0 +termcolor==1.1.0 +terminado==0.12.1 +testpath==0.5.0 +thinc==8.0.10 +threadpoolctl==2.2.0 +tokenizers==0.10.3 +tomli==1.2.2 +torch==1.10.1 +tornado==6.1 +tqdm==4.62.2 +traitlets==5.1.0 +transformers==4.15.0 +trec-car-tools==2.6 +typer==0.3.2 +typing-extensions==3.10.0.2 +typish==1.9.3 +urllib3==1.26.13 +warc3-wet==0.2.3 +warc3-wet-clueweb09==0.2.5 +wasabi==0.8.2 +wcwidth==0.2.5 
+webencodings==0.5.1 +Werkzeug==2.0.2 +wget==3.2 +wrapt==1.13.3 +zipp==3.6.0 +zlib-state==0.1.5 diff --git a/scripts/bayesian_email_200.sh b/scripts/bayesian_email_200.sh new file mode 100755 index 00000000..d5911909 --- /dev/null +++ b/scripts/bayesian_email_200.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.edgelist --labels /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.labels --output /data/s1674307/snacs/Results/email-Eu-core/results_bayesian_email --bayesian_opt --iter_bayesian 200 --train_set 0.1 --directed --seed 20034 diff --git a/scripts/bayesian_facebook_200.sh b/scripts/bayesian_facebook_200.sh new file mode 100755 index 00000000..e595776c --- /dev/null +++ b/scripts/bayesian_facebook_200.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/facebook/facebook.edgelist --labels /data/s1674307/snacs/Datasets/facebook/facebook.labels --output /data/s1674307/snacs/Results/facebook/results_bayesian_facebook --bayesian_opt --iter_bayesian 200 --train_set 0.1 --directed --seed 20038 diff --git a/scripts/bayesian_git_200.sh b/scripts/bayesian_git_200.sh new file mode 100755 index 00000000..fc487923 --- /dev/null +++ b/scripts/bayesian_git_200.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/git/git.edgelist --labels /data/s1674307/snacs/Datasets/git/git.labels --output /data/s1674307/snacs/Results/git/results_bayesian_git --bayesian_opt --iter_bayesian 200 --train_set 0.1 --directed --seed 20091 diff --git a/scripts/email_baseline.sh b/scripts/email_baseline.sh new file mode 100755 index 00000000..c994a37a --- /dev/null +++ b/scripts/email_baseline.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.edgelist --labels 
/data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.labels --output /data/s1674307/snacs/Results/email-Eu-core/results_baseline_email --train_set 0.8 --directed --seed 20234 --p 4.0 --q 1.8984324672375856 diff --git a/scripts/email_best_restart.sh b/scripts/email_best_restart.sh new file mode 100755 index 00000000..9f39e9c6 --- /dev/null +++ b/scripts/email_best_restart.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.edgelist --labels /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.labels --output /data/s1674307/snacs/Results/email-Eu-core/results_best_restart_email --train_set 0.8 --directed --seed 20434 --restarts --p 4.0 --q 1.8984324672375856 --omega 0.28151118018663446 --epsilon 0.15 diff --git a/scripts/email_partitions_2_l0.sh b/scripts/email_partitions_2_l0.sh new file mode 100755 index 00000000..555c309b --- /dev/null +++ b/scripts/email_partitions_2_l0.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.edgelist --labels /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.labels --output /data/s1674307/snacs/Results/email-Eu-core/results_partitions_2_email_l0 --train_set 0.8 --directed --seed 22134 --partitions 2 --p 4.0 4.0 --q 2.045629250781856 1.8984324672375856 + + +#2: +#lambda = 0.0 +#p-list = [4.0, 4.0] +#q-list = [2.045629250781856, 1.8984324672375856] + diff --git a/scripts/email_partitions_2_log_mu.sh b/scripts/email_partitions_2_log_mu.sh new file mode 100755 index 00000000..aaccacc9 --- /dev/null +++ b/scripts/email_partitions_2_log_mu.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.edgelist --labels /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.labels --output /data/s1674307/snacs/Results/email-Eu-core/results_partitions_2_email_log_mu 
--train_set 0.8 --directed --seed 22334 --partitions 2 --p 0.20020465693121992 3.9485236573580504 --q 0.20324601704201256 3.9989870044384537 + + +#mean: +#lambda = 0.35799953691 +#p-list = [0.20020465693121992, 3.9485236573580504] +#q-list = [0.20324601704201256, 3.9989870044384537] diff --git a/scripts/email_partitions_2_log_sigma.sh b/scripts/email_partitions_2_log_sigma.sh new file mode 100755 index 00000000..de7805af --- /dev/null +++ b/scripts/email_partitions_2_log_sigma.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.edgelist --labels /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.labels --output /data/s1674307/snacs/Results/email-Eu-core/results_partitions_2_email_log_sigma --train_set 0.8 --directed --seed 22034 --partitions 2 --p 0.20020465693121992 4.0 --q 0.20324601704201256 1.8984324672375856 + + +#std: +#lambda = 0.02614553577 +#p-list = [0.20020465693121992, 4.0] +#q-list = [0.20324601704201256, 1.8984324672375856] diff --git a/scripts/email_partitions_4_l0.sh b/scripts/email_partitions_4_l0.sh new file mode 100755 index 00000000..cf91e1f0 --- /dev/null +++ b/scripts/email_partitions_4_l0.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.edgelist --labels /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.labels --output /data/s1674307/snacs/Results/email-Eu-core/results_partitions_4_email_l0 --train_set 0.8 --directed --seed 24134 --partitions 4 --p 2.8674846577475455 3.70664598066076 4.0 4.0 --q 2.05561706560439 1.4022557731626055 2.045629250781856 1.8984324672375856 + + +#4: +#lambda = 0.0 +#p-list = [2.8674846577475455, 3.70664598066076, 4.0, 4.0] +#q-list = [2.05561706560439, 1.4022557731626055, 2.045629250781856, 1.8984324672375856] diff --git a/scripts/email_partitions_4_log_mu.sh b/scripts/email_partitions_4_log_mu.sh new file mode 100755 index 
00000000..89f7b379 --- /dev/null +++ b/scripts/email_partitions_4_log_mu.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.edgelist --labels /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.labels --output /data/s1674307/snacs/Results/email-Eu-core/results_partitions_4_email_log_mu --train_set 0.8 --directed --seed 24334 --partitions 4 --p 0.20055087005505193 3.86869464452510942 0.20020465693121992 3.9485236573580504 --q 3.9964945282600097 0.20473325176637816 0.20324601704201256 3.9989870044384537 + + +#mean: +#lambda = 0.11933317897 +#p-list = [0.20055087005505193, 3.8686946445251094, 0.20020465693121992, 3.9485236573580504] +#q-list = [3.9964945282600097, 0.20473325176637816, 0.20324601704201256, 3.9989870044384537] diff --git a/scripts/email_partitions_4_log_sigma.sh b/scripts/email_partitions_4_log_sigma.sh new file mode 100755 index 00000000..cd77d45c --- /dev/null +++ b/scripts/email_partitions_4_log_sigma.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.edgelist --labels /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.labels --output /data/s1674307/snacs/Results/email-Eu-core/results_partitions_4_email_log_sigma --train_set 0.8 --directed --seed 24034 --partitions 4 --p 0.20055087005505193 0.20020465693121992 4.0 4.0 --q 3.9964945282600097 0.20324601704201256 2.045629250781856 1.8984324672375856 + + +#std: +#lambda = 0.01228759817 +#p-list = [0.20055087005505193, 0.20020465693121992, 4.0, 4.0] +#q-list = [3.9964945282600097, 0.20324601704201256, 2.045629250781856, 1.8984324672375856] diff --git a/scripts/facebook_baseline.sh b/scripts/facebook_baseline.sh new file mode 100755 index 00000000..ba810dda --- /dev/null +++ b/scripts/facebook_baseline.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input 
/data/s1674307/snacs/Datasets/facebook/facebook.edgelist --labels /data/s1674307/snacs/Datasets/facebook/facebook.labels --output /data/s1674307/snacs/Results/facebook/results_baseline_facebook --train_set 0.8 --directed --seed 20638 --p 0.7572958018829294 --q 1.473612599456155 diff --git a/scripts/facebook_best_restart.sh b/scripts/facebook_best_restart.sh new file mode 100755 index 00000000..f189ea4b --- /dev/null +++ b/scripts/facebook_best_restart.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/facebook/facebook.edgelist --labels /data/s1674307/snacs/Datasets/facebook/facebook.labels --output /data/s1674307/snacs/Results/facebook/results_best_restart_facebook --train_set 0.8 --directed --seed 20438 --restarts --p 0.7572958018829294 --q 1.473612599456155 --omega 0.14421504937982915 --epsilon 0.028564075866728048 diff --git a/scripts/facebook_partitions_2_l0.sh b/scripts/facebook_partitions_2_l0.sh new file mode 100755 index 00000000..b2e3047f --- /dev/null +++ b/scripts/facebook_partitions_2_l0.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/facebook/facebook.edgelist --labels /data/s1674307/snacs/Datasets/facebook/facebook.labels --output /data/s1674307/snacs/Results/facebook/results_partitions_2_facebook_l0 --train_set 0.8 --directed --seed 22138 --partitions 2 --p 0.7572958018829294 3.952918871253783 --q 1.473612599456155 3.882408596560285 + +#2: +#lambda = 0.0 +#p-list = [0.7572958018829294, 3.952918871253783] +#q-list = [1.473612599456155, 3.882408596560285] diff --git a/scripts/facebook_partitions_2_log_mu.sh b/scripts/facebook_partitions_2_log_mu.sh new file mode 100755 index 00000000..4043eaff --- /dev/null +++ b/scripts/facebook_partitions_2_log_mu.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/facebook/facebook.edgelist --labels 
/data/s1674307/snacs/Datasets/facebook/facebook.labels --output /data/s1674307/snacs/Results/facebook/results_partitions_2_facebook_log_mu --train_set 0.8 --directed --seed 22338 --partitions 2 --p 0.20035678700497078 3.9624962332158433 --q 3.9346541105982795 0.20002695371111892 + +#mean: +#lambda = 0.35799953691 +#p-list = [0.20035678700497078, 3.9624962332158433] +#q-list = [3.9346541105982795, 0.20002695371111892] diff --git a/scripts/facebook_partitions_2_log_sigma.sh b/scripts/facebook_partitions_2_log_sigma.sh new file mode 100755 index 00000000..99b55938 --- /dev/null +++ b/scripts/facebook_partitions_2_log_sigma.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/facebook/facebook.edgelist --labels /data/s1674307/snacs/Datasets/facebook/facebook.labels --output /data/s1674307/snacs/Results/facebook/results_partitions_2_facebook_log_sigma --train_set 0.8 --directed --seed 22038 --partitions 2 --p 3.952918871253783 0.2582509494247336 --q 3.882408596560285 0.20001483371562215 + +#std: +#lambda = 0.0096750859 +#p-list = [3.952918871253783, 0.2582509494247336] +#q-list = [3.882408596560285, 0.20001483371562215] diff --git a/scripts/facebook_partitions_4_l0.sh b/scripts/facebook_partitions_4_l0.sh new file mode 100755 index 00000000..d8432b26 --- /dev/null +++ b/scripts/facebook_partitions_4_l0.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/facebook/facebook.edgelist --labels /data/s1674307/snacs/Datasets/facebook/facebook.labels --output /data/s1674307/snacs/Results/facebook/results_partitions_4_facebook_l0 --train_set 0.8 --directed --seed 24138 --partitions 4 --p 0.7572958018829294 0.43847646369464355 0.5441322973617186 3.952918871253783 --q 1.473612599456155 1.356481752864832 1.4738792294786813 3.882408596560285 + +#4: +#lambda = 0.0 +#p-list = [0.7572958018829294, 0.43847646369464355, 0.5441322973617186, 3.952918871253783] +#q-list = 
[1.473612599456155, 1.356481752864832, 1.4738792294786813, 3.882408596560285] diff --git a/scripts/facebook_partitions_4_log_mu.sh b/scripts/facebook_partitions_4_log_mu.sh new file mode 100755 index 00000000..f845d58c --- /dev/null +++ b/scripts/facebook_partitions_4_log_mu.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/facebook/facebook.edgelist --labels /data/s1674307/snacs/Datasets/facebook/facebook.labels --output /data/s1674307/snacs/Results/facebook/results_partitions_4_facebook_log_mu --train_set 0.8 --directed --seed 24338 --partitions 4 --p 4.0 0.20035678700497078 3.9624962332158433 0.20343128081760056 --q 4.0 3.9346541105982795 0.20002695371111892 0.20319502640848378 + +#p-list = [4.0, 0.20035678700497078, 3.9624962332158433, 0.20343128081760056] +#q-list = [4.0, 3.9346541105982795, 0.20002695371111892, 0.20319502640848378] diff --git a/scripts/facebook_partitions_4_log_sigma.sh b/scripts/facebook_partitions_4_log_sigma.sh new file mode 100755 index 00000000..eea7059c --- /dev/null +++ b/scripts/facebook_partitions_4_log_sigma.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/facebook/facebook.edgelist --labels /data/s1674307/snacs/Datasets/facebook/facebook.labels --output /data/s1674307/snacs/Results/facebook/results_partitions_4_facebook_log_sigma --train_set 0.8 --directed --seed 24038 --partitions 4 --p 0.2789901587924842 3.952918871253783 0.2582509494247336 3.932640274977744 --q 2.8674846577475455 3.882408596560285 0.20001483371562215 0.20123973200915848 + +#std: +#lambda = 0.00432788555 +#p-list = [0.2789901587924842, 3.952918871253783, 0.2582509494247336, 3.932640274977744] +#q-list = [2.8674846577475455, 3.882408596560285, 0.20001483371562215, 0.20123973200915848] diff --git a/scripts/git_baseline.sh b/scripts/git_baseline.sh new file mode 100755 index 00000000..d402fec8 --- /dev/null +++ b/scripts/git_baseline.sh @@ -0,0 
+1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/git/git.edgelist --labels /data/s1674307/snacs/Datasets/git/git.labels --output /data/s1674307/snacs/Results/git/results_baseline_git --train_set 0.8 --directed --seed 20691 --p 0.20044094569330795 --q 0.9203925397880377 diff --git a/scripts/git_best_restart.sh b/scripts/git_best_restart.sh new file mode 100755 index 00000000..9e74a30f --- /dev/null +++ b/scripts/git_best_restart.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/git/git.edgelist --labels /data/s1674307/snacs/Datasets/git/git.labels --output /data/s1674307/snacs/Results/git/results_best_restart_git --train_set 0.8 --directed --seed 20491 --restarts --p 0.20044094569330795 --q 0.9203925397880377 --omega 0.8690968446233921 --epsilon 0.01073628193103627 diff --git a/scripts/git_partitions_2_l0.sh b/scripts/git_partitions_2_l0.sh new file mode 100755 index 00000000..417b9eb6 --- /dev/null +++ b/scripts/git_partitions_2_l0.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/git/git.edgelist --labels /data/s1674307/snacs/Datasets/git/git.labels --output /data/s1674307/snacs/Results/git/results_partitions_2_git_l0 --train_set 0.8 --directed --seed 22191 --partitions 2 --p 0.5428835233189814 0.20044094569330795 --q 0.2789901587924842 0.9203925397880377 + +#2: +#lambda = 0.0 +#p-list = [0.5428835233189814, 0.20044094569330795] +#q-list = [0.2789901587924842, 0.9203925397880377] diff --git a/scripts/git_partitions_2_log_mu.sh b/scripts/git_partitions_2_log_mu.sh new file mode 100755 index 00000000..5fad7e31 --- /dev/null +++ b/scripts/git_partitions_2_log_mu.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/git/git.edgelist --labels /data/s1674307/snacs/Datasets/git/git.labels --output 
/data/s1674307/snacs/Results/git/results_partitions_2_git_log_mu --train_set 0.8 --directed --seed 22391 --partitions 2 --p 3.9918006380440088 0.2006002451797317 --q 0.21978237126151454 3.983347440320607 + +#mean: +#lambda = 0.76243839424 +#p-list = [3.9918006380440088, 0.2006002451797317] +#q-list = [0.21978237126151454, 3.983347440320607] diff --git a/scripts/git_partitions_2_log_sigma.sh b/scripts/git_partitions_2_log_sigma.sh new file mode 100755 index 00000000..a5629508 --- /dev/null +++ b/scripts/git_partitions_2_log_sigma.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/git/git.edgelist --labels /data/s1674307/snacs/Datasets/git/git.labels --output /data/s1674307/snacs/Results/git/results_partitions_2_git_log_sigma --train_set 0.8 --directed --seed 22091 --partitions 2 --p 3.9045821112145163 0.20044094569330795 --q 3.9816568985190535 0.9203925397880377 + +#std: +#lambda = 0.00781529216 +#p-list = [3.9045821112145163, 0.20044094569330795] +#q-list = [3.9816568985190535, 0.9203925397880377] diff --git a/scripts/git_partitions_4_l0.sh b/scripts/git_partitions_4_l0.sh new file mode 100755 index 00000000..1d65999d --- /dev/null +++ b/scripts/git_partitions_4_l0.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/git/git.edgelist --labels /data/s1674307/snacs/Datasets/git/git.labels --output /data/s1674307/snacs/Results/git/results_partitions_4_git_l0 --train_set 0.8 --directed --seed 24191 --partitions 4 --p 0.5428835233189814 2.05561706560439 1.5241911771276138 0.20044094569330795 --q 0.2789901587924842 2.8674846577475455 0.23400689010420173 0.9203925397880377 + +#4: +#lambda = 0.0 +#p-list = [0.5428835233189814, 2.05561706560439, 1.5241911771276138, 0.20044094569330795] +#q-list = [0.2789901587924842, 2.8674846577475455, 0.23400689010420173, 0.9203925397880377] diff --git a/scripts/git_partitions_4_log_mu.sh 
b/scripts/git_partitions_4_log_mu.sh new file mode 100755 index 00000000..fcf69b63 --- /dev/null +++ b/scripts/git_partitions_4_log_mu.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/git/git.edgelist --labels /data/s1674307/snacs/Datasets/git/git.labels --output /data/s1674307/snacs/Results/git/results_partitions_4_git_log_mu --train_set 0.8 --directed --seed 24391 --partitions 4 --p 0.23678312106818603 3.9918006380440088 0.2006002451797317 3.9981061401791536 --q 0.2053622068995652 0.21978237126151454 3.983347440320607 3.935297027740182 + +#mean +#lambda = 0.25414613141 +#p-list = [0.23678312106818603, 3.9918006380440088, 0.2006002451797317, 3.9981061401791536] +#q-list = [0.2053622068995652, 0.21978237126151454, 3.983347440320607, 3.935297027740182] diff --git a/scripts/git_partitions_4_log_sigma.sh b/scripts/git_partitions_4_log_sigma.sh new file mode 100755 index 00000000..11a6326e --- /dev/null +++ b/scripts/git_partitions_4_log_sigma.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/git/git.edgelist --labels /data/s1674307/snacs/Datasets/git/git.labels --output /data/s1674307/snacs/Results/git/results_partitions_4_git_log_sigma --train_set 0.8 --directed --seed 24091 --partitions 4 --p 0.3236534359984023 1.5241911771276138 3.9045821112145163 0.20044094569330795 --q 0.20252830270937253 0.23400689010420173 3.9816568985190535 0.9203925397880377 + +#std +#lambda = 0.00387189876 +#p-list = [0.3236534359984023, 1.5241911771276138, 3.9045821112145163, 0.20044094569330795] +#q-list = [0.20252830270937253, 0.23400689010420173, 3.9816568985190535, 0.9203925397880377] diff --git a/scripts/restarts_email_200.sh b/scripts/restarts_email_200.sh new file mode 100755 index 00000000..e73d6ae9 --- /dev/null +++ b/scripts/restarts_email_200.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input 
/data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.edgelist --labels /data/s1674307/snacs/Datasets/email-Eu-core/email-Eu-core.labels --output /data/s1674307/snacs/Results/email-Eu-core/results_restarts_email --bayesian_opt --iter_bayesian 200 --train_set 0.1 --directed --seed 20234 --restarts --p 4.0 --q 1.8984324672375856 diff --git a/scripts/restarts_facebook_200.sh b/scripts/restarts_facebook_200.sh new file mode 100755 index 00000000..be34cef5 --- /dev/null +++ b/scripts/restarts_facebook_200.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/facebook/facebook.edgelist --labels /data/s1674307/snacs/Datasets/facebook/facebook.labels --output /data/s1674307/snacs/Results/facebook/results_restart_facebook --bayesian_opt --iter_bayesian 200 --train_set 0.1 --directed --seed 20238 --restarts --p 0.7572958018829294 --q 1.473612599456155 diff --git a/scripts/restarts_git_200.sh b/scripts/restarts_git_200.sh new file mode 100755 index 00000000..c8779292 --- /dev/null +++ b/scripts/restarts_git_200.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Bayesian Opt +python ../src/main.py --input /data/s1674307/snacs/Datasets/git/git.edgelist --labels /data/s1674307/snacs/Datasets/git/git.labels --output /data/s1674307/snacs/Results/git/results_restart_git --bayesian_opt --iter_bayesian 200 --train_set 0.1 --directed --seed 20291 --restarts --p 0.20044094569330795 --q 0.9203925397880377 diff --git a/src/command_line.py b/src/command_line.py new file mode 100644 index 00000000..3d7c6d1a --- /dev/null +++ b/src/command_line.py @@ -0,0 +1,90 @@ +import argparse + +def parse_args(): + ''' + Parses the node2vec arguments. 
+ ''' + parser = argparse.ArgumentParser(description="Run node2vec.") + + # ===================== I/O ===================== # + parser.add_argument('--input', type=str, default='../graph/karate.edgelist', + help='Input graph path') + parser.add_argument('--labels', type=str, default='../graph/karate.labels', + help='Label per node of graph') + parser.add_argument('--output_dir_name', type=str, default=None, + help='Name of output directory') + parser.add_argument('--loc_results_dir', type=str, default="../results", + help='Location of results') + parser.add_argument('--output', type=str, default=None, + help='User-specified output location') + # =============================================== # + + # ========= Skip-gram training settings ========= # + parser.add_argument('--seed', default=42, type=int, + help='Random seed') + parser.add_argument('--epochs', default=1, type=int, + help='Number of epochs in SGD') + parser.add_argument('--workers', type=int, default=8, + help='Number of parallel workers. 
Default is 8.') + # =============================================== # + + # ===== (Bayesian) Optimization parameters ===== # + parser.add_argument('--train_set', default=0.8, type=float, + help='Portion of dataset used for optimization') + parser.add_argument('--bayesian_opt', action='store_true', + help='Enable bayesian optimization') + parser.add_argument('--iter_bayesian', default=50, type=int, + help='Number of iterations for bayesian optimization') + parser.add_argument('--scoring', default="f1_macro", type=str, + help='How to evaluate each iteration of bayesian opt') + parser.add_argument('--cross_validation', default=10, type=int, + help='Size of cross validation') + parser.add_argument('--replications', default=5, type=int, + help='Number of replications to evaluate hyperparameter config') + # =============================================== # + + # ==== Original Hyperparameter configuration ==== # + parser.add_argument('--d', type=int, default=128, + help='Number of dimensions. Default is 128.') + parser.add_argument('--l', type=int, default=100, + help='Length of walk per source. Default is 100.') + parser.add_argument('--r', type=int, default=16, + help='Number of walks per source. Default is 16.') + parser.add_argument('--k', type=int, default=14, + help='Context size for optimization. Default is 14.') + + # ==== Parameters Added/Modified for partitions + parser.add_argument('--p', nargs='+', default=[1.], + help='Return hyperparameter. Default is 1.') + parser.add_argument('--q', nargs='+', default=[1.], + help='In-out hyperparameter. Default is 1.') + parser.add_argument('--partitions', type=int, default=1, + help='Amount of partitions of dimensionality') + # =============================================== # + + # ==== Restart probability related arguments ==== # + parser.add_argument('--restarts', action="store_true", + help='Enable restart probability') + parser.add_argument('--tau', type=float, default=0.001, + help='Min. 
restart probability, irrespective of degree') + parser.add_argument('--omega', type=float, default=2.0, + help='Scalar for r (i.e., no. random walks per node)') + parser.add_argument('--epsilon', type=float, default=0.0001, + help='Max. restart probability for nodes with high degrees') + parser.add_argument('--s', type=int, default=10, + help='Min. length of all biased random walks') + # =============================================== # + + # ================= Graph type ================= # + parser.add_argument('--weighted', dest='weighted', action='store_true', + help='Boolean specifying (un)weighted. Default is unweighted.') + parser.add_argument('--unweighted', dest='unweighted', action='store_false') + parser.set_defaults(weighted=False) + + parser.add_argument('--directed', dest='directed', action='store_true', + help='Graph is (un)directed. Default is undirected.') + parser.add_argument('--undirected', dest='undirected', action='store_false') + parser.set_defaults(directed=False) + # =============================================== # + + return parser.parse_args() diff --git a/src/main.py b/src/main.py index 82ac7357..2404dbbd 100644 --- a/src/main.py +++ b/src/main.py @@ -1,104 +1,145 @@ -''' -Reference implementation of node2vec. - -Author: Aditya Grover - -For more details, refer to the paper: -node2vec: Scalable Feature Learning for Networks -Aditya Grover and Jure Leskovec -Knowledge Discovery and Data Mining (KDD), 2016 -''' - -import argparse -import numpy as np -import networkx as nx -import node2vec -from gensim.models import Word2Vec - -def parse_args(): - ''' - Parses the node2vec arguments. 
- ''' - parser = argparse.ArgumentParser(description="Run node2vec.") - - parser.add_argument('--input', nargs='?', default='graph/karate.edgelist', - help='Input graph path') - - parser.add_argument('--output', nargs='?', default='emb/karate.emb', - help='Embeddings path') - - parser.add_argument('--dimensions', type=int, default=128, - help='Number of dimensions. Default is 128.') - - parser.add_argument('--walk-length', type=int, default=80, - help='Length of walk per source. Default is 80.') - - parser.add_argument('--num-walks', type=int, default=10, - help='Number of walks per source. Default is 10.') - - parser.add_argument('--window-size', type=int, default=10, - help='Context size for optimization. Default is 10.') - - parser.add_argument('--iter', default=1, type=int, - help='Number of epochs in SGD') - - parser.add_argument('--workers', type=int, default=8, - help='Number of parallel workers. Default is 8.') - - parser.add_argument('--p', type=float, default=1, - help='Return hyperparameter. Default is 1.') - - parser.add_argument('--q', type=float, default=1, - help='Inout hyperparameter. Default is 1.') - - parser.add_argument('--weighted', dest='weighted', action='store_true', - help='Boolean specifying (un)weighted. Default is unweighted.') - parser.add_argument('--unweighted', dest='unweighted', action='store_false') - parser.set_defaults(weighted=False) - - parser.add_argument('--directed', dest='directed', action='store_true', - help='Graph is (un)directed. Default is undirected.') - parser.add_argument('--undirected', dest='undirected', action='store_false') - parser.set_defaults(directed=False) - - return parser.parse_args() - -def read_graph(): - ''' - Reads the input network in networkx. 
- ''' - if args.weighted: - G = nx.read_edgelist(args.input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph()) - else: - G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph()) - for edge in G.edges(): - G[edge[0]][edge[1]]['weight'] = 1 - - if not args.directed: - G = G.to_undirected() - - return G - -def learn_embeddings(walks): - ''' - Learn embeddings by optimizing the Skipgram objective using SGD. - ''' - walks = [map(str, walk) for walk in walks] - model = Word2Vec(walks, size=args.dimensions, window=args.window_size, min_count=0, sg=1, workers=args.workers, iter=args.iter) - model.save_word2vec_format(args.output) - - return - -def main(args): - ''' - Pipeline for representational learning for all nodes in a graph. - ''' - nx_G = read_graph() - G = node2vec.Graph(nx_G, args.directed, args.p, args.q) - G.preprocess_transition_probs() - walks = G.simulate_walks(args.num_walks, args.walk_length) - learn_embeddings(walks) +import os +import datetime +import json +import pickle +from command_line import parse_args +from model import Model +from plot_bayes_opt import plot_heatmap +from sklearn.utils.estimator_checks import check_estimator +from sklearn.model_selection import train_test_split +from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score +from skopt import BayesSearchCV +from skopt.space import Real, Integer + +def bayes_opt_pq(args, X_train, y_train, DIR): + opt = BayesSearchCV( + Model(r=args.r, d=args.d, l=args.l, k=args.k, restarts=args.restarts, \ + tau=args.tau, omega=args.omega, epsilon=args.epsilon, s=args.s, \ + weighted=args.weighted, directed=args.directed, epochs=args.epochs, \ + nodes=args.input, dir_base=None), + { + 'p': Real(0.2, 4, prior='log-uniform'), + 'q': Real(0.2, 4, prior='log-uniform') + }, + n_iter=args.iter_bayesian, + cv=args.cross_validation, + scoring=args.scoring, + verbose=1, + random_state=args.seed, + n_jobs=args.workers, + 
optimizer_kwargs={'n_initial_points': 64, 'initial_point_generator': "grid"} + ) + opt.fit(X_train, y_train) + with open(f"{DIR}/opt_results.pkl", 'wb') as f: + pickle.dump(opt.cv_results_, f) + plot_heatmap(DIR, args.restarts) + + p = opt.best_params_['p'] + q = opt.best_params_['q'] + return p, q + +def bayes_opt_restarts(args, X_train, y_train, DIR): + opt = BayesSearchCV( + Model(p=args.p, q=args.q, r=args.r, d=args.d, l=args.l, \ + k=args.k, restarts=args.restarts, tau=args.tau, epsilon=args.epsilon, s=args.s, \ + weighted=args.weighted, directed=args.directed, epochs=args.epochs, \ + nodes=args.input, dir_base=None), + { + 'omega': Real(0.1, 4.0, prior='log-uniform'), + 'epsilon': Real(0.001, 0.15, prior='log-uniform') + }, + n_iter=args.iter_bayesian, + cv=args.cross_validation, + scoring=args.scoring, + verbose=1, + random_state=args.seed, + n_jobs=args.workers, + optimizer_kwargs={'n_initial_points': 64, 'initial_point_generator': "grid"} + ) + opt.fit(X_train, y_train) + with open(f"{DIR}/opt_results.pkl", 'wb') as f: + pickle.dump(opt.cv_results_, f) + plot_heatmap(DIR, args.restarts) + + omega = opt.best_params_['omega'] + epsilon = opt.best_params_['epsilon'] # search space above is {omega, epsilon}; best_params_ has no 'tau' key + return omega, epsilon + +def evaluate(args, train_X, train_y, test_X, test_y, DIR): + dir_out = f"{DIR}/eval" + os.mkdir(dir_out) + + with open(f"{dir_out}/best_settings.json", 'w') as f: + json.dump(args.__dict__, f, indent=3) + + with open(f"{dir_out}/results.csv", 'w') as f_out: + f_out.write("F1_macro,F1_micro,accuracy\n") + for i in range(args.replications): + # Create model + m = Model(p=args.p, q=args.q, r=args.r, d=args.d, \ + l=args.l, k=args.k, restarts=args.restarts, \ + tau=args.tau, omega=args.omega, epsilon=args.epsilon, \ + s=args.s, weighted=args.weighted, directed=args.directed, \ + epochs=args.epochs, nodes=args.input, dir_base=dir_out, \ + partitions=args.partitions) + m.fit(train_X, train_y) + + pred_y = m.predict(test_X) + + f1_macro = f1_score(test_y, pred_y, average="macro") + f1_micro = 
f1_score(test_y, pred_y, average="micro") + accuracy = accuracy_score(test_y, pred_y) + + f_out.write(f"{f1_macro},{f1_micro},{accuracy}\n") if __name__ == "__main__": - args = parse_args() - main(args) + args = parse_args() + + arg_len = args.partitions + q_len = len(args.q) + p_len = len(args.p) + + q_error = "Error: expected q-list to have: " + str(arg_len) + " parameters but got: " + str(q_len) + " parameters!" + p_error = "Error: expected p-list to have: " + str(arg_len) + " parameters but got: " + str(p_len) + " parameters!" + + assert q_len == arg_len, q_error + assert p_len == arg_len, p_error + assert args.d % arg_len == 0, "Error: dimensions must be divisible by partitions" + + if not os.path.isdir(args.loc_results_dir): + os.mkdir(args.loc_results_dir) + + if args.output is not None: + DIR = f"{args.output}" + elif args.output_dir_name is not None: + DIR = f"{args.loc_results_dir}/{args.output_dir_name}" + else: + t = datetime.datetime.now() + DIR = f"{args.loc_results_dir}/results_{t.day}-{t.month}-{t.year}_{t.hour}-{t.minute}-{t.second}" + os.mkdir(DIR) + with open(f"{DIR}/cl_args.json", 'w') as f: + json.dump(args.__dict__, f, indent=3) + + # Obtain nodes and corresponding labels from dataset + X, y = [], [] + with open(args.labels, 'r') as f_labels: + data = f_labels.read() + lines = data.split('\n') + lines = lines[:-1] + for line in lines: + xy = line.split('\t') + X.append(xy[0]) + y.append(xy[1]) + + X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=args.train_set) + + if args.bayesian_opt: + if args.restarts: # Optimize omega, epsilon + args.omega, args.epsilon = bayes_opt_restarts(args, X_train, y_train, DIR) + else: # Optimize p and q + p, q = bayes_opt_pq(args, X_train, y_train, DIR) + args.p[0] = p + args.q[0] = q + + #TODO else: + evaluate(args, X_train, y_train, X_test, y_test, DIR) diff --git a/src/model.py b/src/model.py new file mode 100644 index 00000000..1fc3662c --- /dev/null +++ b/src/model.py @@ -0,0 +1,138 @@ 
+import networkx as nx +import datetime +import os +import numpy as np +import pickle +from node2vec import Node2Vec +from sklearn.base import BaseEstimator +from sklearn.utils.validation import check_X_y +from gensim.models import Word2Vec +from sklearn.multiclass import OneVsRestClassifier +from sklearn.svm import SVC +from sklearn.metrics import f1_score, accuracy_score + +EMBEDDING_F_OUT = "embeddings.emb" +EMBEDDING_F_OUT_PKL = "embeddings.pkl" + +class Model(BaseEstimator): + + def __init__(self, p=[1.0], q=[1.0], r=10, d=128, l=80, k=10, \ + restarts=False, tau=0., omega=2, epsilon=0.01, s=10, \ + epochs=1, weighted=False, directed=False, nodes=None, \ + dir_base=None, partitions=1): + ''' + Hyperparameter configuration + ''' + # Hyperparameters for node2vec + self.p = p # Return hyperparameter + self.q = q # In-out hyperparameter + self.r = r # No. random walks per node + self.d = d # Dimensionality of vector embedding + self.l = l # Length of one random walk + self.k = k # Sliding-window size + + #hyperparams for partitions + self.partitions = partitions # How many different models to concatinate + + # Hyperparameters for restarts + self.restarts = restarts # Enable restarts + self.tau = tau # Min. restart probability + self.omega = omega # Scalar for r + self.epsilon = epsilon # Max. restart prob for nodes with high degree + self.s = s # Min. length of all biased random walks + + self.weighted = weighted + self.directed = directed + self.epochs = epochs + + self.nodes = nodes # File to import the graph into networkx + + self.dir_out = dir_base + if dir_base is not None: + t = datetime.datetime.now() + self.dir_out = f"{dir_base}/run_{t.day}-{t.month}-{t.year}_{t.hour}-{t.minute}-{t.second}-{t.microsecond}" + os.mkdir(self.dir_out) + + def _read_graph(self): + ''' + Reads the input network in networkx. 
+ ''' + if self.weighted: + G = nx.read_edgelist(self.nodes, delimiter='\t', nodetype=int, data=(('weight',float),), create_using=nx.DiGraph()) + else: + G = nx.read_edgelist(self.nodes, delimiter='\t', nodetype=int, create_using=nx.DiGraph()) + for edge in G.edges(): + G[edge[0]][edge[1]]['weight'] = 1 + + if not self.directed: + G = G.to_undirected() + + return G + + def _learn_embeddings(self, walks, part_d): + # Convert each vertex ID to a string + walks = [list(map(str, walk)) for walk in walks] + model = Word2Vec(walks, vector_size=part_d, window=self.k, min_count=0, \ + sg=1, workers=8, epochs=self.epochs) + return model + + def get_embeddings(self, X): + return [self.wv[x] for x in X] + + def fit(self, X, y): + models = [] + part_size = int(self.d / self.partitions) + + G = self._read_graph() + if type(self.p) is list: + # create an embedding for each (p,q) + for i in range(self.partitions): + n2v_G = Node2Vec(G, self.directed, float(self.p[i]), \ + float(self.q[i]), self.r, self.restarts, \ + self.tau, self.omega, self.epsilon, self.s) + n2v_G.preprocess_transition_probs() + walks = n2v_G.simulate_walks(self.r, self.l) + models.append(self._learn_embeddings(walks, part_size)) + + self.wv = {} + + # concatenate the embeddings + for key in models[0].wv.index_to_key: + self.wv[key] = np.zeros(self.d, dtype=float) + vect_size = 0 + for i in range(0, self.partitions): + old_size = vect_size + vect_size = part_size*(i+1) + + # fill in the array + k = 0 + for j in range(old_size, vect_size): + self.wv[key][j] = models[i].wv[key][k] + k += 1 + #for j + #for key + #for i + if self.dir_out is not None: + with open(f"{self.dir_out}/{EMBEDDING_F_OUT_PKL}", 'wb') as f: + pickle.dump(self.wv, f) + else: + self.node2vec = Node2Vec(G, self.directed, float(self.p), \ + float(self.q), self.r, self.restarts, \ + self.tau, self.omega, self.epsilon, self.s) + self.node2vec.preprocess_transition_probs() + walks = self.node2vec.simulate_walks(self.r, self.l) + model = 
self._learn_embeddings(walks, self.d) + self.wv = model.wv + + if self.dir_out is not None: + self.wv.save(f"{self.dir_out}/{EMBEDDING_F_OUT}") + + X_embeddings = self.get_embeddings(X) + self.clf = OneVsRestClassifier(SVC()).fit(X_embeddings, y) + + return self + + + def predict(self, X): + X_embeddings = self.get_embeddings(X) + return self.clf.predict(X_embeddings) diff --git a/src/node2vec.py b/src/node2vec.py index 0293411a..2b96884c 100644 --- a/src/node2vec.py +++ b/src/node2vec.py @@ -1,26 +1,59 @@ import numpy as np import networkx as nx import random +from tqdm import tqdm - -class Graph(): - def __init__(self, nx_G, is_directed, p, q): +class Node2Vec(): + def __init__(self, nx_G, is_directed, p, q, r, restarts, tau, omega, epsilon, s): self.G = nx_G self.is_directed = is_directed self.p = p self.q = q + + self.r = r + + # Restart related hyperparameters + self.restarts = restarts + self.tau = tau + self.omega = omega + self.epsilon = epsilon + self.s = s + + def fn_restart(self, degree): + ''' + Compute restart probability based on degree + ''' + return self.epsilon / (1 + np.exp((-1*degree) + self.omega*self.r)) + self.tau + + def can_restart(self, len_walk, leftover): + ''' + Only able to restart when there are sufficient no steps leftover and + the current walk has done a sufficient no steps. Both based on restarts_window + ''' + return len_walk >= self.s and leftover >= self.s def node2vec_walk(self, walk_length, start_node): ''' Simulate a random walk starting from start node. 
''' + G = self.G alias_nodes = self.alias_nodes alias_edges = self.alias_edges + degree = G.degree(start_node) + p_restarts = self.fn_restart(degree) + + walks = [] walk = [start_node] + steps = 0 + + while steps < walk_length: + # Simulate restarts, independent of probabilities of edges + if self.restarts and self.can_restart(len(walk), walk_length-steps) and np.random.rand() <= p_restarts: + walks.append(walk) + walk = [start_node] - while len(walk) < walk_length: cur = walk[-1] cur_nbrs = sorted(G.neighbors(cur)) if len(cur_nbrs) > 0: @@ -33,8 +66,11 @@ def node2vec_walk(self, walk_length, start_node): walk.append(next) else: break - - return walk + + steps += 1 + + walks.append(walk) + return walks def simulate_walks(self, num_walks, walk_length): ''' @@ -43,12 +79,13 @@ def simulate_walks(self, num_walks, walk_length): G = self.G walks = [] nodes = list(G.nodes()) - print 'Walk iteration:' - for walk_iter in range(num_walks): - print str(walk_iter+1), '/', str(num_walks) + print('Walk iteration:') + for walk_iter in tqdm(range(num_walks)): random.shuffle(nodes) for node in nodes: - walks.append(self.node2vec_walk(walk_length=walk_length, start_node=node)) + sim_walks = self.node2vec_walk(walk_length=walk_length, start_node=node) + for i in range(len(sim_walks)): + walks.append(sim_walks[i]) return walks @@ -146,4 +183,4 @@ def alias_draw(J, q): if np.random.rand() < q[kk]: return kk else: - return J[kk] \ No newline at end of file + return J[kk] diff --git a/src/plot_bayes_opt.py b/src/plot_bayes_opt.py new file mode 100644 index 00000000..1560c254 --- /dev/null +++ b/src/plot_bayes_opt.py @@ -0,0 +1,54 @@ +import matplotlib.pyplot as plt +import pickle +import numpy as np + +# https://scikit-optimize.github.io/stable/modules/generated/skopt.BayesSearchCV.html + +CMAP = "viridis" + +def plot_heatmap(DIR, restarts): + with open(f"{DIR}/opt_results.pkl", 'rb') as f: + data = pickle.load(f) + + # Scores per hyperparameter configuration + scores = 
data["mean_test_score"] + best_score = np.argmax(scores) + + fig, ax = plt.subplots() + + if restarts: + # Obtain the (omega, epsilon) configurations tried by the restart search + omega, epsilon = [], [] + for item in data["params"]: + omega.append(item["omega"]) + epsilon.append(item["epsilon"]) + + ax.plot(omega, epsilon, "ko", markersize=2) + ax.plot(omega[best_score], epsilon[best_score], markersize=6, marker='*', color='red') + cntr = ax.tricontourf(omega, epsilon, scores, cmap=CMAP) + + ax.set( + xlim=(min(omega), max(omega)), + ylim=(min(epsilon), max(epsilon)), + xlabel=r"$\omega$", + ylabel=r"$\varepsilon$" + ) + else: + # Obtain hyperparameter configurations + p, q = [], [] + for item in data["params"]: + p.append(item["p"]) + q.append(item["q"]) + + ax.plot(p, q, "ko", markersize=2) + ax.plot(p[best_score], q[best_score], markersize=6, marker='*', color='red') + cntr = ax.tricontourf(p, q, scores, cmap=CMAP) + + ax.set( + xlim=(min(p), max(p)), + ylim=(min(q), max(q)), + xlabel=r"$p$", + ylabel=r"$q$" + ) + fig.colorbar(cntr, ax=ax, label="F1-macro score") + plt.savefig(bbox_inches="tight", fname=f"{DIR}/BO_opt.pdf", format="pdf", dpi=100) diff --git a/src/post_process.py b/src/post_process.py new file mode 100644 index 00000000..97147627 --- /dev/null +++ b/src/post_process.py @@ -0,0 +1,196 @@ +import numpy as np +import itertools +import argparse + +from math import comb +import csv + + +def parse_args(): + ''' + Parses the post-processing arguments. 
+ ''' + parser = argparse.ArgumentParser(description="Run post-processing.") + + # ===================== I/O ===================== # + parser.add_argument('--pkl', type=str, default="opt_results.pkl", + help='Input pkl path') + parser.add_argument('--scores', type=str, default='scores.csv', + help='where to put the table') + parser.add_argument('--dataset', type=str, default=None, + help='Name of dataset; used to build the scores and pkl file names') + parser.add_argument('--dir', type=str, default=None, + help='Path to file locations') + # =============================================== # + + # =============== Hyperparameters =============== # + parser.add_argument('--partitions', default=1, type=int, + help='Amount of ensembles/partitions used') + parser.add_argument('--bayesopts', default=200, type=int, + help='Amount of bayesian optimisations run') + parser.add_argument('--lamb', default=-1.0, type=float, + help='weight of the distance distribution') + parser.add_argument('--logspace', dest='logspace', action='store_true', + help='should p-q distances be logarithmic or not?') + parser.set_defaults(weighted=False) + # =============================================== # + + # ================ read or write ================ # + parser.add_argument('--read', dest='read', action='store_true', + help='process scores from .csv') + parser.set_defaults(weighted=False) + + parser.add_argument('--write', dest='write', action='store_true', + help='write scores to .csv') + parser.set_defaults(directed=False) + # =============================================== # + + return parser.parse_args() + + +def find_best(A, bayesopts): + highscore = -1 + omega = -1 + epsilon = -1 + for i in range(bayesopts): + if A["mean_test_score"][i] > highscore: + highscore = A["mean_test_score"][i] + omega = A["param_omega"][i] + epsilon = A["param_epsilon"][i] + + print("--omega", omega, "--epsilon", epsilon) + +def read_scores(A, scores_file, lamb): + highscore = -1.0 + best_params = "(0, 1, 2, 3)" + + with open 
(scores_file, 'r') as f: + for line in f.readlines(): + split_line = line.split(";") + + f1_score = float(split_line[1]) + dist_score = float(split_line[2]) + score = f1_score + (lamb*dist_score) + + if (score > highscore): + highscore = score + best_params = split_line[0] + #if best + #for line + final_params = best_params[1:-1].split(',') + + p_list = [] + q_list = [] + for param in final_params: + p_list.append(A["param_p"][int(param)]) + q_list.append(A["param_q"][int(param)]) + + print("\nlambda = ", lamb) + print("p-list = ", p_list) + print("q-list = ", q_list) + + +def write_scores(A, scores_file, partitions, bayesopts, log_space=True): + f1_score_list = [] + dist_score_list = [] + + pqs = itertools.combinations(range(bayesopts), partitions) + + with open (scores_file, 'w') as f: + writer = csv.writer(f, delimiter=';') + + for pq in pqs: + f1_score = 0 + dist_score = 0 + + for i in range(partitions): # partitions = len(pq) #by definition + f1_score += A["mean_test_score"][pq[i]] + for j in range(i+1, partitions): + if log_space: + print("log") + dist_score += np.sqrt((np.log(A["param_p"][pq[i]]) - np.log(A["param_p"][pq[j]]))**2 + (np.log(A["param_q"][pq[i]]) - np.log(A["param_q"][pq[j]]))**2) + else: + dist_score += np.sqrt((A["param_p"][pq[i]] - A["param_p"][pq[j]])**2 + (A["param_q"][pq[i]] - A["param_q"][pq[j]])**2) + print("else") + #if/else + #for j + #for i + + f1_score_list.append(f1_score) + dist_score_list.append(dist_score) + + row = [pq, f1_score, dist_score] + + writer.writerow(row) + #for pq + #with open + + dist_mean = np.mean(dist_score_list) + dist_std = np.std(dist_score_list) + + f1_mean = np.mean(f1_score_list) + f1_std = np.std(f1_score_list) + + lamb_mean = f1_mean / dist_mean + lamb_std = f1_std / dist_std + + print("\n\ndist-scores:") + print("The mean score is ", dist_mean) + print("The standard deviation is ", dist_std) + + print("\n\nF1-scores:") + print("The mean score is ", np.mean(f1_mean)) + print("The standard deviation is ", 
np.std(f1_std)) + + print("lambda_mean = ", lamb_mean) + print("lambda_std = ", lamb_std) + + return lamb_mean, lamb_std + + +if __name__ == "__main__": + args = parse_args() + + assert (args.read or args.write or args.partitions == 1), "You must specify to read or write" + + data_set = args.dataset + + pkl_file = args.pkl + scores_file = args.scores + + if data_set != None: + scores_file = scores_file [:-4] # -= ".csv" + scores_file += "_" + str(args.partitions) + "_" + data_set + ".csv" + pkl_file = data_set + "_" + args.pkl + + if args.dir != None: + if args.dir[-1] == '/': + scores_file = args.dir + scores_file + pkl_file = args.dir + pkl_file + else: + scores_file = args.dir + "/" + scores_file + pkl_file = args.dir + "/" + pkl_file + + A = np.load(pkl_file, allow_pickle=True) + + + if args.partitions == 1: + find_best(A, args.bayesopts) + + else: + if args.write: + lamb_mean, lamb_std = write_scores(A, scores_file, args.partitions, args.bayesopts, log_space=args.logspace) + read_scores(A, scores_file, lamb_mean) + read_scores(A, scores_file, lamb_std) + + elif args.read: + assert (args.lamb > -0.001), "Please specify a valid lamb!" + read_scores(A, scores_file, args.lamb) + + + + + + + +