diff --git a/.gitignore b/.gitignore index 0a8d3539..cdeb645d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,13 @@ *.class +*.swp +.classpath +.project +.settings # Package Files # *.jar *.war -*.ear \ No newline at end of file +*.ear +.idea +target +walkmanager.log diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..9b259bdf --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index d967f9b6..95aaed29 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,12 @@ # GraphChi-java -Version 0.2 - +Version 0.2.2 ## News * Performance has been improved by parallelizing shard loading better (Oct 22, 2013) -* GraphChi was moved to GitHub from Google Code (July 24). Please report/fix any broken links. * GraphChi's Java version has a new cool random walk simulation engine: https://github.com/GraphChi/graphchi-java/wiki/Personalized-Pagerank-with-DrunkardMob - - - - + # Introduction Project for developing the Java version of GraphChi ( http://www.graphchi.org ), the disk-based graph computation engine. To learn more about GraphChi, visit the C++ version's project page: https://github.com/GraphChi/graphchi-cpp @@ -21,7 +16,21 @@ Project for developing the Java version of GraphChi ( http://www.graphchi.org ), ### How to use -Read the README.txt for information on how to build and run the example applications. You are going to need [Maven](http://maven.apache.org/download.cgi) for building. +Read the README.txt for information on how to build and run the example applications. You are going to need [Maven](http://maven.apache.org/download.cgi) or [sbt](http://www.scala-sbt.org/) for building. + +graphchi-java is hosted in the maven central repository, so you can include it as a managed dependency in your maven or sbt builds. 
For sbt, include the following line in your `build.sbt`: + +`libraryDependencies += "org.graphchi" %% "graphchi-java" % "0.2.2"` + +For maven, include the following in `<dependencies>`: + +``` +<dependency> + <groupId>org.graphchi</groupId> + <artifactId>graphchi-java_2.11</artifactId> + <version>0.2.2</version> +</dependency> +``` It is a very good idea to study the example applications carefully. There are currently three example applications in the package **edu.cmu.graphchi.apps**: * [PageRank](https://github.com/GraphChi/graphchi-java/tree/master/src/main/java/edu/cmu/graphchi/apps/Pagerank.java) for computing the famous [PageRank](http://en.wikipedia.org/wiki/PageRank) ranking @@ -132,8 +141,3 @@ Java and .NET applications. Take a look at YourKit's leading software products: akyrola@cs.cmu.edu - - - -[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/GraphChi/graphchi-java/trend.png)](https://bitdeli.com/free "Bitdeli Badge") - diff --git a/build.sbt b/build.sbt new file mode 100644 index 00000000..b9e0e6fa --- /dev/null +++ b/build.sbt @@ -0,0 +1,54 @@ +organization := "org.graphchi" + +name := "graphchi-java" + +version := "0.2.2" + +scalaVersion := "2.11.2" + +crossScalaVersions := Seq("2.11.2", "2.10.3") + +javaSource in Test := baseDirectory.value / "test" + +libraryDependencies ++= Seq( + "com.yammer.metrics" % "metrics-core" % "2.2.0", + "mysql" % "mysql-connector-java" % "5.1.6", + "org.apache.pig" % "pig" % "0.10.0", + "org.apache.hadoop" % "hadoop-core" % "0.20.2", + "org.apache.commons" % "commons-math" % "2.1", + "commons-cli" % "commons-cli" % "1.2", + "com.novocode" % "junit-interface" % "0.11" % "test", + "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", + "org.scalatest" %% "scalatest" % "2.2.1" % "test" +) + +publishMavenStyle := true + +pomIncludeRepository := { _ => false } + +publishTo := { + val nexus = "https://oss.sonatype.org/" + if (isSnapshot.value) + Some("snapshots" at nexus + "content/repositories/snapshots") + else + Some("releases" at nexus + "service/local/staging/deploy/maven2") +} + +publishArtifact in Test := false 
+ +licenses := Seq("Apache-2.0" -> url("http://www.opensource.org/licenses/Apache-2.0")) + +homepage := Some(url("http://github.com/GraphChi/graphchi-java")) + +pomExtra := ( + + git@github.com:GraphChi/graphchi-java.git + scm:git:git@github.com:GraphChi/graphchi-java.git + + + + matt-gardner + Matt Gardner + http://cs.cmu.edu/~mg1 + + ) diff --git a/pom.xml b/pom.xml index 6e07afc6..52d0b868 100644 --- a/pom.xml +++ b/pom.xml @@ -1,140 +1,182 @@ - 4.0.0 + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + 4.0.0 - groupId - graphchi-java - 0.2 + org.graphchi + graphchi-java + 0.2.2 - + + + github + + - SonatypeNexusSnapshots - Sonatype Nexus Snapshots - https://oss.sonatype.org/content/repositories/snapshots/ - true + SonatypeNexusSnapshots + Sonatype Nexus Snapshots + https://oss.sonatype.org/content/repositories/snapshots/ + true - - scala-tools.org - Scala-tools Maven2 Repository - http://scala-tools.org/repo-releases + + scala-tools.org + Scala-tools Maven2 Repository + http://scala-tools.org/repo-releases - - - com.yammer.metrics - metrics-core - 2.2.0 + + + internal.repo + Temporary Staging Repository + file:://${project.build.directory}/mvn-repo + + + + + + com.yammer.metrics + metrics-core + 2.2.0 - - - org.scala-lang - scala-library - 2.9.0-1 - - - mysql - mysql-connector-java - 5.1.6 - - - junit - junit - 4.10 - jar - test - true - - - org.apache.pig - pig - compile - 0.10.0 - - - org.apache.hadoop - hadoop-core - 0.20.2 - - - org.apache.commons - commons-math - 2.0 - - - org.apache.commons - commons-math - 2.1 - - - commons-cli - commons-cli - 1.2 - - - - - - - org.scala-tools - maven-scala-plugin - 2.15.2 - + + + org.scala-lang + scala-library + 2.9.0-1 + + + mysql + mysql-connector-java + 5.1.6 + + + junit + junit + 4.10 + jar + test + true + + + org.apache.pig + pig + compile + 0.10.0 + + + org.apache.hadoop + hadoop-core + 0.20.2 + + + 
org.apache.commons + commons-math + 2.0 + + + org.apache.commons + commons-math + 2.1 + + + commons-cli + commons-cli + 1.2 + + - - compile - - compile - - compile - - - test-compile - - testCompile - - test-compile - - - process-resources - - compile - - - - + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + - - maven-compiler-plugin - 2.3.2 + + compile + + compile + + compile + + + test-compile + + testCompile + + test-compile + + + process-resources + + compile + + + + - - 1.6 - 1.6 - - - - - org.apache.maven.plugins - maven-assembly-plugin - 2.2.2 - - - - - src/main/assembly/assembly.xml - - - + + maven-compiler-plugin + 2.3.2 - - target/test-classes - test - + + 1.6 + 1.6 + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.2.2 + + + + + src/main/assembly/assembly.xml + + + + + maven-deploy-plugin + 2.8.1 + + internal.repo::default::file://${project.build.directory}/mvn-repo + + + + com.github.github + site-maven-plugin + 0.10 + + Maven artifacts for ${project.version} + true + ${project.build.directory}/mvn-repo + refs/heads/mvn-repo + **/* + graphchi-java + matt-gardner + + + + + + site + + deploy + + + + + target/test-classes + test + diff --git a/src/main/java/com/twitter/pers/bipartite/HITSSmallMem.java b/src/main/java/com/twitter/pers/bipartite/HITSSmallMem.java index a93c7ae1..4e23623a 100644 --- a/src/main/java/com/twitter/pers/bipartite/HITSSmallMem.java +++ b/src/main/java/com/twitter/pers/bipartite/HITSSmallMem.java @@ -83,7 +83,7 @@ public void update(ChiVertex vertex, GraphChiContext context) FloatPair curValue = vertex.getValue(); if (side == LEFTSIDE && vertex.numOutEdges() > 0) { - curValue.first = newValue; + curValue = new FloatPair(newValue, curValue.second); synchronized (this) { leftSideSqrSum += newValue * newValue; } @@ -140,8 +140,7 @@ public void endIteration(GraphChiContext ctx) { leftNorm = (float) Math.sqrt(leftSideSqrSum); VertexTransformer.transform((int) ctx.getNumVertices(), graphName, new FloatPairConverter(), 
new VertexTransformCallBack() { public FloatPair map(int vertexId, FloatPair value) { - value.first /= leftNorm; - return value; + return new FloatPair(value.first/leftNorm, value.second); } }); diff --git a/src/main/java/com/twitter/pers/bipartite/SALSASmallMem.java b/src/main/java/com/twitter/pers/bipartite/SALSASmallMem.java index 03bd6d3e..2b4fb2d0 100644 --- a/src/main/java/com/twitter/pers/bipartite/SALSASmallMem.java +++ b/src/main/java/com/twitter/pers/bipartite/SALSASmallMem.java @@ -77,7 +77,7 @@ public void update(ChiVertex vertex, GraphChiContext context) FloatPair curValue = vertex.getValue(); if (side == LEFTSIDE && vertex.numOutEdges() > 0) { - curValue.first = newValue; + curValue = new FloatPair(newValue, curValue.second); // Write value to outedges float broadcastValue = newValue / vertex.numOutEdges(); for(int i=0; i < vertex.numOutEdges(); i++) { diff --git a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java new file mode 100644 index 00000000..a44abe34 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java @@ -0,0 +1,132 @@ +package edu.cmu.graphchi.apps; + +import edu.cmu.graphchi.*; +import edu.cmu.graphchi.datablocks.FloatConverter; +import edu.cmu.graphchi.datablocks.FloatPair; +import edu.cmu.graphchi.datablocks.FloatPairConverter; +import edu.cmu.graphchi.engine.GraphChiEngine; +import edu.cmu.graphchi.engine.VertexInterval; +import edu.cmu.graphchi.io.CompressedIO; +import edu.cmu.graphchi.preprocessing.EdgeProcessor; +import edu.cmu.graphchi.preprocessing.FastSharder; +import edu.cmu.graphchi.preprocessing.VertexIdTranslate; +import edu.cmu.graphchi.preprocessing.VertexProcessor; +import edu.cmu.graphchi.util.IdFloat; +import edu.cmu.graphchi.util.Toplist; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.TreeSet; +import java.util.logging.Logger; + +/** + * Weighted Pagerank. 
+ * Contributed by Jerry Ye, 2014. + */ +public class WeightedPagerank implements GraphChiProgram { + + private static Logger logger = ChiLogger.getLogger("weighted_pagerank"); + + public void update(ChiVertex vertex, GraphChiContext context) { + if (context.getIteration() == 0) { + /* Initialize on first iteration */ + vertex.setValue(1.0f); + } else { + /* On other iterations, set my value to be the weighted + average of my in-coming neighbors pageranks. + */ + float sum = 0.f; + for(int i=0; i(graphName, numShards, new VertexProcessor() { + public Float receiveVertexValue(int vertexId, String token) { + return (token == null ? 0.f : Float.parseFloat(token)); + } + }, new EdgeProcessor() { + public FloatPair receiveEdge(int from, int to, String token) { + return new FloatPair(Float.parseFloat(token), 0.f); + } + }, new FloatConverter(), new FloatPairConverter()); + } + + /** + * Usage: java edu.cmu.graphchi.demo.PageRank graph-name num-shards filetype(edgelist|adjlist) + * For specifying the number of shards, 20-50 million edges/shard is often a good configuration. + */ + public static void main(String[] args) throws Exception { + String baseFilename = args[0]; + int nShards = Integer.parseInt(args[1]); + String fileType = (args.length >= 3 ? 
args[2] : null); + + CompressedIO.disableCompression(); + + /* Create shards */ + FastSharder sharder = createSharder(baseFilename, nShards); + if (baseFilename.equals("pipein")) { // Allow piping graph in + sharder.shard(System.in, fileType); + } else { + if (!new File(ChiFilenames.getFilenameIntervals(baseFilename, nShards)).exists()) { + sharder.shard(new FileInputStream(new File(baseFilename)), fileType); + } else { + logger.info("Found shards -- no need to preprocess"); + } + } + + /* Run GraphChi */ + GraphChiEngine engine = new GraphChiEngine(baseFilename, nShards); + engine.setEdataConverter(new FloatPairConverter()); + engine.setVertexDataConverter(new FloatConverter()); + engine.setModifiesInedges(false); // Important optimization + + engine.run(new WeightedPagerank(), 4); + + logger.info("Ready."); + + /* Output results */ + int i = 0; + VertexIdTranslate trans = engine.getVertexIdTranslate(); + TreeSet top20 = Toplist.topListFloat(baseFilename, engine.numVertices(), 20); + for(IdFloat vertexRank : top20) { + System.out.println(++i + ": " + trans.backward(vertexRank.getVertexId()) + " = " + vertexRank.getValue()); + } + } +} diff --git a/src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java b/src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java new file mode 100644 index 00000000..ca7fbee5 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java @@ -0,0 +1,150 @@ +package edu.cmu.graphchi.apps.kcore; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.logging.Logger; + +import edu.cmu.graphchi.ChiFilenames; +import edu.cmu.graphchi.ChiLogger; +import edu.cmu.graphchi.ChiVertex; +import edu.cmu.graphchi.GraphChiContext; +import edu.cmu.graphchi.GraphChiProgram; +import edu.cmu.graphchi.datablocks.IntConverter; +import edu.cmu.graphchi.engine.GraphChiEngine; +import 
edu.cmu.graphchi.engine.VertexInterval;
+import edu.cmu.graphchi.io.CompressedIO;
+import edu.cmu.graphchi.preprocessing.EdgeProcessor;
+import edu.cmu.graphchi.preprocessing.FastSharder;
+import edu.cmu.graphchi.preprocessing.VertexProcessor;
+
+/**
+ * Converts an undirected input graph into a directed one by checking that each edge has a
+ * complementary edge in the opposite direction, and adding those complementary edges when
+ * they are missing.
+ *
+ * For every in-edge of a vertex whose source is not also one of its out-neighbors, one line
+ * of the form "vertexId TAB neighborId" is written through the shared writer, which main()
+ * opens in append mode on the input graph file itself.
+ *
+ * Note: You may change output and input directory path based on your needs.
+ *
+ * @author Wissam Khaouid, wissamk@uvic.ca, 2014
+ */
+
+public class GraphTransformer implements GraphChiProgram<Integer, Integer> {
+
+    /** Number of complementary edges written so far; shared by all update() calls. */
+    protected static int nEdgesAdded = 0;
+
+    /** Writer currently opened by startWriting(); closed by stopWriting(). */
+    protected static BufferedWriter bw;
+
+    private static Logger logger = ChiLogger.getLogger("GraphConverter");
+
+    /**
+     * Opens the shared writer on the given file.
+     *
+     * @param file   file to write to
+     * @param append true to append to existing content, false to overwrite it
+     * @throws IOException if the file cannot be opened for writing
+     */
+    public static void startWriting(File file, boolean append) throws IOException {
+        FileWriter fw = new FileWriter(file, append);
+        bw = new BufferedWriter(fw);
+    }
+
+    /**
+     * Closes the shared writer (flushing buffered output).
+     *
+     * @throws IOException if closing fails
+     */
+    public static void stopWriting() throws IOException {
+        bw.close();
+    }
+
+    /**
+     * Writes one complementary edge for every in-edge that has no matching out-edge.
+     *
+     * @param vertex  vertex being processed
+     * @param context engine context (unused)
+     */
+    public void update(ChiVertex<Integer, Integer> vertex, GraphChiContext context) {
+        // A hash set keeps the membership test below constant-time instead of a linear scan
+        // per in-edge.
+        java.util.Set<Integer> outNeighbors = new java.util.HashSet<Integer>();
+        for (int i = 0; i < vertex.numOutEdges(); i++) {
+            outNeighbors.add(vertex.outEdge(i).getVertexId());
+        }
+
+        for (int i = 0; i < vertex.numInEdges(); i++) {
+            int inNeighbor = vertex.inEdge(i).getVertexId();
+            if (!outNeighbors.contains(inNeighbor)) {
+                try {
+                    // NOTE(review): the ids written here are the engine's vertex ids;
+                    // KCoreDecomposer maps ids back with VertexIdTranslate.backward before
+                    // writing -- confirm whether the same translation is needed here.
+                    bw.write("\n" + vertex.getId() + "\t" + inNeighbor);
+                    // update() presumably runs on several worker threads (the usage example
+                    // below sets -Dnum_threads=8), so guard the shared counter.
+                    synchronized (GraphTransformer.class) {
+                        nEdgesAdded++;
+                    }
+                } catch (IOException e) {
+                    logger.log(java.util.logging.Level.SEVERE,
+                            "Could not write complementary edge " + vertex.getId() + " -> " + inNeighbor, e);
+                }
+            }
+        }
+    }
+
+    public void beginIteration(GraphChiContext ctx) {}
+
+    public void endIteration(GraphChiContext ctx) {}
+
+    public void beginInterval(GraphChiContext ctx, VertexInterval interval) {}
+
+    public void endInterval(GraphChiContext ctx, VertexInterval interval) {}
+
+    public void beginSubInterval(GraphChiContext ctx, VertexInterval interval) {}
+
+    public void endSubInterval(GraphChiContext ctx, VertexInterval interval) {}
+
+    /**
+     * Creates the sharder used for preprocessing. Vertex and edge tokens of the input are
+     * ignored: every vertex and every edge gets the value 0.
+     *
+     * @param graphName path of the graph file
+     * @param numShards number of shards to create
+     * @throws IOException if the sharder cannot be created
+     */
+    protected static FastSharder<Integer, Integer> createSharder(String graphName, int numShards) throws IOException {
+        return new FastSharder<Integer, Integer>(graphName, numShards, new VertexProcessor<Integer>() {
+            public Integer receiveVertexValue(int vertexId, String token) {
+                return 0;
+            }
+        }, new EdgeProcessor<Integer>() {
+            public Integer receiveEdge(int from, int to, String token) {
+                return 0;
+            }
+        }, new IntConverter(), new IntConverter());
+    }
+
+    public static void main(String[] args) throws IOException {
+
+        /**
+         * java -Xmx2048m -cp bin:gchi-libs/* -Dnum_threads=8 edu.cmu.graphchi.apps.kcore.GraphTransformer filename nbrOfShards filetype memoryBudget
+         */
+
+        String inputDirectory = "./datasets/";
+        String outputDirectory = "./output/";
+
+        String fileName = args[0];
+        int nShards = Integer.parseInt(args[1]);
+        String fileType = args[2];
+        // memoryBudget is optional. Keep it boxed: assigning the conditional's null branch
+        // to an int would throw a NullPointerException when the argument is omitted.
+        Integer memBudget = (args.length >= 4 ? Integer.valueOf(args[3]) : null);
+
+        CompressedIO.disableCompression();
+
+        String inputFilePath = inputDirectory + fileName;
+
+        /* Making shards */
+        FastSharder<Integer, Integer> sharder = createSharder(inputFilePath, nShards);
+        // Compare the bare file name: inputFilePath always carries the directory prefix, so
+        // testing it against "pipein" could never succeed.
+        if ("pipein".equals(fileName)) {     // Allow piping graph in
+            sharder.shard(System.in, fileType);
+        } else {
+            if (!new File(ChiFilenames.getFilenameIntervals(inputFilePath, nShards)).exists()) {
+                sharder.shard(new FileInputStream(new File(inputFilePath)), fileType);
+            } else {
+                logger.info("Found shards -- no need to preprocess");
+            }
+        }
+
+        /* Complementary edges will be appended to the input graph file throughout execution */
+        startWriting(new File(inputFilePath), true);
+        try {
+            /* Running GraphChi */
+            GraphChiEngine<Integer, Integer> engine = new GraphChiEngine<Integer, Integer>(inputFilePath, nShards);
+            if (memBudget != null) {
+                // Only override the engine's own budget when one was given on the command line.
+                engine.setMemoryBudgetMb(memBudget.intValue());
+            }
+            engine.setSkipZeroDegreeVertices(true);
+            engine.setEnableScheduler(true);
+            engine.setEdataConverter(new IntConverter());
+            engine.setVertexDataConverter(new IntConverter());
+
+            engine.run(new GraphTransformer(), 1);
+        } finally {
+            // Close (and flush) the appended edges even if the engine fails.
+            stopWriting();
+        }
+
+        /* Write report file */
+        startWriting(new File(outputDirectory +
"gtransformer-report-" + fileName), false); + bw.write("Total edges added: " + nEdgesAdded + "\n"); + stopWriting(); + + logger.info("Success!"); + + } + +} + + + diff --git a/src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java b/src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java new file mode 100644 index 00000000..f6a313ec --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java @@ -0,0 +1,268 @@ +package edu.cmu.graphchi.apps.kcore; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Collections; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.logging.Logger; + +import edu.cmu.graphchi.ChiFilenames; +import edu.cmu.graphchi.ChiLogger; +import edu.cmu.graphchi.ChiVertex; +import edu.cmu.graphchi.GraphChiContext; +import edu.cmu.graphchi.GraphChiProgram; +import edu.cmu.graphchi.datablocks.IntConverter; +import edu.cmu.graphchi.engine.GraphChiEngine; +import edu.cmu.graphchi.engine.VertexInterval; +import edu.cmu.graphchi.io.CompressedIO; +import edu.cmu.graphchi.preprocessing.EdgeProcessor; +import edu.cmu.graphchi.preprocessing.FastSharder; +import edu.cmu.graphchi.preprocessing.VertexIdTranslate; +import edu.cmu.graphchi.preprocessing.VertexProcessor; +import edu.cmu.graphchi.util.IdInt; +import edu.cmu.graphchi.util.Toplist; + +/** + * K-core decomposition algorithm + * + * Outputs: a file containing key-value pairs: vertexId, coreness + * + * How does it work ? + * 1 - Initializes vertex values to their degrees then those values are communicated to neighbors. + * 2 - for each vertex v, an upper-bound is computed on its coreness based on the values received from neighbors. + * 3 - if the upper-bound is better than its current value, v updates its value with the upper-bound. 
+ * 4 - Steps 2 and 3 are repeated until no more value updates are occurring.
+ *
+ * For correct results, run your input graph through GraphTransformer first.
+ * Also, make sure to delete the preprocessed shard files created by GraphTransformer prior to running KCoreDecomposer.
+ *
+ * KCoreDecomposer is inspired by the algorithm presented in the following paper:
+ * Distributed K-Core Decomposition
+ * Alberto Montresor, Francesco De Pellegrini, Daniele Miorandi
+ * http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6189336
+ *
+ * Note: You may change output and input directory path based on your needs.
+ *
+ * @author Wissam Khaouid, wissamk@uvic.ca, 2014
+ */
+
+public class KCoreDecomposer implements GraphChiProgram<Integer, Integer> {
+
+    /** Iteration limit passed to the engine; the run ends earlier, once an iteration changes no value. */
+    public static final int INFINITY = Integer.MAX_VALUE;
+
+    /** Number of vertex values changed during the current iteration (reset in beginIteration). */
+    protected int vertexValuesUpdated;
+    /** Number of vertices seen in iteration 0; written as the first line of the output file. */
+    protected static int nVertexes = 0;
+
+    private static int nIterations = 0;
+    /** Writer currently opened by startWriting(); closed by stopWriting(). */
+    protected static BufferedWriter bw;
+
+    private static Logger logger = ChiLogger.getLogger("kCoreDecomposition");
+
+    /**
+     * Opens the shared writer on the given file.
+     *
+     * @param file   file to write to
+     * @param append true to append to existing content, false to overwrite it
+     * @throws IOException if the file cannot be opened for writing
+     */
+    public static void startWriting(File file, boolean append) throws IOException {
+        FileWriter fw = new FileWriter(file, append);
+        bw = new BufferedWriter(fw);
+    }
+
+    /**
+     * Closes the shared writer (flushing buffered output).
+     *
+     * @throws IOException if closing fails
+     */
+    public static void stopWriting() throws IOException {
+        bw.close();
+    }
+
+    /**
+     * Iteration 0: sets the vertex value to its out-degree and writes it to all out-edges.
+     * Later iterations: builds a histogram of the values found on the in-edges, derives a new
+     * estimate from it with computeLeastValue, and lowers and re-broadcasts the vertex value
+     * when the estimate is smaller. The vertex always re-schedules itself.
+     *
+     * @param vertex  vertex being processed
+     * @param context engine context (iteration number and scheduler)
+     */
+    public void update(ChiVertex<Integer, Integer> vertex, GraphChiContext context) {
+
+        int iteration = context.getIteration();
+        int numOutEdges = vertex.numOutEdges();
+
+        if (iteration == 0) {
+            vertex.setValue(numOutEdges);
+            broadcastValue(vertex, numOutEdges);
+            // update() presumably runs on several worker threads (the usage example in main
+            // sets -Dnum_threads=4), so guard the shared counters.
+            synchronized (KCoreDecomposer.class) {
+                nVertexes++;
+            }
+            countValueUpdate();
+        } else {
+            // Neighbor values above the current value all land in one "top drawer" bucket.
+            int topDrawer = vertex.getValue() + 1;
+            int topDrawerCount = 0;
+
+            // Buckets are ordered from the highest key down, which is the order
+            // computeLeastValue accumulates them in. The map never leaves this method, so no
+            // synchronized wrapper is needed.
+            SortedMap<Integer, Integer> inEdgeValueCounts =
+                    new TreeMap<Integer, Integer>(Collections.<Integer>reverseOrder());
+
+            for (int i = 0; i <= numOutEdges; i++) {
+                inEdgeValueCounts.put(i, 0);
+            }
+
+            for (int i = 0; i < vertex.numInEdges(); i++) {
+                int inEdgeValue = vertex.inEdge(i).getValue();
+                if (inEdgeValue >= topDrawer) {
+                    topDrawerCount++;
+                } else {
+                    Integer currentCount = inEdgeValueCounts.get(inEdgeValue);
+                    if (currentCount == null) {
+                        // No bucket exists for this value (it lies outside 0..numOutEdges).
+                        // The run is aborted as before, but with a failure status instead of 0.
+                        logger.severe("Vertex " + vertex.getId() + ": unexpected in-edge value " + inEdgeValue);
+                        System.exit(1);
+                    }
+                    inEdgeValueCounts.put(inEdgeValue, currentCount + 1);
+                }
+            }
+
+            inEdgeValueCounts.put(topDrawer, topDrawerCount);
+            int localEstimate = computeLeastValue(inEdgeValueCounts);
+
+            if (localEstimate < vertex.getValue()) {
+                vertex.setValue(localEstimate);
+                broadcastValue(vertex, localEstimate);
+                countValueUpdate();
+            }
+        }
+
+        context.getScheduler().addTask(vertex.getId());
+
+    }
+
+    /** Records one changed vertex value for the current iteration. */
+    private synchronized void countValueUpdate() {
+        vertexValuesUpdated++;
+    }
+
+    /**
+     * Computes the greatest key x such that at least x counted values are greater than or
+     * equal to x.
+     *
+     * The entries are visited in the map's iteration order while their counts are summed up;
+     * the first key that the running sum reaches is returned. For this to yield the greatest
+     * such key the map must be sorted from the highest key down, as update() builds it.
+     *
+     * @param map value -> number of occurrences, in descending key order
+     * @return the first key reached by the running count, or 1 if no key is reached
+     */
+    public int computeLeastValue(SortedMap<Integer, Integer> map) {
+        int cumulCount = 0;
+        for (Map.Entry<Integer, Integer> entry : map.entrySet()) {
+            int key = entry.getKey();
+            cumulCount += entry.getValue();
+            if (cumulCount >= key) {
+                return key;
+            }
+        }
+        return 1;
+    }
+
+    /**
+     * Broadcasts a value to the neighbors by writing it to the out-edges
+     */
+
+    public void broadcastValue(ChiVertex<Integer, Integer> vertex, int value) {
+        for (int i = 0; i < vertex.numOutEdges(); i++) {
+            vertex.outEdge(i).setValue(value);
+        }
+    }
+
+    /**
+     * Invoked with the start of a new iteration
+     */
+    public void beginIteration(GraphChiContext ctx) {
+        vertexValuesUpdated = 0;
+    }
+
+    /**
+     * Invoked at the end of every iteration. When the iteration changed no vertex value, all
+     * scheduled tasks are removed.
+     */
+    public void endIteration(GraphChiContext ctx) {
+        System.out.println(vertexValuesUpdated + " updates.");
+        System.out.println("iteration " + ctx.getIteration() + " ends.");
+
+        nIterations++;
+        if (vertexValuesUpdated == 0) {
+            System.out.println("no updates in this round. No more rounds .. KCore-montresor terminates!");
+            ctx.getScheduler().removeAllTasks();
+        }
+    }
+
+    public void beginInterval(GraphChiContext ctx, VertexInterval interval) {}
+
+    public void endInterval(GraphChiContext ctx, VertexInterval interval) {}
+
+    public void beginSubInterval(GraphChiContext ctx, VertexInterval interval) {}
+
+    public void endSubInterval(GraphChiContext ctx, VertexInterval interval) {}
+
+    /**
+     * Creates the sharder used for preprocessing. Vertex and edge tokens of the input are
+     * ignored: every vertex and every edge gets the value 0.
+     *
+     * @param graphName path of the graph file
+     * @param numShards number of shards to create
+     * @throws IOException if the sharder cannot be created
+     */
+    protected static FastSharder<Integer, Integer> createSharder(String graphName, int numShards) throws IOException {
+        return new FastSharder<Integer, Integer>(graphName, numShards, new VertexProcessor<Integer>() {
+            public Integer receiveVertexValue(int vertexId, String token) {
+                return 0;
+            }
+        }, new EdgeProcessor<Integer>() {
+            public Integer receiveEdge(int from, int to, String token) {
+                return 0;
+            }
+        }, new IntConverter(), new IntConverter());
+    }
+
+    public static void main(String[] args) throws IOException {
+
+        /** Run from command line (Example)
+         * java -Xmx2048m -cp bin:gchi-libs/* -Dnum_threads=4 edu.cmu.graphchi.apps.kcore.KCoreDecomposer filename nbrOfShards filetype memoryBudget
+         *
+         * Assuming GraphChi jar files are saved in ./gchi-libs/
+         */
+
+        String inputDirectory = "./datasets/";
+        String outputDirectory = "./output/";
+
+        String fileName = args[0];
+        int nShards = Integer.parseInt(args[1]);
+        String fileType = args[2];
+        // memoryBudget is optional. Keep it boxed: assigning the conditional's null branch
+        // to an int would throw a NullPointerException when the argument is omitted.
+        Integer memBudget = (args.length >= 4 ? Integer.valueOf(args[3]) : null);
+
+        CompressedIO.disableCompression();
+
+        String inputFilePath = inputDirectory + fileName;
+
+        /* Preprocessing graph : Making shards */
+
+        FastSharder<Integer, Integer> sharder = createSharder(inputFilePath, nShards);
+        // Compare the bare file name: inputFilePath always carries the directory prefix, so
+        // testing it against "pipein" could never succeed.
+        if ("pipein".equals(fileName)) {     // Allow piping graph in
+            sharder.shard(System.in, fileType);
+        } else {
+            if (!new File(ChiFilenames.getFilenameIntervals(inputFilePath, nShards)).exists()) {
+                sharder.shard(new FileInputStream(new File(inputFilePath)), fileType);
+            } else {
+                logger.info("Found shards -- no need to preprocess");
+            }
+        }
+
+        /* Running GraphChi */
+        GraphChiEngine<Integer, Integer> engine =
+                new GraphChiEngine<Integer, Integer>(inputFilePath, nShards);
+        if (memBudget != null) {
+            // Only override the engine's own budget when one was given on the command line.
+            engine.setMemoryBudgetMb(memBudget.intValue());
+        }
+        engine.setSkipZeroDegreeVertices(true);
+        engine.setEnableScheduler(true);
+        engine.setEdataConverter(new IntConverter());
+        engine.setVertexDataConverter(new IntConverter());
+
+        engine.run(new KCoreDecomposer(), INFINITY);
+
+        logger.info("Ready.");
+
+        /* Outputting Core Values */
+        startWriting(new File(outputDirectory + "out-cores-" + fileName), false);
+        try {
+            bw.write(nVertexes + "\n");
+
+            VertexIdTranslate trans = engine.getVertexIdTranslate();
+            TreeSet<IdInt> topToBottom = Toplist.topListInt(inputFilePath,
+                    engine.numVertices(), engine.numVertices());
+
+            for (IdInt walker : topToBottom) {
+                float coreValue = walker.getValue();
+                // Ids are mapped back with the engine's translator before being written.
+                bw.write(trans.backward(walker.getVertexId()) + ", " + String.valueOf((int)coreValue) + "\n");
+            }
+        } finally {
+            // Close (and flush) the output file even if writing fails part-way.
+            stopWriting();
+        }
+
+        System.out.println("Vertexes Processed: " + engine.numVertices());
+        System.out.println("Edges Processed: " + engine.numEdges()) ;
+
+        System.out.println("nIterations: " + nIterations);
+        System.out.println("Success!");
+
+    }
+
+}
diff --git a/src/main/java/edu/cmu/graphchi/apps/pig/PigWeightedPagerank.java b/src/main/java/edu/cmu/graphchi/apps/pig/PigWeightedPagerank.java
new file mode 100644
index 00000000..f651b47c
--- /dev/null
+++ b/src/main/java/edu/cmu/graphchi/apps/pig/PigWeightedPagerank.java
@@
-0,0 +1,165 @@ +package edu.cmu.graphchi.apps.pig; + +import edu.cmu.graphchi.ChiVertex; +import edu.cmu.graphchi.GraphChiContext; +import edu.cmu.graphchi.GraphChiProgram; +import edu.cmu.graphchi.datablocks.FloatConverter; +import edu.cmu.graphchi.datablocks.FloatPair; +import edu.cmu.graphchi.datablocks.FloatPairConverter; +import edu.cmu.graphchi.engine.GraphChiEngine; +import edu.cmu.graphchi.engine.VertexInterval; +import edu.cmu.graphchi.hadoop.PigGraphChiBase; +import edu.cmu.graphchi.preprocessing.EdgeProcessor; +import edu.cmu.graphchi.preprocessing.FastSharder; +import edu.cmu.graphchi.preprocessing.VertexProcessor; +import edu.cmu.graphchi.vertexdata.VertexAggregator; +import edu.cmu.graphchi.vertexdata.VertexIdValue; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; + +import java.io.IOException; +import java.util.Iterator; +import java.util.logging.Logger; + +/** + * Example application: WeightedPageRank (http://en.wikipedia.org/wiki/Pagerank) + * Iteratively computes a pagerank for each vertex by averaging the pageranks + * of in-neighbors pageranks. Uses edge weights to implemented weighted version of pagerank. + * + * This version can be used with Pig in a Hadoop cluster. + * + * Example PIG script for running this: + * + *
+ *     REGISTER graphchi-java-0.2-jar-with-dependencies.jar;
+ *
+ *     pagerank = LOAD 'graphs/soc-LiveJournal1.txt' USING edu.cmu.graphchi.demo.pig.PigPagerank as (vertex:int, rank:float);
+ *
+ *     STORE pagerank INTO 'pagerank-livejournal';
+ * 
+ * + * (To get the livejournal graph, visit: http://snap.stanford.edu/data/soc-LiveJournal1.html) + * + * @see edu.cmu.graphchi.hadoop.PigGraphChiBase + * @author Jerry Ye + */ +public class PigWeightedPagerank extends PigGraphChiBase implements GraphChiProgram { + + private static Logger logger = Logger.getLogger("weighted_pagerank"); + + public void update(ChiVertex vertex, GraphChiContext context) { + if (context.getIteration() == 0) { + /* Initialize on first iteration */ + vertex.setValue(1.0f); + } else { + /* On other iterations, set my value to be the weighted + average of my in-coming neighbors pageranks. + */ + float sum = 0.f; + for(int i=0; i> vertexIterator; + + + @Override + /** + * Pig column names + */ + protected String getSchemaString() { + return "(vertex:int, weight:float)"; + } + + @Override + protected int getNumShards() { + return 12; // Unfortunately, currently hard-coded. + } + + @Override + /** + * Runs the GraphChi program + */ + protected void runGraphChi() throws Exception { + /* Run GraphChi */ + GraphChiEngine engine = new GraphChiEngine(getGraphName(), getNumShards()); + engine.setEdataConverter(new FloatPairConverter()); + engine.setVertexDataConverter(new FloatConverter()); + engine.setModifiesInedges(false); // Important optimization + + engine.run(this, 4); + + logger.info("Ready."); + + /* Create iterator for the vertex values */ + this.vertexIterator = VertexAggregator.vertexIterator(engine.numVertices(), getGraphName(), new FloatConverter(), + engine.getVertexIdTranslate()); + } + + @Override + /** + * Constructs "sharder", which takes an edge list and + * converts it to internal binary representation of GraphChi. + */ + protected FastSharder createSharder(String graphName, int numShards) throws IOException { + return new FastSharder(graphName, numShards, new VertexProcessor() { + public Float receiveVertexValue(int vertexId, String token) { + return (token == null ? 
0.0f : Float.parseFloat(token)); + } + }, new EdgeProcessor() { + public FloatPair receiveEdge(int from, int to, String token) { + return new FloatPair(Float.parseFloat(token), 0.f); + } + }, new FloatConverter(), new FloatPairConverter()); + } + + @Override + /** + * Generates the output to the Pig script, tuple by tuple + */ + protected Tuple getNextResult(TupleFactory tupleFactory) throws ExecException { + if (vertexIterator.hasNext()) { + Tuple t = tupleFactory.newTuple(2); + VertexIdValue val = vertexIterator.next(); + t.set(0, val.getVertexId()); + t.set(1, val.getValue()); + return t; + } else { + return null; + } + } +} diff --git a/src/main/java/edu/cmu/graphchi/apps/randomwalks/PersonalizedPageRank.java b/src/main/java/edu/cmu/graphchi/apps/randomwalks/PersonalizedPageRank.java index e2a5aaf3..61be9745 100644 --- a/src/main/java/edu/cmu/graphchi/apps/randomwalks/PersonalizedPageRank.java +++ b/src/main/java/edu/cmu/graphchi/apps/randomwalks/PersonalizedPageRank.java @@ -7,7 +7,13 @@ import edu.cmu.graphchi.walks.DrunkardContext; import edu.cmu.graphchi.walks.DrunkardJob; import edu.cmu.graphchi.walks.DrunkardMobEngine; +import edu.cmu.graphchi.walks.IntDrunkardContext; +import edu.cmu.graphchi.walks.IntDrunkardFactory; +import edu.cmu.graphchi.walks.IntWalkArray; import edu.cmu.graphchi.walks.WalkUpdateFunction; +import edu.cmu.graphchi.walks.WalkArray; +import edu.cmu.graphchi.walks.WeightedHopper; +import edu.cmu.graphchi.walks.distributions.IntDrunkardCompanion; import edu.cmu.graphchi.walks.distributions.DrunkardCompanion; import edu.cmu.graphchi.walks.distributions.RemoteDrunkardCompanion; import org.apache.commons.cli.*; @@ -39,7 +45,8 @@ public class PersonalizedPageRank implements WalkUpdateFunction(baseFilename, nShards); + this.drunkardMobEngine = new DrunkardMobEngine(baseFilename, nShards, + new IntDrunkardFactory()); this.companionUrl = companionUrl; this.firstSource = firstSource; @@ -55,7 +62,7 @@ private void execute(int numIters) throws 
Exception { */ RemoteDrunkardCompanion companion; if (companionUrl.equals("local")) { - companion = new DrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); + companion = new IntDrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); } else { companion = (RemoteDrunkardCompanion) Naming.lookup(companionUrl); } @@ -91,10 +98,12 @@ private void execute(int numIters) throws Exception { * WalkUpdateFunction interface implementations */ @Override - public void processWalksAtVertex(int[] walks, + public void processWalksAtVertex(WalkArray walkArray, ChiVertex vertex, - DrunkardContext drunkardContext, + DrunkardContext drunkardContext_, Random randomGenerator) { + int[] walks = ((IntWalkArray)walkArray).getArray(); + IntDrunkardContext drunkardContext = (IntDrunkardContext) drunkardContext_; int numWalks = walks.length; int numOutEdges = vertex.numOutEdges(); diff --git a/src/main/java/edu/cmu/graphchi/apps/recommendations/MovieRecommender.java b/src/main/java/edu/cmu/graphchi/apps/recommendations/MovieRecommender.java index b2c40a81..00bcbaa4 100644 --- a/src/main/java/edu/cmu/graphchi/apps/recommendations/MovieRecommender.java +++ b/src/main/java/edu/cmu/graphchi/apps/recommendations/MovieRecommender.java @@ -10,9 +10,13 @@ import edu.cmu.graphchi.walks.DrunkardContext; import edu.cmu.graphchi.walks.DrunkardJob; import edu.cmu.graphchi.walks.DrunkardMobEngine; +import edu.cmu.graphchi.walks.IntDrunkardContext; +import edu.cmu.graphchi.walks.IntDrunkardFactory; +import edu.cmu.graphchi.walks.IntWalkArray; +import edu.cmu.graphchi.walks.WalkArray; import edu.cmu.graphchi.walks.WalkUpdateFunction; import edu.cmu.graphchi.walks.WeightedHopper; -import edu.cmu.graphchi.walks.distributions.DrunkardCompanion; +import edu.cmu.graphchi.walks.distributions.IntDrunkardCompanion; import org.apache.commons.cli.*; @@ -51,11 +55,11 @@ protected void execute() throws Exception { logger.info("Computed ALS, now random walks"); /* Initialize drunkardmob */ - DrunkardMobEngine 
drunkardMobEngine = new DrunkardMobEngine(baseFilename, nShards); + DrunkardMobEngine drunkardMobEngine = new DrunkardMobEngine(baseFilename, nShards, new IntDrunkardFactory()); DrunkardJob positiveJob = drunkardMobEngine.addJob("positive", EdgeDirection.IN_AND_OUT_EDGES, - new PositiveWalkUpdate(), new DrunkardCompanion(2, Runtime.getRuntime().maxMemory() / 8)); + new PositiveWalkUpdate(), new IntDrunkardCompanion(2, Runtime.getRuntime().maxMemory() / 8)); DrunkardJob negativeJob = drunkardMobEngine.addJob("negative", EdgeDirection.IN_AND_OUT_EDGES, - new NegativeWalkUpdate(), new DrunkardCompanion(2, Runtime.getRuntime().maxMemory() / 8)); + new NegativeWalkUpdate(), new IntDrunkardCompanion(2, Runtime.getRuntime().maxMemory() / 8)); drunkardMobEngine.setEdataConverter(new FloatConverter()); @@ -117,14 +121,15 @@ protected void execute() throws Exception { protected static class PositiveWalkUpdate implements WalkUpdateFunction { @Override - public void processWalksAtVertex(int[] walks, ChiVertex vertex, DrunkardContext drunkardContext, Random randomGenerator) { - hopToHighRatings(walks, vertex, drunkardContext, randomGenerator); + public void processWalksAtVertex(WalkArray walkArray, ChiVertex vertex, DrunkardContext drunkardContext, Random randomGenerator) { + int[] walks = ((IntWalkArray)walkArray).getArray(); + hopToHighRatings(walks, vertex, (IntDrunkardContext)drunkardContext, randomGenerator); } // Have some weight for <= 3 ratings to avoid divide by zeroes. 
private static final float weightedRating[] = {0.0f, 0.00001f, 0.00001f, 0.0001f, 100.0f, 1000.0f}; - protected static void hopToHighRatings(int[] walks, ChiVertex vertex, DrunkardContext drunkardContext, Random randomGenerator) { + protected static void hopToHighRatings(int[] walks, ChiVertex vertex, IntDrunkardContext drunkardContext, Random randomGenerator) { int[] hops = WeightedHopper.generateRandomHopsAliasMethod(randomGenerator, vertex, walks.length, EdgeDirection.IN_AND_OUT_EDGES, new WeightedHopper.EdgeWeightMap() { @@ -164,7 +169,9 @@ public int[] getNotTrackedVertices(ChiVertex vertex) { protected class NegativeWalkUpdate extends PositiveWalkUpdate { @Override - public void processWalksAtVertex(int[] walks, ChiVertex vertex, DrunkardContext drunkardContext, Random randomGenerator) { + public void processWalksAtVertex(WalkArray walkArray, ChiVertex vertex, DrunkardContext drunkardContext_, Random randomGenerator) { + int[] walks = ((IntWalkArray)walkArray).getArray(); + IntDrunkardContext drunkardContext = (IntDrunkardContext) drunkardContext_; // Movie vertex - do same as the positive if (vertex.numInEdges() > 0 || drunkardContext.getIteration() > 0) { hopToHighRatings(walks, vertex, drunkardContext, randomGenerator); diff --git a/src/main/java/edu/cmu/graphchi/apps/recommendations/TwitterWTF.java b/src/main/java/edu/cmu/graphchi/apps/recommendations/TwitterWTF.java index a7496fd2..f33737e7 100644 --- a/src/main/java/edu/cmu/graphchi/apps/recommendations/TwitterWTF.java +++ b/src/main/java/edu/cmu/graphchi/apps/recommendations/TwitterWTF.java @@ -11,8 +11,13 @@ import edu.cmu.graphchi.walks.DrunkardContext; import edu.cmu.graphchi.walks.DrunkardJob; import edu.cmu.graphchi.walks.DrunkardMobEngine; +import edu.cmu.graphchi.walks.IntDrunkardContext; +import edu.cmu.graphchi.walks.IntDrunkardFactory; +import edu.cmu.graphchi.walks.IntWalkArray; +import edu.cmu.graphchi.walks.WalkArray; import edu.cmu.graphchi.walks.WalkUpdateFunction; -import 
edu.cmu.graphchi.walks.distributions.DrunkardCompanion; +import edu.cmu.graphchi.walks.WeightedHopper; +import edu.cmu.graphchi.walks.distributions.IntDrunkardCompanion; import edu.cmu.graphchi.walks.distributions.RemoteDrunkardCompanion; import org.apache.commons.cli.*; @@ -65,7 +70,8 @@ public class TwitterWTF implements WalkUpdateFunction { public TwitterWTF(String companionUrl, String baseFilename, int nShards, int firstSource, int numSources, int walksPerSource) throws Exception{ this.baseFilename = baseFilename; - this.drunkardMobEngine = new DrunkardMobEngine(baseFilename, nShards); + this.drunkardMobEngine = new DrunkardMobEngine(baseFilename, nShards, + new IntDrunkardFactory()); this.numShards = nShards; this.companionUrl = companionUrl; @@ -82,7 +88,7 @@ private void execute(int numIters) throws Exception { */ final RemoteDrunkardCompanion companion; if (companionUrl.equals("local")) { - companion = new DrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); + companion = new IntDrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); } else { companion = (RemoteDrunkardCompanion) Naming.lookup(companionUrl); } @@ -206,10 +212,12 @@ private void computeRecs(RemoteDrunkardCompanion companion, int circleOfTrustSiz * WalkUpdateFunction interface implementations */ @Override - public void processWalksAtVertex(int[] walks, + public void processWalksAtVertex(WalkArray walkArray, ChiVertex vertex, - DrunkardContext drunkardContext, + DrunkardContext drunkardContext_, Random randomGenerator) { + int[] walks = ((IntWalkArray)walkArray).getArray(); + IntDrunkardContext drunkardContext = (IntDrunkardContext) drunkardContext_; int numWalks = walks.length; int numOutEdges = vertex.numOutEdges(); diff --git a/src/main/java/edu/cmu/graphchi/datablocks/FloatPair.java b/src/main/java/edu/cmu/graphchi/datablocks/FloatPair.java index aba68569..dacec76a 100644 --- a/src/main/java/edu/cmu/graphchi/datablocks/FloatPair.java +++ 
b/src/main/java/edu/cmu/graphchi/datablocks/FloatPair.java @@ -3,11 +3,12 @@ /** * Represents a 2-tuple of floats. * Access the tuple elements by pair.first, pair.second. + * * @author Aapo Kyrola */ public class FloatPair { - public float first; - public float second; + public final float first; + public final float second; public FloatPair(float first, float second) { this.first = first; diff --git a/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java b/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java index 8cbad3fb..aaca8e3b 100644 --- a/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java +++ b/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java @@ -604,7 +604,7 @@ public void setValue(byte[] array, Object val) { File blockFile = new File(ChiFilenames.getFilenameShardEdataBlock(edataFileName, blockIdx, blockSize)); OutputStream blockOs = (CompressedIO.isCompressionEnabled() ? new DeflaterOutputStream(new BufferedOutputStream(new FileOutputStream(blockFile))) : - new FileOutputStream(blockFile)); + new FileOutputStream(blockFile)); long len = Math.min(blockSize, edatasize - idx); byte[] block = new byte[(int)len]; @@ -657,11 +657,13 @@ private static int partition(long arr[], byte[] values, int sizeOf, int left, in } static void quickSort(long arr[], byte[] values, int sizeOf, int left, int right) { - int index = partition(arr, values, sizeOf, left, right); - if (left < index - 1) - quickSort(arr, values, sizeOf, left, index - 1); - if (index < right) - quickSort(arr, values, sizeOf, index, right); + if (left < right) { + int index = partition(arr, values, sizeOf, left, right); + if (left < index - 1) + quickSort(arr, values, sizeOf, left, index - 1); + if (index < right) + quickSort(arr, values, sizeOf, index, right); + } } @@ -752,7 +754,7 @@ public void shard(InputStream inputStream, GraphInputFormat format) throws IOExc /* Vertex - ids on the right side of the bipartite graph have id numLeft + originalId */ try { 
String lastTok = tok[tok.length - 1]; - this.addEdge(Integer.parseInt(tok[0]) - 1, numLeft + Integer.parseInt(tok[1]), lastTok); + this.addEdge(Integer.parseInt(tok[0]) - 1, numLeft + Integer.parseInt(tok[1]) - 1, lastTok); } catch (NumberFormatException nfe) { logger.severe("Could not parse line: " + ln); throw nfe; diff --git a/src/main/java/edu/cmu/graphchi/queries/demo/FriendsOfFriends.java b/src/main/java/edu/cmu/graphchi/queries/demo/FriendsOfFriends.java index 378bb839..2f4f12a8 100644 --- a/src/main/java/edu/cmu/graphchi/queries/demo/FriendsOfFriends.java +++ b/src/main/java/edu/cmu/graphchi/queries/demo/FriendsOfFriends.java @@ -157,7 +157,7 @@ public String recommendFriends(int vertexId, int fanOut) throws IOException { private String namify(Integer value) throws IOException { File f = new File(baseFilename + "_names.dat"); if (!f.exists()) { - System.out.println("didn't find name file: " + f.getPath()); + // System.out.println("didn't find name file: " + f.getPath()); return value+""; } int i = value * 16; diff --git a/src/main/java/edu/cmu/graphchi/walks/BucketsToSend.java b/src/main/java/edu/cmu/graphchi/walks/BucketsToSend.java new file mode 100644 index 00000000..dfc37a18 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/BucketsToSend.java @@ -0,0 +1,13 @@ +package edu.cmu.graphchi.walks; + +public class BucketsToSend { + public final int firstVertex; + public final WalkArray walks; + public final int length; + + BucketsToSend(int firstVertex, WalkArray walks, int length) { + this.firstVertex = firstVertex; + this.walks = walks; + this.length = length; + } +} diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardContext.java b/src/main/java/edu/cmu/graphchi/walks/DrunkardContext.java index 70fc9553..ab460f4e 100644 --- a/src/main/java/edu/cmu/graphchi/walks/DrunkardContext.java +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardContext.java @@ -22,33 +22,9 @@ public interface DrunkardContext { int getIteration(); - /** - * Moves walk to 
next vertex - * @param walk walk identified - * @param destinationVertex vertex id to move hte walk to - * @param trackBit set to true if this walk should be tracked, otherwise false - */ - void forwardWalkTo(int walk, int destinationVertex, boolean trackBit); - - void resetWalk(int walk, boolean trackBit); - - /** - * Reads the track-bit of a walk identifier. - * @param walk - * @return - */ - boolean getTrackBit(int walk); - - /** - * Returns true if walk was started from the vertex - */ - boolean isWalkStartedFromVertex(int walk); - /** * Object for translating from internal to original vertex ids * @return */ VertexIdTranslate getVertexIdTranslate(); - - void resetAll(int[] walks); } diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java b/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java index a85fc6ef..8865674b 100644 --- a/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java @@ -1,32 +1,36 @@ package edu.cmu.graphchi.walks; -import com.yammer.metrics.Metrics; -import com.yammer.metrics.core.Timer; -import com.yammer.metrics.core.TimerContext; -import edu.cmu.graphchi.*; -import edu.cmu.graphchi.engine.VertexInterval; -import edu.cmu.graphchi.preprocessing.VertexIdTranslate; - import java.rmi.RemoteException; -import java.util.*; +import java.util.ArrayList; +import java.util.Random; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.logging.Logger; +import com.yammer.metrics.Metrics; +import com.yammer.metrics.core.Timer; +import com.yammer.metrics.core.TimerContext; + +import edu.cmu.graphchi.ChiLogger; +import edu.cmu.graphchi.ChiVertex; +import edu.cmu.graphchi.GraphChiContext; +import edu.cmu.graphchi.engine.VertexInterval; + /** * Class to encapsulate the graphchi program running the show. 
* Due to several optimizations, it is quite complicated! */ -public class DrunkardDriver implements GrabbedBucketConsumer { +public abstract class DrunkardDriver implements GrabbedBucketConsumer { private WalkSnapshot curWalkSnapshot; - private final DrunkardJob job; - private static Logger logger = ChiLogger.getLogger("drunkard-driver"); + protected final DrunkardJob job; + protected static Logger logger = ChiLogger.getLogger("drunkard-driver"); - private LinkedBlockingQueue bucketQueue = new LinkedBlockingQueue(); - private boolean finished = false; + protected LinkedBlockingQueue bucketQueue = new LinkedBlockingQueue(); + protected AtomicBoolean finished = new AtomicBoolean(false); + protected AtomicLong pendingWalksToSubmit = new AtomicLong(0); private Thread dumperThread; - private AtomicLong pendingWalksToSubmit = new AtomicLong(0); WalkUpdateFunction callback; private final Timer purgeTimer = @@ -39,72 +43,21 @@ public class DrunkardDriver implements GrabbedBuck // Setup thread for sending walks to the companion (i.e tracker) // Launch a thread to send to the companion - dumperThread = new Thread(new Runnable() { - public void run() { - int[] walks = new int[256 * 1024]; - int[] vertices = new int[256 * 1024]; - int idx = 0; - - while(!finished || bucketQueue.size() > 0) { - BucketsToSend bucket = null; - try { - bucket = bucketQueue.poll(1000, TimeUnit.MILLISECONDS); - } catch (InterruptedException e) { - } - if (bucket != null) { - pendingWalksToSubmit.addAndGet(-bucket.length); - for(int i=0; i= walks.length) { - try { - job.getCompanion().processWalks(walks, vertices); - } catch (Exception err) { - err.printStackTrace(); - } - idx = 0; - } - - } - } - } - - // Send rest - try { - int[] tmpWalks = new int[idx]; - int[] tmpVertices = new int[idx]; - System.arraycopy(walks, 0, tmpWalks, 0, idx); - System.arraycopy(vertices, 0, tmpVertices, 0, idx); - job.getCompanion().processWalks(tmpWalks, tmpVertices); - } catch (Exception err) { - err.printStackTrace(); 
- } - } - }); + dumperThread = new Thread(createDumperThread()); dumperThread.start(); } + protected abstract DumperThread createDumperThread(); + public DrunkardJob getJob() { return job; } - public void update(ChiVertex vertex, final GraphChiContext context, - final LocalWalkBuffer localBuf) { - + protected abstract DrunkardContext createDrunkardContext(int vertexId, GraphChiContext context, + LocalWalkBuffer localBuf); + public void update(ChiVertex vertex, + final GraphChiContext context, final LocalWalkBuffer localBuf) { try { // Flow control while (pendingWalksToSubmit.get() > job.getWalkManager().getTotalWalks() / 40) { @@ -116,7 +69,7 @@ public void update(ChiVertex vertex, final GraphCh } boolean firstIteration = (context.getIteration() == 0); - int[] walksAtMe = curWalkSnapshot.getWalksAtVertex(vertex.getId(), true); + WalkArray walksAtMe = curWalkSnapshot.getWalksAtVertex(vertex.getId(), true); // Very dirty memory management curWalkSnapshot.clear(vertex.getId()); @@ -130,61 +83,12 @@ public void update(ChiVertex vertex, final GraphCh job.getCompanion().setAvoidList(mySourceIdx, callback.getNotTrackedVertices(vertex)); } } - if (walksAtMe == null || walksAtMe.length == 0) { - return; - } + if (walksAtMe == null || walksAtMe.size() == 0) return; Random randomGenerator = localBuf.random; - final boolean isSource = job.getWalkManager().isSource(vertex.getId()); - final int mySourceIndex = (isSource ? 
job.getWalkManager().getVertexSourceIdx(vertex.getId()) : -1); - - callback.processWalksAtVertex(walksAtMe, vertex, new DrunkardContext() { - @Override - public boolean isSource() { - return isSource; - } - - @Override - public int sourceIndex() { - return mySourceIndex; - } - - @Override - public int getIteration() { - return context.getIteration(); - } - - @Override - public void forwardWalkTo(int walk, int destinationVertex, boolean trackBit) { - localBuf.add(WalkManager.sourceIdx(walk), destinationVertex, trackBit); - } - - @Override - public void resetWalk(int walk, boolean trackBit) { - forwardWalkTo(walk, job.getWalkManager().getSourceVertex(WalkManager.sourceIdx(walk)), false); - } - - @Override - public boolean getTrackBit(int walk) { - return WalkManager.hop(walk); - } - - @Override - public boolean isWalkStartedFromVertex(int walk) { - return mySourceIndex == WalkManager.sourceIdx(walk); - } - - @Override - public VertexIdTranslate getVertexIdTranslate() { - return getVertexIdTranslate(); - } - - @Override - public void resetAll(int[] walks) { - for(int w : walks) resetWalk(w, false); - } - }, randomGenerator); + DrunkardContext drunkardContext = createDrunkardContext(vertex.getId(), context, localBuf); + callback.processWalksAtVertex(walksAtMe, vertex, drunkardContext, randomGenerator); } catch (RemoteException re) { throw new RuntimeException(re); } @@ -195,9 +99,6 @@ public void initWalks() throws RemoteException{ job.getCompanion().setSources(job.getWalkManager().getSources()); } - - - public void beginIteration(GraphChiContext ctx) { if (ctx.getIteration() == 0) { ctx.getScheduler().removeAllTasks(); @@ -208,7 +109,7 @@ public void beginIteration(GraphChiContext ctx) { public void endIteration(GraphChiContext ctx) {} public void spinUntilFinish() { - finished = true; + finished.set(true); while (bucketQueue.size() > 0) { try { System.out.println("Waiting ..." 
+ bucketQueue.size()); @@ -262,8 +163,6 @@ public void endSubInterval(GraphChiContext ctx, final VertexInterval interval) { } } - - public void beginInterval(GraphChiContext ctx, VertexInterval interval) { /* Count walks */ long initializedWalks = job.getWalkManager().getTotalWalks(); @@ -279,7 +178,7 @@ public void beginInterval(GraphChiContext ctx, VertexInterval interval) { public void endInterval(GraphChiContext ctx, VertexInterval interval) {} - public void consume(int firstVertexInBucket, int[] walkBucket, int len) { + public void consume(int firstVertexInBucket, WalkArray walkBucket, int len) { try { pendingWalksToSubmit.addAndGet(len); bucketQueue.put(new BucketsToSend(firstVertexInBucket, walkBucket, len)); @@ -287,18 +186,4 @@ public void consume(int firstVertexInBucket, int[] walkBucket, int len) { e.printStackTrace(); } } - - private static class BucketsToSend { - int firstVertex; - int[] walks; - int length; - - BucketsToSend(int firstVertex, int[] walks, int length) { - this.firstVertex = firstVertex; - this.walks = walks; - this.length = length; - } - } - } - diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardFactory.java b/src/main/java/edu/cmu/graphchi/walks/DrunkardFactory.java new file mode 100644 index 00000000..26e40da2 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardFactory.java @@ -0,0 +1,8 @@ +package edu.cmu.graphchi.walks; + +public interface DrunkardFactory { + public DrunkardDriver createDrunkardDriver(DrunkardJob job, + WalkUpdateFunction callback); + public WalkManager createWalkManager(int numVertices, int numSources); + public LocalWalkBuffer createLocalWalkBuffer(); +} diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardJob.java b/src/main/java/edu/cmu/graphchi/walks/DrunkardJob.java index e62ce8f8..13e62ced 100644 --- a/src/main/java/edu/cmu/graphchi/walks/DrunkardJob.java +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardJob.java @@ -11,16 +11,15 @@ public class DrunkardJob { private String name; 
private WalkManager walkManager; private RemoteDrunkardCompanion companion; - private int numVertices; + private DrunkardFactory factory; + protected int numVertices; - public DrunkardJob(String name, RemoteDrunkardCompanion companion, int numVertices) { + public DrunkardJob(String name, RemoteDrunkardCompanion companion, int numVertices, + DrunkardFactory factory) { this.name = name; this.numVertices = numVertices; this.companion = companion; - } - - protected WalkManager createWalkManager(int numSources) { - return new WalkManager(numVertices, numSources); + this.factory = factory; } /** @@ -30,13 +29,13 @@ protected WalkManager createWalkManager(int numSources) { * @param walksPerSource how many walks to start from each source */ public void configureSourceRangeInternalIds(int firstSourceId, int numSources, int walksPerSource) { - if (this.walkManager != null) { + if (walkManager != null) { throw new IllegalStateException("You can configure walks only once!"); } - this.walkManager = createWalkManager(numSources); + walkManager = factory.createWalkManager(numVertices, numSources); for(int i=firstSourceId; i < firstSourceId + numSources; i++) { - this.walkManager.addWalkBatch(i, walksPerSource); + walkManager.addWalkBatch(i, walksPerSource); } } @@ -46,13 +45,13 @@ public void configureSourceRangeInternalIds(int firstSourceId, int numSources, i * @param walksPerSource */ public void configureWalkSources(List walkSources, int walksPerSource) { - if (this.walkManager != null) { + if (walkManager != null) { throw new IllegalStateException("You can configure walks only once!"); } - this.walkManager = createWalkManager(walkSources.size()); + walkManager = factory.createWalkManager(numVertices, walkSources.size()); Collections.sort(walkSources); for(int src : walkSources) { - this.walkManager.addWalkBatch(src, walksPerSource); + walkManager.addWalkBatch(src, walksPerSource); } } diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardMobEngine.java 
b/src/main/java/edu/cmu/graphchi/walks/DrunkardMobEngine.java index b8f933f2..dbe80217 100644 --- a/src/main/java/edu/cmu/graphchi/walks/DrunkardMobEngine.java +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardMobEngine.java @@ -20,15 +20,27 @@ */ public class DrunkardMobEngine { - protected GraphChiEngine engine; - protected List drivers; + private GraphChiEngine engine; + private List drivers; - protected static Logger logger = ChiLogger.getLogger("drunkardmob-engine"); + private static Logger logger = ChiLogger.getLogger("drunkardmob-engine"); + private DrunkardFactory factory; - - public DrunkardMobEngine(String baseFilename, int nShards) throws IOException { + /** + * Create the engine + * @param factory we allow walks to be represented either as ints or as longs (if more + * information needs to be stored, e.g. to retrieve path information from the walks). In order + * to avoid autoboxing, we do a little bit of fancy footwork here. The caller must pass in an + * IntDrunkardFactory or a LongDrunkardFactory, and then when processing WalkArrays and + * DrunkardContexts, they must be cast to IntWalkArrays or LongWalkArrays (and Contexts, and + * whatever else) in order to get the actual values out. This way we can keep the primitive + * typing while still sharing as much code as possible between the int and the long processing. 
+ */ + public DrunkardMobEngine(String baseFilename, int nShards, + DrunkardFactory factory) throws IOException { createGraphChiEngine(baseFilename, nShards); this.drivers = new ArrayList(); + this.factory = factory; // Disable all edge directions by default engine.setDisableInedges(true); @@ -37,7 +49,7 @@ public DrunkardMobEngine(String baseFilename, int nShards) throws IOException { engine.setModifiesOutedges(false); } - protected void createGraphChiEngine(String baseFilename, int nShards) throws IOException { + private void createGraphChiEngine(String baseFilename, int nShards) throws IOException { this.engine = new GraphChiEngine(baseFilename, nShards); this.engine.setOnlyAdjacency(true); this.engine.setVertexDataConverter(null); @@ -88,7 +100,7 @@ public void setVertexDataConverter(BytesToValueConverter vertexD */ public DrunkardJob addJob(String jobName, EdgeDirection edgeDirection, WalkUpdateFunction callback, - RemoteDrunkardCompanion companion) throws IOException { + RemoteDrunkardCompanion companion) { /* Configure engine parameters */ switch(edgeDirection) { @@ -107,8 +119,8 @@ public DrunkardJob addJob(String jobName, EdgeDirection edgeDirection, /** * Create job object and the driver-object. 
*/ - DrunkardJob job = new DrunkardJob(jobName, companion, engine.numVertices()); - drivers.add(new DrunkardDriver(job, callback)); + DrunkardJob job = new DrunkardJob(jobName, companion, engine.numVertices(), factory); + drivers.add(factory.createDrunkardDriver(job, callback)); return job; } @@ -169,7 +181,7 @@ public void update(ChiVertex vertex, GraphChiConte if (context.getThreadLocal() == null) { ArrayList multiplexedLocalBuffers = new ArrayList(drivers.size()); for(DrunkardDriver driver: drivers) { - LocalWalkBuffer buf = new LocalWalkBuffer(); + LocalWalkBuffer buf = factory.createLocalWalkBuffer(); driver.addLocalBuffer(buf); multiplexedLocalBuffers.add(buf); } diff --git a/src/main/java/edu/cmu/graphchi/walks/DumperThread.java b/src/main/java/edu/cmu/graphchi/walks/DumperThread.java new file mode 100644 index 00000000..ade4b471 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/DumperThread.java @@ -0,0 +1,43 @@ +package edu.cmu.graphchi.walks; + +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +public abstract class DumperThread implements Runnable { + + private final LinkedBlockingQueue bucketQueue; + private final AtomicLong pendingWalksToSubmit; + private final AtomicBoolean finished; + + public DumperThread(LinkedBlockingQueue bucketQueue, + AtomicLong pendingWalksToSubmit, + AtomicBoolean finished) { + this.bucketQueue = bucketQueue; + this.pendingWalksToSubmit = pendingWalksToSubmit; + this.finished = finished; + } + + public void run() { + while(!finished.get() || bucketQueue.size() > 0) { + BucketsToSend bucket = null; + try { + bucket = bucketQueue.poll(1000, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + } + if (bucket != null) { + pendingWalksToSubmit.addAndGet(-bucket.length); + for(int i=0; i + extends DrunkardDriver implements GrabbedBucketConsumer { + + IntDrunkardDriver(final 
DrunkardJob job, + WalkUpdateFunction callback) { + super(job, callback); + } + + @Override + protected IntDumperThread createDumperThread() { + return new IntDumperThread(bucketQueue, pendingWalksToSubmit, finished, job); + } + + @Override + protected DrunkardContext createDrunkardContext(int vertexId, final GraphChiContext context, + final LocalWalkBuffer localBuf_) { + final IntWalkManager manager = (IntWalkManager) job.getWalkManager(); + final boolean isSource = manager.isSource(vertexId); + final int mySourceIndex = (isSource ? manager.getVertexSourceIdx(vertexId) : -1); + final IntLocalWalkBuffer localBuf = (IntLocalWalkBuffer) localBuf_; + return new IntDrunkardContext() { + @Override + public boolean isSource() { + return isSource; + } + + @Override + public int sourceIndex() { + return mySourceIndex; + } + + @Override + public int getIteration() { + return context.getIteration(); + } + + @Override + public void forwardWalkTo(int walk, int destinationVertex, boolean trackBit) { + localBuf.add(walk, destinationVertex, trackBit); + } + + @Override + public void resetWalk(int walk, boolean trackBit) { + forwardWalkTo(walk, manager.getSourceVertex(walk), trackBit); + } + + @Override + public boolean getTrackBit(int walk) { + return manager.trackBit(walk); + } + + @Override + public boolean isWalkStartedFromVertex(int walk) { + return mySourceIndex == manager.sourceIdx(walk); + } + + @Override + public VertexIdTranslate getVertexIdTranslate() { + return getVertexIdTranslate(); + } + }; + } +} + diff --git a/src/main/java/edu/cmu/graphchi/walks/IntDrunkardFactory.java b/src/main/java/edu/cmu/graphchi/walks/IntDrunkardFactory.java new file mode 100644 index 00000000..8621557e --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/IntDrunkardFactory.java @@ -0,0 +1,16 @@ +package edu.cmu.graphchi.walks; + +public class IntDrunkardFactory + implements DrunkardFactory { + public DrunkardDriver createDrunkardDriver(DrunkardJob job, + WalkUpdateFunction callback) { 
+ return new IntDrunkardDriver(job, callback); + } + public WalkManager createWalkManager(int numVertices, int numSources) { + return new IntWalkManager(numVertices, numSources); + } + public LocalWalkBuffer createLocalWalkBuffer() { + return new IntLocalWalkBuffer(); + } +} + diff --git a/src/main/java/edu/cmu/graphchi/walks/IntDumperThread.java b/src/main/java/edu/cmu/graphchi/walks/IntDumperThread.java new file mode 100644 index 00000000..7065194a --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/IntDumperThread.java @@ -0,0 +1,63 @@ +package edu.cmu.graphchi.walks; + +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +public class IntDumperThread extends DumperThread { + private final DrunkardJob job; + private int[] walks = new int[256 * 1024]; + private int[] vertices = new int[256 * 1024]; + private int idx = 0; + + public IntDumperThread(LinkedBlockingQueue bucketQueue, + AtomicLong pendingWalksToSubmit, + AtomicBoolean finished, + DrunkardJob job) { + super(bucketQueue, pendingWalksToSubmit, finished); + this.job = job; + } + + @Override + protected void processWalks(BucketsToSend bucket, int i) { + IntWalkManager manager = (IntWalkManager) job.getWalkManager(); + IntWalkArray bucketWalks = (IntWalkArray) bucket.walks; + int w = bucketWalks.getArray()[i]; + int v = manager.off(w) + bucket.firstVertex; + + + // Skip walks with the track-bit (hop-bit) not set + boolean trackBit = manager.trackBit(w); + + if (!trackBit) { + return; + } + + walks[idx] = w; + vertices[idx] = v; + idx++; + + if (idx >= walks.length) { + try { + job.getCompanion().processWalks(new IntWalkArray(walks), vertices); + } catch (Exception err) { + err.printStackTrace(); + } + idx = 0; + } + } + + @Override + protected void sendRest() { + // Send rest + try { + int[] tmpWalks = new int[idx]; + int[] tmpVertices = new int[idx]; + System.arraycopy(walks, 0, tmpWalks, 0, idx); + 
System.arraycopy(vertices, 0, tmpVertices, 0, idx); + job.getCompanion().processWalks(new IntWalkArray(tmpWalks), tmpVertices); + } catch (Exception err) { + err.printStackTrace(); + } + } +} diff --git a/src/main/java/edu/cmu/graphchi/walks/IntLocalWalkBuffer.java b/src/main/java/edu/cmu/graphchi/walks/IntLocalWalkBuffer.java new file mode 100644 index 00000000..d29b4b7a --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/IntLocalWalkBuffer.java @@ -0,0 +1,36 @@ +package edu.cmu.graphchi.walks; + +class IntLocalWalkBuffer extends LocalWalkBuffer { + int[] walks; + + IntLocalWalkBuffer() { + super(); + walks = new int[DEFAULT_SIZE]; + } + + public void add(int walk, int destination, boolean trackBit) { + if (idx == walks.length) { + int[] tmp = walks; + walks = new int[tmp.length * 2]; + System.arraycopy(tmp, 0, walks, 0, tmp.length); + + expandArrays(); + } + walkBufferDests[idx] = destination; + walks[idx] = walk; + trackBits[idx] = trackBit; + idx++; + } + + @Override + public void purge(WalkManager walkManager) { + IntWalkManager manager = (IntWalkManager) walkManager; + for(int i=0; i> 8) & 0xffffff; + } + + public boolean trackBit(int walk) { + return ((walk & 1) != 0); + } + + public int off(int walk) { + return (walk >> 1) & 0x7f; + } + + /** + * Resets the bucket offset to reflect the new destination vertex, and also resets the track + * bit, according to the parameters. Note that those are the _only_ things re-encoded by this + * method, as those are the only things this method has access to; if other parts of the walk + * need to be changed, that must be taken care of in the WalkUpdateFunction _before_ forwarding + * the walk. 
+ */ + public int reencodeWalk(int walk, int toVertex, boolean trackBit) { + int bucket = toVertex / bucketSize; + return encode(sourceIdx(walk), trackBit, toVertex % bucketSize); + } + + /** + * @param sourceId + * @param toVertex + * @param trackBit true if odd, false if even hop + */ + public void moveWalk(int walk, int toVertex, boolean trackBit) { + int bucket = toVertex / bucketSize; + synchronized (bucketLocks[bucket]) { + moveWalkUnsafe(walk, toVertex, trackBit); + } + } + + public void moveWalkUnsafe(int walk, int toVertex, boolean trackBit) { + // Re-encode the walk to reflect the movement + walk = reencodeWalk(walk, toVertex, trackBit); + + // Move the walk to the new bucket for processing + int bucket = toVertex / bucketSize; + int idx = walkIndices[bucket]; + if (idx == 0) { + walks[bucket] = new int[initialSize]; + } else { + if (idx == walks[bucket].length) { + int[] newBucket = new int[walks[bucket].length * 3 / 2]; + System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); + walks[bucket] = newBucket; + } + } + walks[bucket][idx] = walk; + walkIndices[bucket]++; + } + + @Override + protected void expandCapacity(int bucket, int additional) { + if (walks[bucket] != null) { + int desiredLength = walks[bucket].length + additional; + if (walks[bucket].length < desiredLength) { + int[] newBucket = new int[desiredLength]; + System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); + walks[bucket] = newBucket; + } + } else { + walks[bucket] = new int[additional]; + } + } + + @Override + public void initializeWalks() { + walks = new int[1 + numVertices / bucketSize][]; + bucketLocks = new Object[walks.length]; + for(int i=0; i 0 && i >= -offt && i + offt < snapshots.length) + snapshots[i + offt] = new int[snapshotSizes[i]]; + } + + for(int i=0; i < len; i++) { + int w = bucketToConsume[i]; + int vertex = bucketFirstVertex + off(w); + + if (vertex >= fromVertex && vertex <= toVertexInclusive) { + int snapshotOff = vertex - 
fromVertex; + int localOff = vertex - bucketFirstVertex; + snapshots[snapshotOff][snapshotIdxs[localOff]] = w; + snapshotIdxs[localOff]++; + } else { + // add back + moveWalk(w, vertex, trackBit(w)); + } + } + } + snapshotInitBits[localBucketIdx] = true; + } + } + if (bucketConsumer != null && bucketToConsume != null && len > 0) { + bucketConsumer.consume(bucketIdx * bucketSize, new IntWalkArray(bucketToConsume), len); + if (len > 1000000) { + log((bucketIdx * bucketSize) + " - " + ((bucketIdx+1)) * bucketSize + ", " + len); + } + } + _timer.stop(); + int[] array = snapshots[vertexId - fromVertex]; + if (array == null) { + return null; + } else { + return new IntWalkArray(snapshots[vertexId - fromVertex]); + } + } + } + + @Override + public int getFirstVertex() { + return fromVertex; + } + + @Override + public int getLastVertex() { + return toVertexInclusive; + } + }; + } + + /** Dump to file all walks with more than 0 hop */ + @Override + public void dumpToFile(WalkSnapshot snapshot, String filename) throws IOException { + final TimerContext _timer = dumpTimer.time(); + synchronized (filename.intern()) { + DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(new File(filename), true))); + for(int i=snapshot.getFirstVertex(); i <= snapshot.getLastVertex(); i++) { + int[] ws = ((IntWalkArray)snapshot.getWalksAtVertex(i, false)).getArray(); + if (ws != null) { + for(int j=0; j < ws.length; j++) { + int w = ws[j]; + int source = sources[sourceIdx(w)]; + dos.writeInt(source); + dos.writeInt(i); + } + } + } + dos.flush(); + dos.close(); + } + _timer.stop(); + } + + public int getSourceVertex(int walk) { + return sources[sourceIdx(walk)]; + } + + @Override + public void populateSchedulerForInterval(Scheduler scheduler, VertexInterval interval) { + final TimerContext _timer = schedulePopulate.time(); + int fromBucket = interval.getFirstVertex() / bucketSize; + int toBucket = interval.getLastVertex() / bucketSize; + + for(int 
bucketIdx=fromBucket; bucketIdx <= toBucket; bucketIdx++) { + int vertexBase = bucketIdx * bucketSize; + int[] bucket = walks[bucketIdx]; + + if (bucket != null) { + BitSet alreadySeen = new BitSet(bucketSize); + int counter = 0; + for(int j=0; j dests = new ArrayList(); - ArrayList hops = new ArrayList(); - - - + boolean[] trackBits; int idx = 0; - LocalWalkBuffer() { - walkBufferDests = new int[65536]; - walkSourcesAndHops = new int[65536]; - } + int DEFAULT_SIZE = 65536; + Random random; - public void add(int src, int dst, boolean hop) { - if (idx == walkSourcesAndHops.length) { - dests.add(walkBufferDests); - hops.add(walkSourcesAndHops); - walkBufferDests = new int[1000000]; - walkSourcesAndHops = new int[1000000]; - idx = 0; - } - walkBufferDests[idx] = dst; - walkSourcesAndHops[idx] = (hop ? -1 : 1) * (1 + src); // Note +1 so zero will be handled correctly - idx++; + LocalWalkBuffer() { + walkBufferDests = new int[DEFAULT_SIZE]; + trackBits = new boolean[DEFAULT_SIZE]; + random = new Random(); } - public void purge(WalkManager walkManager) { - dests.add(walkBufferDests); - hops.add(walkSourcesAndHops); + protected void expandArrays() { + int[] tmp = walkBufferDests; + walkBufferDests = new int[tmp.length * 2]; + System.arraycopy(tmp, 0, walkBufferDests, 0, tmp.length); - for(int k=0; k < hops.size(); k++) { - int[] d = dests.get(k); - int[] h = hops.get(k); - int len = (k == hops.size() - 1 ? 
idx : d.length); - for(int i=0; i < len; i++) { - int dst = d[i]; - int src = h[i]; - boolean hop = src < 0; - if (src < 0) src = -src; - src = src - 1; // Note, -1 - walkManager.updateWalkUnsafe(src, dst, hop); - } - } - hops = null; - dests = null; - walkSourcesAndHops = null; - walkBufferDests = null; + boolean[] tmpB = trackBits; + trackBits = new boolean[tmpB.length * 2]; + System.arraycopy(tmpB, 0, trackBits, 0, tmpB.length); } + + public abstract void purge(WalkManager walkManager); } diff --git a/src/main/java/edu/cmu/graphchi/walks/LongDrunkardContext.java b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardContext.java new file mode 100644 index 00000000..fa4c6c19 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardContext.java @@ -0,0 +1,29 @@ +package edu.cmu.graphchi.walks; + +/** + * @author Aapo Kyrola + */ +public interface LongDrunkardContext extends DrunkardContext { + + /** + * Moves walk to next vertex + * @param walk walk identified + * @param destinationVertex vertex id to move hte walk to + * @param trackBit set to true if this walk should be tracked, otherwise false + */ + void forwardWalkTo(long walk, int destinationVertex, boolean trackBit); + + void resetWalk(long walk, boolean trackBit); + + /** + * Reads the track-bit of a walk identifier. 
+ * @param walk + * @return + */ + boolean getTrackBit(long walk); + + /** + * Returns true if walk was started from the vertex + */ + boolean isWalkStartedFromVertex(long walk); +} diff --git a/src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java new file mode 100644 index 00000000..4be6bc17 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java @@ -0,0 +1,77 @@ +package edu.cmu.graphchi.walks; + +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +import edu.cmu.graphchi.GraphChiContext; +import edu.cmu.graphchi.preprocessing.VertexIdTranslate; + +/** + * Class to encapsulate the graphchi program running the show. + * Due to several optimizations, it is quite complicated! + */ +public class LongDrunkardDriver + extends DrunkardDriver implements GrabbedBucketConsumer { + + public LongDrunkardDriver(final DrunkardJob job, + WalkUpdateFunction callback) { + super(job, callback); + } + + @Override + protected LongDumperThread createDumperThread() { + return new LongDumperThread(bucketQueue, pendingWalksToSubmit, finished, job); + } + + @Override + protected DrunkardContext createDrunkardContext(int vertexId, final GraphChiContext context, + final LocalWalkBuffer localBuf_) { + final LongWalkManager manager = (LongWalkManager) job.getWalkManager(); + final boolean isSource = manager.isSource(vertexId); + final int mySourceIndex = (isSource ? 
manager.getVertexSourceIdx(vertexId) : -1); + final LongLocalWalkBuffer localBuf = (LongLocalWalkBuffer) localBuf_; + return new LongDrunkardContext() { + @Override + public boolean isSource() { + return isSource; + } + + @Override + public int sourceIndex() { + return mySourceIndex; + } + + @Override + public int getIteration() { + return context.getIteration(); + } + + @Override + public void forwardWalkTo(long walk, int destinationVertex, boolean trackBit) { + localBuf.add(walk, destinationVertex, trackBit); + } + + @Override + public void resetWalk(long walk, boolean trackBit) { + forwardWalkTo(walk, manager.getSourceVertex(walk), trackBit); + } + + @Override + public boolean getTrackBit(long walk) { + return manager.trackBit(walk); + } + + @Override + public boolean isWalkStartedFromVertex(long walk) { + return mySourceIndex == manager.sourceIdx(walk); + } + + @Override + public VertexIdTranslate getVertexIdTranslate() { + return getVertexIdTranslate(); + } + }; + } +} + diff --git a/src/main/java/edu/cmu/graphchi/walks/LongDrunkardFactory.java b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardFactory.java new file mode 100644 index 00000000..f1136832 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardFactory.java @@ -0,0 +1,16 @@ +package edu.cmu.graphchi.walks; + +public class LongDrunkardFactory + implements DrunkardFactory { + public DrunkardDriver createDrunkardDriver(DrunkardJob job, + WalkUpdateFunction callback) { + return new LongDrunkardDriver(job, callback); + } + public WalkManager createWalkManager(int numVertices, int numSources) { + return new LongWalkManager(numVertices, numSources); + } + public LocalWalkBuffer createLocalWalkBuffer() { + return new LongLocalWalkBuffer(); + } +} + diff --git a/src/main/java/edu/cmu/graphchi/walks/LongDumperThread.java b/src/main/java/edu/cmu/graphchi/walks/LongDumperThread.java new file mode 100644 index 00000000..ec37e7cc --- /dev/null +++ 
b/src/main/java/edu/cmu/graphchi/walks/LongDumperThread.java @@ -0,0 +1,64 @@ +package edu.cmu.graphchi.walks; + +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +public class LongDumperThread extends DumperThread { + protected final DrunkardJob job; + protected long[] walks = new long[256 * 1024]; + protected int[] vertices = new int[256 * 1024]; + protected int idx = 0; + + public LongDumperThread(LinkedBlockingQueue bucketQueue, + AtomicLong pendingWalksToSubmit, + AtomicBoolean finished, + DrunkardJob job) { + super(bucketQueue, pendingWalksToSubmit, finished); + this.job = job; + } + + @Override + protected void processWalks(BucketsToSend bucket, int i) { + LongWalkArray bucketWalks = (LongWalkArray) bucket.walks; + long w = bucketWalks.getArray()[i]; + LongWalkManager manager = (LongWalkManager) job.getWalkManager(); + int v = manager.off(w) + bucket.firstVertex; + + + // Skip walks with the track-bit (hop-bit) not set + boolean trackBit = manager.trackBit(w); + + if (!trackBit) { + return; + } + + walks[idx] = w; + vertices[idx] = v; + idx++; + + if (idx >= walks.length) { + try { + job.getCompanion().processWalks(new LongWalkArray(walks), vertices); + } catch (Exception err) { + err.printStackTrace(); + } + idx = 0; + } + } + + @Override + protected void sendRest() { + // Send rest + try { + long[] tmpWalks = new long[idx]; + int[] tmpVertices = new int[idx]; + System.arraycopy(walks, 0, tmpWalks, 0, idx); + System.arraycopy(vertices, 0, tmpVertices, 0, idx); + job.getCompanion().processWalks(new LongWalkArray(tmpWalks), tmpVertices); + } catch (Exception err) { + err.printStackTrace(); + } + } +} + diff --git a/src/main/java/edu/cmu/graphchi/walks/LongLocalWalkBuffer.java b/src/main/java/edu/cmu/graphchi/walks/LongLocalWalkBuffer.java new file mode 100644 index 00000000..4605fc4e --- /dev/null +++ 
b/src/main/java/edu/cmu/graphchi/walks/LongLocalWalkBuffer.java @@ -0,0 +1,36 @@ +package edu.cmu.graphchi.walks; + +class LongLocalWalkBuffer extends LocalWalkBuffer { + long[] walks; + + LongLocalWalkBuffer() { + super(); + walks = new long[DEFAULT_SIZE]; + } + + public void add(long walk, int destination, boolean trackBit) { + if (idx == walks.length) { + long[] tmp = walks; + walks = new long[tmp.length * 2]; + System.arraycopy(tmp, 0, walks, 0, tmp.length); + + expandArrays(); + } + walkBufferDests[idx] = destination; + walks[idx] = walk; + trackBits[idx] = trackBit; + idx++; + } + + @Override + public void purge(WalkManager walkManager) { + LongWalkManager manager = (LongWalkManager) walkManager; + for(int i=0; i> 8) & 0xffffff; + } + + public boolean trackBit(long walk) { + return ((walk & 1) != 0); + } + + public int off(long walk) { + return (int) (walk >> 1) & 0x7f; + } + + /** + * Resets the bucket offset to reflect the new destination vertex, and also resets the track + * bit, according to the parameters. Note that those are the _only_ things re-encoded by this + * method, as those are the only things this method has access to; if other parts of the walk + * need to be changed, that must be taken care of in the WalkUpdateFunction _before_ forwarding + * the walk. 
+ */ + protected long reencodeWalk(long walk, int toVertex, boolean trackBit) { + int bucket = toVertex / bucketSize; + return encode(sourceIdx(walk), trackBit, toVertex % bucketSize); + } + + /** + * @param sourceId + * @param toVertex + * @param trackBit true if odd, false if even hop + */ + public void moveWalk(long walk, int toVertex, boolean trackBit) { + int bucket = toVertex / bucketSize; + synchronized (bucketLocks[bucket]) { + moveWalkUnsafe(walk, toVertex, trackBit); + } + } + + public void moveWalkUnsafe(long walk, int toVertex, boolean trackBit) { + // Reincode the walk to reflect the movement + walk = reencodeWalk(walk, toVertex, trackBit); + + // Move the walk to the new bucket for processing + int bucket = toVertex / bucketSize; + int idx = walkIndices[bucket]; + if (idx == 0) { + walks[bucket] = new long[initialSize]; + } else { + if (idx == walks[bucket].length) { + long[] newBucket = new long[walks[bucket].length * 3 / 2]; + System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); + walks[bucket] = newBucket; + } + } + walks[bucket][idx] = walk; + walkIndices[bucket]++; + } + + @Override + protected void expandCapacity(int bucket, int additional) { + if (walks[bucket] != null) { + int desiredLength = walks[bucket].length + additional; + if (walks[bucket].length < desiredLength) { + long[] newBucket = new long[desiredLength]; + System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); + walks[bucket] = newBucket; + } + } else { + walks[bucket] = new long[additional]; + } + } + + @Override + public void initializeWalks() { + walks = new long[1 + numVertices / bucketSize][]; + bucketLocks = new Object[walks.length]; + for(int i=0; i 0 && i >= -offt && i + offt < snapshots.length) + snapshots[i + offt] = new long[snapshotSizes[i]]; + } + + for(int i=0; i < len; i++) { + long w = bucketToConsume[i]; + int vertex = bucketFirstVertex + off(w); + + if (vertex >= fromVertex && vertex <= toVertexInclusive) { + int snapshotOff = 
vertex - fromVertex; + int localOff = vertex - bucketFirstVertex; + snapshots[snapshotOff][snapshotIdxs[localOff]] = w; + snapshotIdxs[localOff]++; + } else { + // add back + moveWalk(w, vertex, trackBit(w)); + } + } + } + snapshotInitBits[localBucketIdx] = true; + } + } + if (bucketConsumer != null && bucketToConsume != null && len > 0) { + bucketConsumer.consume(bucketIdx * bucketSize, new LongWalkArray(bucketToConsume), len); + if (len > 1000000) { + log((bucketIdx * bucketSize) + " - " + ((bucketIdx+1)) * bucketSize + ", " + len); + } + } + _timer.stop(); + long[] array = snapshots[vertexId - fromVertex]; + if (array == null) { + return null; + } else { + return new LongWalkArray(snapshots[vertexId - fromVertex]); + } + } + } + + @Override + public int getFirstVertex() { + return fromVertex; + } + + @Override + public int getLastVertex() { + return toVertexInclusive; + } + }; + } + + /** Dump to file all walks with more than 0 hop */ + @Override + public void dumpToFile(WalkSnapshot snapshot, String filename) throws IOException { + final TimerContext _timer = dumpTimer.time(); + synchronized (filename.intern()) { + DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(new File(filename), true))); + for(int i=snapshot.getFirstVertex(); i <= snapshot.getLastVertex(); i++) { + long[] ws = ((LongWalkArray)snapshot.getWalksAtVertex(i, false)).getArray(); + if (ws != null) { + for(int j=0; j < ws.length; j++) { + long w = ws[j]; + int source = sources[sourceIdx(w)]; + dos.writeLong(source); + dos.writeInt(i); + } + } + } + dos.flush(); + dos.close(); + } + _timer.stop(); + } + + public int getSourceVertex(long walk) { + return sources[sourceIdx(walk)]; + } + + @Override + public void populateSchedulerForInterval(Scheduler scheduler, VertexInterval interval) { + final TimerContext _timer = schedulePopulate.time(); + int fromBucket = interval.getFirstVertex() / bucketSize; + int toBucket = interval.getLastVertex() / bucketSize; + + 
for(int bucketIdx=fromBucket; bucketIdx <= toBucket; bucketIdx++) { + int vertexBase = bucketIdx * bucketSize; + long[] bucket = walks[bucketIdx]; + + if (bucket != null) { + BitSet alreadySeen = new BitSet(bucketSize); + int counter = 0; + for(int j=0; j MAX_SOURCES) throw new IllegalArgumentException("Max sources: " + numSources); sources = new int[numSources]; @@ -60,8 +59,12 @@ public WalkManager(int numVertices, int numSources) { } } + /** + * Sets MAX_SOURCES and bucketSize, which may be different for different subclasses. + */ + protected abstract void setSourceAndBucketBits(); - private void log(String s) { + protected void log(String s) { try { log.write(s + "\n"); log.flush(); @@ -113,142 +116,9 @@ public synchronized int addWalkBatch(int vertex, int numWalks) { return sourceSeqIdx - 1; } + protected abstract void expandCapacity(int bucket, int additional); - /** - * Encode a walk. Note, as sourceIdx is the highest order bits, the - * walks can be sorted by source simply by sorting the list. - * @param sourceId index of the rousce vertex - * @param hop true if odd, false if even - * @param off bucket offset - * @return - */ - static int encode(int sourceId, boolean hop, int off) { - assert(off < 128); - int hopbit = (hop ? 
1 : 0); - return ((sourceId & 0xffffff) << 8) | ((off & 0x7f) << 1) | hopbit; - } - - static int encodeV(int sourceId, boolean hop, int vertexId) { - return encode(sourceId, hop, vertexId % bucketSize); - } - - - public static int sourceIdx(int walk) { - return ((walk & 0xffffff00) >> 8) & 0xffffff; - } - - public static boolean hop(int walk) { - return ((walk & 1) != 0); - } - - public static int off(int walk) { - return (walk >> 1) & 0x7f; - } - - - /** - * @param sourceId - * @param toVertex - * @param hop true if odd, false if even hop - */ - public void updateWalk(int sourceId, int toVertex, boolean hop) { - int bucket = toVertex / bucketSize; - synchronized (bucketLocks[bucket]) { - updateWalkUnsafe(sourceId, toVertex, hop); - } - } - - public void updateWalkUnsafe(int sourceId, int toVertex, boolean hop) { - int bucket = toVertex / bucketSize; - int w = encode(sourceId, hop, toVertex % bucketSize); - int idx = walkIndices[bucket]; - if (idx == 0) { - walks[bucket] = new int[initialSize]; - } else { - if (idx == walks[bucket].length) { - int[] newBucket = new int[walks[bucket].length * 3 / 2]; - System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); - walks[bucket] = newBucket; - } - } - walks[bucket][idx] = w; - walkIndices[bucket]++; - } - - - - - protected void expandCapacity(int bucket, int additional) { - if (walks[bucket] != null) { - int desiredLength = walks[bucket].length + additional; - if (walks[bucket].length < desiredLength) { - int[] newBucket = new int[desiredLength]; - System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); - walks[bucket] = newBucket; - } - } else { - walks[bucket] = new int[additional]; - } - } - - public void initializeWalks() { - walks = new int[1 + numVertices / bucketSize][]; - bucketLocks = new Object[walks.length]; - for(int i=0; i 0 && i >= -offt && i + offt < snapshots.length) - snapshots[i + offt] = new int[snapshotSizes[i]]; - } - - for(int i=0; i < len; i++) { - int w = 
bucketToConsume[i]; - int vertex = bucketFirstVertex + off(w); - - if (vertex >= fromVertex && vertex <= toVertexInclusive) { - int snapshotOff = vertex - fromVertex; - int localOff = vertex - bucketFirstVertex; - snapshots[snapshotOff][snapshotIdxs[localOff]] = w; - snapshotIdxs[localOff]++; - } else { - // add back - boolean hop = hop(w); - int src = sourceIdx(w); - updateWalk(src, vertex, hop); - } - } - } - snapshotInitBits[localBucketIdx] = true; - } - } - if (bucketConsumer != null && bucketToConsume != null && len > 0) { - bucketConsumer.consume(bucketIdx * bucketSize, bucketToConsume, len); - if (len > 1000000) { - log((bucketIdx * bucketSize) + " - " + ((bucketIdx+1)) * bucketSize + ", " + len); - } - } - _timer.stop(); - return snapshots[vertexId - fromVertex]; - } - } - - @Override - public int getFirstVertex() { - return fromVertex; - } - - @Override - public int getLastVertex() { - return toVertexInclusive; - } - - - }; - - } - - public static int getWalkLength(int[] w) { - if (w == null) return 0; - return w.length; - } + public abstract WalkSnapshot grabSnapshot(final int fromVertex, final int toVertexInclusive); /** Dump to file all walks with more than 0 hop */ - public void dumpToFile(WalkSnapshot snapshot, String filename) throws IOException { - final TimerContext _timer = dumpTimer.time(); - synchronized (filename.intern()) { - DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(new File(filename), true))); - for(int i=snapshot.getFirstVertex(); i <= snapshot.getLastVertex(); i++) { - int[] ws = snapshot.getWalksAtVertex(i, false); - if (ws != null) { - for(int j=0; j < ws.length; j++) { - int w = ws[j]; - int source = sources[sourceIdx(w)]; - dos.writeInt(source); - dos.writeInt(i); - } - } - } - dos.flush(); - dos.close(); - } - _timer.stop(); - } - - public int getSourceVertex(int walk) { - return sources[sourceIdx(walk)]; - } + public abstract void dumpToFile(WalkSnapshot snapshot, String filename) 
throws IOException; public void populateSchedulerWithSources(Scheduler scheduler) { for(int i=0; i sources = new ArrayList(32678); - private ArrayList sourceWalkCounts = new ArrayList(32678); - private int totalWalks = 0; - - private long[][] walks; - private int[] walkIndices; - - private int numVertices; - private final Timer grabTimer = Metrics.defaultRegistry().newTimer(WalkManagerForPaths.class, "grab-walks", TimeUnit.SECONDS, TimeUnit.MINUTES); - private final Timer dumpTimer = Metrics.defaultRegistry().newTimer(WalkManagerForPaths.class, "dump-walks", TimeUnit.SECONDS, TimeUnit.MINUTES); - private final Timer initTimer = Metrics.defaultRegistry().newTimer(WalkManagerForPaths.class, "init-walks", TimeUnit.SECONDS, TimeUnit.MINUTES); - - - public WalkManagerForPaths(int numVertices) { - this.numVertices = numVertices; - System.out.println("Initial size for walk bucket: " + initialSize); - } - - public synchronized void addWalkBatch(int vertex, int numWalks) { - sources.add(vertex); - sourceWalkCounts.add(numWalks); - totalWalks += numWalks; - - } - - - // Note: there are some extra bits to be used here - public long encode(int id, int hop, int off) { - return ((long)id) << 32 | (((long)hop << 16) & 0x000f0000l) | ((off << 20) & 0xfff00000l); - } - - - public int hop(long walk) { - return (int) ((walk & 0x000f0000) >> 16); - } - - public int off(long walk) { - return (int) ((walk & 0xfff00000l) >> 20); - } - - public int walkId(long walk) { - return (int) (walk >> 32); - } - - - public void updateWalk(int id, int toVertex, int hop) { - int bucket = toVertex / bucketSize; - assert(hop < 16); - - - synchronized (walks[bucket]) { - int idx = walkIndices[bucket]; - if (idx == walks[bucket].length) { - long[] newBucket = new long[walks[bucket].length * 3 / 2]; - System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); - walks[bucket] = newBucket; - } - walks[bucket][idx] = encode(id, hop, toVertex % bucketSize); - walkIndices[bucket]++; - } - } - - 
public void expandCapacity(int bucket, int additional) { - int desiredLength = walks[bucket].length + additional; - - if (walks[bucket].length < desiredLength) { - long[] newBucket = new long[desiredLength]; - System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); - walks[bucket] = newBucket; - } - } - - public void initializeWalks() { - final TimerContext _timer = initTimer.time(); - walks = new long[1 + numVertices / bucketSize][]; - walkIndices = new int[walks.length]; - for(int i = 0; i < walks.length; i++) { - walks[i] = new long[initialSize]; - walkIndices[i] = 0; - } - - /* Precalculate bucket sizes for performance */ - int[] tmpsizes = new int[walks.length]; - for(int j=0; j < sources.size(); j++) { - int source = sources.get(j); - tmpsizes[source / bucketSize] += sourceWalkCounts.get(j); - } - - for(int b=0; b < walks.length; b++) { - expandCapacity(b, tmpsizes[b]); - } - - int walkId = 0; - for(int i=0; i < sources.size(); i++) { - int source = sources.get(i); - int count = sourceWalkCounts.get(i); - for(int c=0; c tmpBuckets = new ArrayList(toBucket - fromBucket + 1); - int[] tmpBucketLengths = new int[toBucket - fromBucket + 1]; - for(int b=fromBucket; b <= toBucket; b++) { - tmpBuckets.add(walks[b]); - tmpBucketLengths[b - fromBucket] = walkIndices[b]; - walks[b] = new long[initialSize]; - walkIndices[b] = 0; - } - - /* Now create data structure for fast retrieval */ - final long[][] snapshots = new long[toVertexInclusive - fromVertex + 1][]; - final int[] snapshotIdxs = new int[snapshots.length]; - - for(int i=0; i < snapshots.length; i++) { - snapshots[i] = null; - snapshotIdxs[i] = 0; - } - /* Add walks to snapshot arrays -- TODO: parallelize */ - for(int b=0; b < tmpBuckets.size(); b++) { - int bucketFirstVertex = bucketSize * (fromBucket + b); - long[] arr = tmpBuckets.get(b); - int len = tmpBucketLengths[b]; - - final int[] snapshotSizes = new int[bucketSize]; - - /* Calculate vertex-walks sizes */ - for(int i=0; i < len; i++) { 
- long w = arr[i]; - snapshotSizes[off(w)]++; - } - - int offt = bucketFirstVertex - fromVertex; - - /* Precalculate the array sizes. offt is the - offset of the bucket's first vertex from the first - vertex of the snapshot - */ - - for(int i=0; i < snapshotSizes.length; i++) { - if (snapshotSizes[i] > 0 && i >= -offt && i + offt < snapshots.length) - snapshots[i + offt] = new long[snapshotSizes[i]]; - } - - for(int i=0; i < len; i++) { - long w = arr[i]; - int hop = hop(w); - int id = walkId(w); - int vertex = bucketFirstVertex + off(w); - - if (vertex >= fromVertex && vertex <= toVertexInclusive) { - int snapshotOff = vertex - fromVertex; - if (snapshots[snapshotOff] == null) - throw new IllegalStateException(); - - if (snapshotIdxs[snapshotOff] >= snapshots[snapshotOff].length) { - throw new RuntimeException("Not possible!"); - /* Duplicate array - int[] tmp = new int[snapshots[snapshotOff].length * 2]; - System.arraycopy(snapshots[snapshotOff], 0, tmp, 0, snapshots[snapshotOff].length); - snapshots[snapshotOff] = tmp; */ - } - snapshots[snapshotOff][snapshotIdxs[snapshotOff]] = w; - snapshotIdxs[snapshotOff]++; - } else { - // add back - updateWalk(id, vertex, hop); - } - } - tmpBuckets.set(b, null); // Save memory - } - - _timer.stop(); - - /* Create the snapshot object */ - return new WalkSnapshotForPaths() { - @Override - public long[] getWalksAtVertex(int vertexId) { - return snapshots[vertexId - fromVertex]; - } - - @Override - public int getFirstVertex() { - return fromVertex; - } - - @Override - public int getLastVertex() { - return toVertexInclusive; - } - }; - - } - - - - /** Dump to file all walks with more than 0 hop */ - public void dumpToFile(WalkSnapshotForPaths snapshot, String filename) throws IOException { - final TimerContext _timer = dumpTimer.time(); - synchronized (filename.intern()) { - DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(new File(filename), true))); - for(int 
i=snapshot.getFirstVertex(); i <= snapshot.getLastVertex(); i++) { - long[] ws = snapshot.getWalksAtVertex(i); - if (ws != null) { - for(int j=0; j < ws.length; j++) { - long w = ws[j]; - /* walk-id: int, hop: short, vertex: int */ - dos.writeInt(walkId(w)); - dos.writeShort(hop(w)); - dos.writeInt(i); - } - } - } - dos.flush(); - dos.close(); - } - _timer.stop(); - } - - - public void populateSchedulerWithSources(Scheduler scheduler) { - for(int i=0; i < sources.size(); i++) { - scheduler.addTask(sources.get(i)); - } - } -} diff --git a/src/main/java/edu/cmu/graphchi/walks/WalkPathAnalyzer.java b/src/main/java/edu/cmu/graphchi/walks/WalkPathAnalyzer.java deleted file mode 100644 index 8e297e94..00000000 --- a/src/main/java/edu/cmu/graphchi/walks/WalkPathAnalyzer.java +++ /dev/null @@ -1,102 +0,0 @@ -package edu.cmu.graphchi.walks; - -import java.io.*; -import java.util.Arrays; - -/** - * Class for computing paths from the walk-files produced - * by DrunkardMobForPaths - * @author Aapo Kyrola, akyrola@cs.cmu.edu, akyrola@twitter.com - */ -public class WalkPathAnalyzer { - - private File directory; - - public WalkPathAnalyzer(File directory) { - this.directory = directory; - if (!this.directory.isDirectory()) throw new IllegalArgumentException("You must provide a directory"); - } - - /** - * Currently very dummy implementation. TODO: Make memory efficient and smarter in general. - * Just for demonstration purposes. 
- */ - public void analyze(int minWalkId, int maxWalkId, int maxHops) throws IOException { - int numberOfWalks = maxWalkId - minWalkId + 1; - Walk[] paths = new Walk[numberOfWalks]; - for(int i=0; i < paths.length; i++) { - paths[i] = new Walk(maxHops); - } - - String[] walkFiles = directory.list(new FilenameFilter() { - @Override - public boolean accept(File file, String s) { - return s.startsWith("walks_"); - } - }); - - for(String walkFile : walkFiles) { - System.out.println("Analyze: " + walkFile); - long walksInFile = new File(directory, walkFile).length() / 10; - DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream( - new File(directory, walkFile)), 1024 * 1024 * 50)); - try { - long i = 0; - while(i < walksInFile) { - if (i % 1000000 == 0) System.out.println(i + " / " + walksInFile); - i++; - - int walkId = dis.readInt(); - - short hop = dis.readShort(); - int atVertex = dis.readInt(); - if (walkId >= minWalkId && walkId <= maxWalkId) { - paths[walkId - minWalkId].addWalk(hop, atVertex); - } - } - } catch (EOFException ioe) { - continue; - } - dis.close(); - } - - for(Walk w : paths) { - System.out.println(w.getPathDescription()); - } - } - - private static class Walk { - - private long[] path; - int idx; - - private Walk(int maxHops) { - idx = 0; - path = new long[maxHops]; - } - - void addWalk(short hop, int atVertex) { - long w = atVertex | ((long)hop << 32); - if (idx < path.length) path[idx++] = w; - } - - String getPathDescription() { - /* Super-slow */ - Arrays.sort(path); // Hop is the highest order bit so sorts by hop - StringBuffer sb = new StringBuffer(); - for(long w : path) { - sb.append((w & 0xffffffffl) + "-"); - } - return sb.toString(); - } - } - - public static void main(String[] args) throws Exception { - WalkPathAnalyzer analyzer = new WalkPathAnalyzer(new File(".")); - int minWalkId = Integer.parseInt(args[0]); - int maxWalkId = Integer.parseInt(args[1]); - int maxHops = Integer.parseInt(args[2]); - - 
analyzer.analyze(minWalkId, maxWalkId, maxHops); - } -} diff --git a/src/main/java/edu/cmu/graphchi/walks/WalkSnapshot.java b/src/main/java/edu/cmu/graphchi/walks/WalkSnapshot.java index a458e436..e5d8ed54 100644 --- a/src/main/java/edu/cmu/graphchi/walks/WalkSnapshot.java +++ b/src/main/java/edu/cmu/graphchi/walks/WalkSnapshot.java @@ -5,9 +5,6 @@ */ public interface WalkSnapshot { - /** Returns walk at vertex, or null if none **/ - int[] getWalksAtVertex(int vertexId, boolean processed); - int getFirstVertex(); int getLastVertex(); @@ -17,4 +14,6 @@ public interface WalkSnapshot { public long numWalks(); public void restoreUngrabbed(); + + WalkArray getWalksAtVertex(int vertexId, boolean processed); } diff --git a/src/main/java/edu/cmu/graphchi/walks/WalkSnapshotForPaths.java b/src/main/java/edu/cmu/graphchi/walks/WalkSnapshotForPaths.java deleted file mode 100644 index 9dc0e01d..00000000 --- a/src/main/java/edu/cmu/graphchi/walks/WalkSnapshotForPaths.java +++ /dev/null @@ -1,14 +0,0 @@ -package edu.cmu.graphchi.walks; - -/** - * @author Aapo Kyrola - */ -public interface WalkSnapshotForPaths { - - /** Returns walk at vertex, or null if none **/ - long[] getWalksAtVertex(int vertexId); - - int getFirstVertex(); - - int getLastVertex(); -} diff --git a/src/main/java/edu/cmu/graphchi/walks/WalkUpdateFunction.java b/src/main/java/edu/cmu/graphchi/walks/WalkUpdateFunction.java index a7e4bcdc..1f906fae 100644 --- a/src/main/java/edu/cmu/graphchi/walks/WalkUpdateFunction.java +++ b/src/main/java/edu/cmu/graphchi/walks/WalkUpdateFunction.java @@ -10,6 +10,15 @@ */ public interface WalkUpdateFunction { + /** + * Called for each source vertex. Return an int-array of vertices to which walk visits should not + * be tracked. For example, if you are not interested about the walks to the immediate neighbors, + * you should returns an array of the vertex ids of the neighbors. 
+ * @param vertex + * @return + */ + int[] getNotTrackedVertices(ChiVertex vertex); + /** * Callback * @param walks @@ -17,17 +26,8 @@ public interface WalkUpdateFunction { * @param drunkardContext * @param randomGenerator random-generator */ - void processWalksAtVertex(int[] walks, + void processWalksAtVertex(WalkArray walks, ChiVertex vertex, DrunkardContext drunkardContext, Random randomGenerator); - - /** - * Called for each source vertex. Return an int-array of vertices to which walk visits should not - * be tracked. For example, if you are not interested about the walks to the immediate neighbors, - * you should returns an array of the vertex ids of the neighbors. - * @param vertex - * @return - */ - int[] getNotTrackedVertices(ChiVertex vertex); } diff --git a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMob.java b/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMob.java deleted file mode 100644 index d7638e9d..00000000 --- a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMob.java +++ /dev/null @@ -1,179 +0,0 @@ -package edu.cmu.graphchi.walks.deprecated; - -import edu.cmu.graphchi.ChiFilenames; -import edu.cmu.graphchi.ChiVertex; -import edu.cmu.graphchi.GraphChiContext; -import edu.cmu.graphchi.GraphChiProgram; -import edu.cmu.graphchi.datablocks.IntConverter; -import edu.cmu.graphchi.engine.GraphChiEngine; -import edu.cmu.graphchi.engine.VertexInterval; -import edu.cmu.graphchi.util.IdInt; -import edu.cmu.graphchi.util.Toplist; -import edu.cmu.graphchi.walks.WalkManager; -import edu.cmu.graphchi.walks.WalkSnapshot; - -import java.io.File; -import java.io.IOException; -import java.util.TreeSet; - -/** - * Launch millions (?) of random walks and record the - * hops for each source. This version can be used only for computing - * distribution of the source-destinations. For recording the actual - * paths, use DrunkardMobForPaths - * Done partially during authors internship at Twitter, Fall 2012. 
- * @author Aapo Kyrola, akyrola@cs.cmu.edu - */ -public class DrunkardMob implements GraphChiProgram { - - private WalkManager walkManager; - private WalkSnapshot curWalkSnapshot; - - public DrunkardMob() { - } - - private static final double RESETPROB = 0.15; - - public void update(ChiVertex vertex, GraphChiContext context) { - int[] walksAtMe = curWalkSnapshot.getWalksAtVertex(vertex.getId(), true); - if (context.getIteration() == 0) vertex.setValue(0); - if (walksAtMe == null) return; - - int walkLength = walksAtMe.length; - - int numWalks = 0; - for(int i=0; i < walkLength; i++) { - int walk = walksAtMe[i]; - boolean hop = walkManager.hop(walk); - // Choose a random destination and move the walk forward - int dst; - if (vertex.getId() != walkManager.getSourceVertex(walk)) { - numWalks++; - } - if (vertex.numOutEdges() > 0 && (context.getIteration() == 0 || Math.random() > RESETPROB)) { - dst = vertex.getRandomOutNeighbor(); - } else { - // Dead end! - dst = walkManager.getSourceVertex(walk); - } - walkManager.updateWalk(walkManager.sourceIdx(walk), dst, !hop); - context.getScheduler().addTask(dst); - - } - vertex.setValue(vertex.getValue() + numWalks); - } - - - public void beginIteration(GraphChiContext ctx) { - if (ctx.getIteration() == 0) { - ctx.getScheduler().removeAllTasks(); - walkManager.populateSchedulerWithSources(ctx.getScheduler()); - } - } - - public void endIteration(GraphChiContext ctx) {} - - /** - * At the start of interval - grab the snapshot of walks - */ - public void beginSubInterval(GraphChiContext ctx, final VertexInterval interval) { - long t = System.currentTimeMillis(); - curWalkSnapshot = walkManager.grabSnapshot(interval.getFirstVertex(), interval.getLastVertex()); - System.out.println("Grab snapshot took " + (System.currentTimeMillis() - t) + " ms."); - - String walkDir = System.getProperty("walk.dir", "."); - final String filename = walkDir + "/walks_.dat"; - if (ctx.getIteration() == 0) { // NOTE, temporary hack to save disk 
space but have the same I/O cost for testing - new File(filename).delete(); - } - - // Launch a thread to dump - final WalkSnapshot snapshot = curWalkSnapshot; - synchronized (filename.intern()) { - try { - walkManager.dumpToFile(snapshot, filename); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - - public void endSubInterval(GraphChiContext ctx, final VertexInterval interval) { - curWalkSnapshot.restoreUngrabbed(); - curWalkSnapshot = null; // Release memory - } - - public void beginInterval(GraphChiContext ctx, VertexInterval interval) {} - - public void endInterval(GraphChiContext ctx, VertexInterval interval) {} - - public static void main(String[] args) throws Exception { - - String baseFilename = args[0]; - - if (args.length > 1) { - int nShards = Integer.parseInt(args[1]); - int nSources = Integer.parseInt(args[2]); - int walksPerSource = Integer.parseInt(args[3]); - int maxHops = Integer.parseInt(args[4]); - - System.out.println("Walks will start from " + nSources + " sources."); - System.out.println("Going to start " + walksPerSource + " walks per source."); - System.out.println("Max hops: " + maxHops); - - /* Delete vertex data */ - File vertexDataFile = new File(ChiFilenames.getFilenameOfVertexData(baseFilename, new IntConverter(), false)); - if (vertexDataFile.exists()) { - vertexDataFile.delete(); - } - - /* Initialize GraphChi engine */ - GraphChiEngine engine = new GraphChiEngine(baseFilename, nShards); - engine.setEdataConverter(null); - engine.setVertexDataConverter(new IntConverter()); - engine.setModifiesInedges(false); - engine.setModifiesOutedges(false); - engine.setEnableScheduler(true); - engine.setOnlyAdjacency(true); - engine.setDisableInedges(true); - engine.setMemoryBudgetMb(1200); - engine.setUseStaticWindowSize(false); // Disable dynamic window size detection - engine.setEnableDeterministicExecution(false); - engine.setAutoLoadNext(false); - engine.setMaxWindow(2000000); // Handle maximum 2M vertices a time. 
- - long t1 = System.currentTimeMillis(); - - /* Initialize application object */ - DrunkardMob mob = new DrunkardMob(); - - /* Initialize Random walks */ - int nVertices = engine.numVertices(); - mob.walkManager = new WalkManager(nVertices, nSources); - - for(int i=0; i < nSources; i++) { - int source = 234224 + i; - mob.walkManager.addWalkBatch(source, walksPerSource); - } - mob.walkManager.initializeWalks(); - - System.out.println("Configured " + mob.walkManager.getTotalWalks() + " walks in " + - (System.currentTimeMillis() - t1) + " ms"); - - - /* Run */ - engine.run(mob, maxHops + 1); - - System.out.println("Ready. Going to output..."); - - TreeSet top20 = Toplist.topListInt(baseFilename, engine.numVertices(), 20); - int i = 0; - for(IdInt vertexRank : top20) { - System.out.println(++i + ": " + - engine.getVertexIdTranslate().backward(vertexRank.getVertexId()) + " = " + vertexRank.getValue()); - } - System.out.println("Finished."); - } - - } -} diff --git a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobForPaths.java b/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobForPaths.java deleted file mode 100644 index 6d4a8275..00000000 --- a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobForPaths.java +++ /dev/null @@ -1,187 +0,0 @@ -package edu.cmu.graphchi.walks.deprecated; - -import edu.cmu.graphchi.ChiFilenames; -import edu.cmu.graphchi.ChiVertex; -import edu.cmu.graphchi.GraphChiContext; -import edu.cmu.graphchi.GraphChiProgram; -import edu.cmu.graphchi.vertexdata.VertexAggregator; -import edu.cmu.graphchi.datablocks.IntConverter; -import edu.cmu.graphchi.engine.GraphChiEngine; -import edu.cmu.graphchi.engine.VertexInterval; -import edu.cmu.graphchi.util.IdInt; -import edu.cmu.graphchi.util.Toplist; -import edu.cmu.graphchi.walks.WalkManagerForPaths; -import edu.cmu.graphchi.walks.WalkPathAnalyzer; -import edu.cmu.graphchi.walks.WalkSnapshotForPaths; - -import java.io.File; -import java.util.TreeSet; - -/** - * Launch millions 
(?) of random walks and record each hop - * for the walks. Each walk has an unique id. This version thus - * uses twice amount of memory as the DrunkardMob which only - * can be used for computing distributions of source-destinations. - * @author Aapo Kyrola, akyrola@cs.cmu.edu - */ -public class DrunkardMobForPaths implements GraphChiProgram { - - private WalkManagerForPaths walkManager; - private WalkSnapshotForPaths curWalkSnapshot; - private int maxHops; - private String basefileName; - - public DrunkardMobForPaths(int maxHops, String basefileName) { - this.maxHops = maxHops; - this.basefileName = basefileName; - } - - public void update(ChiVertex vertex, GraphChiContext context) { - long[] walksAtMe = curWalkSnapshot.getWalksAtVertex(vertex.getId()); - if (context.getIteration() == 0) vertex.setValue(0); - if (walksAtMe == null) return; - - int numWalks = 0; - for(int i=0; i < walksAtMe.length; i++) { - long walk = walksAtMe[i]; - int hop = walkManager.hop(walk); - if (hop > 0) numWalks++; - if (hop < maxHops) { - // Choose a random destination and move the walk forward - int dst; - if (vertex.numEdges() > 0) { - dst = vertex.getRandomNeighbor(); - } else { - // Dead end! 
- continue; // Ignore this walk - } - walkManager.updateWalk(walkManager.walkId(walk), dst, hop + 1); - context.getScheduler().addTask(dst); - } - } - vertex.setValue(vertex.getValue() + numWalks); - } - - - public void beginIteration(GraphChiContext ctx) { - if (ctx.getIteration() == 0) { - ctx.getScheduler().removeAllTasks(); - walkManager.populateSchedulerWithSources(ctx.getScheduler()); - } - } - - public void endIteration(GraphChiContext ctx) { - - } - - /** - * At the start of interval - grab the snapshot of walks - */ - public void beginSubInterval(GraphChiContext ctx, final VertexInterval interval) { - long t = System.currentTimeMillis(); - curWalkSnapshot = walkManager.grabSnapshot(interval.getFirstVertex(), interval.getLastVertex()); - System.out.println("Grab snapshot took " + (System.currentTimeMillis() - t) + " ms."); - - String walkDir = System.getProperty("walk.dir", "."); - final String filename = walkDir + "/walks_" + interval.getFirstVertex() + "-" + interval.getLastVertex() + ".dat"; - if (ctx.getIteration() == 0) { // NOTE, temporary hack to save disk space but have the same I/O cost for testing - new File(filename).delete(); - } - // Launch a thread to dump - Thread dumperThread = new Thread(new Runnable() { - public void run() { - try { - walkManager.dumpToFile(curWalkSnapshot, filename); - } catch (Exception err) { - err.printStackTrace(); - } - } - }); - dumperThread.start(); - } - - public void endSubInterval(GraphChiContext ctx, final VertexInterval interval) { - curWalkSnapshot = null; // Release memory - } - - public void beginInterval(GraphChiContext ctx, VertexInterval interval) {} - - public void endInterval(GraphChiContext ctx, VertexInterval interval) {} - - public static void main(String[] args) throws Exception { - String baseFilename = args[0]; - - - if (args.length > 1) { - int nShards = Integer.parseInt(args[1]); - int nSources = Integer.parseInt(args[2]); - int walksPerSource = Integer.parseInt(args[3]); - int maxHops = 
Integer.parseInt(args[4]); - - System.out.println("Path-recording walks will start from " + nSources + " sources."); - System.out.println("Going to start " + walksPerSource + " walks per source."); - System.out.println("Max hops: " + maxHops); - - /* Delete vertex data */ - File vertexDataFile = new File(ChiFilenames.getFilenameOfVertexData(baseFilename, new IntConverter(), false)); - if (vertexDataFile.exists()) { - vertexDataFile.delete(); - } - - /* Initialize GraphChi engine */ - GraphChiEngine engine = new GraphChiEngine(baseFilename, nShards); - - engine.setEdataConverter(null); - engine.setVertexDataConverter(new IntConverter()); - engine.setModifiesInedges(false); - engine.setModifiesOutedges(false); - engine.setEnableScheduler(true); - engine.setOnlyAdjacency(true); - engine.setDisableInedges(false); // NOTE! In-edges are enabled - engine.setMemoryBudgetMb(1200); - engine.setUseStaticWindowSize(false); // Disable dynamic window size detection - engine.setEnableDeterministicExecution(false); - engine.setMaxWindow(2000000); // Handle maximum 2M vertices a time. 
- - long t1 = System.currentTimeMillis(); - - /* Initialize application object */ - DrunkardMobForPaths mob = new DrunkardMobForPaths(maxHops, baseFilename); - - /* Initialize Random walks */ - int nVertices = engine.numVertices(); - mob.walkManager = new WalkManagerForPaths(nVertices); - - /* NOTE: This starts walks from random nodes - you probably want something different */ - for(int i=0; i < nSources; i++) { - int source = (int) (Math.random() * nVertices); - mob.walkManager.addWalkBatch(source, walksPerSource); - } - mob.walkManager.initializeWalks(); - - System.out.println("Configured " + mob.walkManager.getTotalWalks() + " walks in " + - (System.currentTimeMillis() - t1) + " ms"); - - - /* Run */ - engine.run(mob, maxHops + 1); - - /* Analyze */ - WalkPathAnalyzer analyzer = new WalkPathAnalyzer(new File(".")); - analyzer.analyze(0, mob.walkManager.getTotalWalks() - 1, maxHops); - - System.out.println("Ready. Going to output..."); - - /* Output top 20 of visited vertices. */ - TreeSet top20 = Toplist.topListInt(baseFilename, engine.numVertices(), 20); - int i = 0; - for(IdInt vertexRank : top20) { - System.out.println(++i + ": " + vertexRank.getVertexId() + " = " + vertexRank.getValue()); - } - System.out.println("Finished."); - - long sumWalks = VertexAggregator.sumInt(engine.numVertices(), baseFilename); - System.out.println("Total hops (in file): " + sumWalks); - } - } -} diff --git a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobWithCompanion.java b/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobWithCompanion.java deleted file mode 100644 index 9e10d3b1..00000000 --- a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobWithCompanion.java +++ /dev/null @@ -1,451 +0,0 @@ -package edu.cmu.graphchi.walks.deprecated; - -import edu.cmu.graphchi.walks.GrabbedBucketConsumer; -import edu.cmu.graphchi.walks.WalkManager; -import edu.cmu.graphchi.walks.WalkSnapshot; -import edu.cmu.graphchi.walks.WeightedHopper; -import 
edu.cmu.graphchi.walks.distributions.DrunkardCompanion; -import edu.cmu.graphchi.walks.distributions.RemoteDrunkardCompanion; -import com.yammer.metrics.Metrics; -import com.yammer.metrics.core.Timer; -import com.yammer.metrics.core.TimerContext; -import edu.cmu.graphchi.ChiFilenames; -import edu.cmu.graphchi.ChiVertex; -import edu.cmu.graphchi.GraphChiContext; -import edu.cmu.graphchi.GraphChiProgram; -import edu.cmu.graphchi.datablocks.FloatConverter; -import edu.cmu.graphchi.datablocks.IntConverter; -import edu.cmu.graphchi.engine.GraphChiEngine; -import edu.cmu.graphchi.engine.VertexInterval; - -import java.io.File; -import java.rmi.Naming; -import java.rmi.RemoteException; -import java.util.ArrayList; -import java.util.Random; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; - -/** - * Launch millions (?) of random walks and record the - * hops for each source. Uses a remote DrunkardCompanion to - * keep track of the distribution. 
- * @author Aapo Kyrola, akyrola@cs.cmu.edu - */ -public class DrunkardMobWithCompanion implements GraphChiProgram, GrabbedBucketConsumer { - - private static final int[] DEBUGIDS = new int[] {0}; - - private WalkManager walkManager; - private WalkSnapshot curWalkSnapshot; - private final RemoteDrunkardCompanion companion; - - private final static double RESETPROB = 0.15; - private LinkedBlockingQueue bucketQueue = new LinkedBlockingQueue(); - private boolean finished = false; - private Thread dumperThread; - private final Timer purgeTimer = - Metrics.defaultRegistry().newTimer(DrunkardMobWithCompanion.class, "purge-localwalks", TimeUnit.SECONDS, TimeUnit.MINUTES); - - private boolean weighted; - - private AtomicLong pendingWalksToSubmit = new AtomicLong(0); - - public DrunkardMobWithCompanion(String companionAddress, boolean weighted) throws Exception { - this.weighted = weighted; - - if (companionAddress.equals("local")) { - companion = new DrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); - } else { - companion = (RemoteDrunkardCompanion) Naming.lookup(companionAddress); - } - System.out.println("Found companion: " + companion); - - // Launch a thread to send to the companion - dumperThread = new Thread(new Runnable() { - public void run() { - int[] walks = new int[256 * 1024]; - int[] vertices = new int[256 * 1024]; - int idx = 0; - - while(!finished || bucketQueue.size() > 0) { - BucketsToSend bucket = null; - try { - bucket = bucketQueue.poll(1000, TimeUnit.MILLISECONDS); - } catch (InterruptedException e) { - } - if (bucket != null) { - pendingWalksToSubmit.addAndGet(-bucket.length); - for(int i=0; i= walks.length) { - try { - companion.processWalks(walks, vertices); - } catch (Exception err) { - err.printStackTrace(); - } - idx = 0; - } - - } - } - } - - // Send rest - try { - int[] tmpwalks = new int[idx]; - int[] tmpvertices = new int[idx]; - System.arraycopy(walks, 0, tmpwalks, 0, idx); - System.arraycopy(vertices, 0, tmpvertices, 0, idx); - 
companion.processWalks(tmpwalks, tmpvertices); - } catch (Exception err) { - err.printStackTrace(); - } - } - }); - dumperThread.start(); - } - - private static class BucketsToSend { - int firstVertex; - int[] walks; - int length; - - BucketsToSend(int firstVertex, int[] walks, int length) { - this.firstVertex = firstVertex; - this.walks = walks; - this.length = length; - } - } - - public void consume(int firstVertexInBucket, int[] walkBucket, int len) { - try { - pendingWalksToSubmit.addAndGet(len); - bucketQueue.put(new BucketsToSend(firstVertexInBucket, walkBucket, len)); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - - private void initCompanion() throws Exception { - /* Tell companion the sources */ - companion.setSources(walkManager.getSources()); - } - - public void update(ChiVertex vertex, GraphChiContext context) { - - if (context.getThreadLocal() == null) { - LocalWalkBuffer buf = new LocalWalkBuffer(); - context.setThreadLocal(buf); - synchronized (localBuffers) { - localBuffers.add(buf); - } - } - - LocalWalkBuffer localBuf = (LocalWalkBuffer) context.getThreadLocal(); - - try { - // Flow control - while (pendingWalksToSubmit.get() > walkManager.getTotalWalks() / 40) { - System.out.println("Too many walks waiting for delivery: " + pendingWalksToSubmit.get()); - try { - Thread.sleep(2000); - } catch (InterruptedException e) { - } - } - - boolean firstIteration = (context.getIteration() == 0); - int[] walksAtMe = curWalkSnapshot.getWalksAtVertex(vertex.getId(), true); - - for(int j=0; j < DEBUGIDS.length; j++) { - if (vertex.getId() == DEBUGIDS[j]) { - System.out.println(vertex.getId() + " walks: " + walksAtMe.length); - // for(int i=0; i 0) { - - if (weighted) { - hops = (numOutEdges < 16 || walkLength < 8 ? 
WeightedHopper.generateRandomHopsOut(r, vertex, walkLength) : - WeightedHopper.generateRandomHopsAliasMethodOut(r, vertex, walkLength)); - for(int j=0; j 0 && (firstIteration || Math.random() > RESETPROB)) { - dst = nextHop; - } else { - // Dead end or reset - dst = walkManager.getSourceVertex(walk); - atleastSecondHop = false; - } - localBuf.add(src, dst, atleastSecondHop); - } - } catch (RemoteException re) { - throw new RuntimeException(re); - } - } - - - private class LocalWalkBuffer { - int[] walkBufferDests; - int[] walkSourcesAndHops; - Random random = new Random(); - - int idx = 0; - LocalWalkBuffer() { - walkBufferDests = new int[65536]; - walkSourcesAndHops = new int[65536]; - } - - private void add(int src, int dst, boolean hop) { - if (idx == walkSourcesAndHops.length) { - int[] tmp = walkSourcesAndHops; - walkSourcesAndHops = new int[tmp.length * 2]; - System.arraycopy(tmp, 0, walkSourcesAndHops, 0, tmp.length); - - tmp = walkBufferDests; - walkBufferDests = new int[tmp.length * 2]; - System.arraycopy(tmp, 0, walkBufferDests, 0, tmp.length); - } - walkBufferDests[idx] = dst; - walkSourcesAndHops[idx] = (hop ? -1 : 1) * (1 + src); // Note +1 so zero will be handled correctly - idx++; - } - - private void purge() { - for(int i=0; i 0) { - try { - System.out.println("Waiting ..." 
+ bucketQueue.size()); - Thread.sleep(500); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - try { - dumperThread.join(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - - private ArrayList localBuffers = new ArrayList(); - - /** - * At the start of interval - grab the snapshot of walks - */ - public void beginSubInterval(GraphChiContext ctx, final VertexInterval interval) { - long t = System.currentTimeMillis(); - curWalkSnapshot = walkManager.grabSnapshot(interval.getFirstVertex(), interval.getLastVertex()); - System.out.println("Grab snapshot took " + (System.currentTimeMillis() - t) + " ms."); - - while(localBuffers.size() > 0) { - try { - Thread.sleep(100); - } catch (InterruptedException e) { - } - System.out.println("Waiting for purge to finish..."); - } - } - - public void endSubInterval(GraphChiContext ctx, final VertexInterval interval) { - curWalkSnapshot.restoreUngrabbed(); - curWalkSnapshot = null; // Release memory - - /* Purge local buffers */ - /* TODO: do in separate thread */ - Thread t = new Thread(new Runnable() { - public void run() { - synchronized (localBuffers) { - final TimerContext _timer = purgeTimer.time(); - for (LocalWalkBuffer buf : localBuffers) { - buf.purge(); - } - localBuffers.clear(); - _timer.stop(); - } - }}); - t.start(); - } - - public void beginInterval(GraphChiContext ctx, VertexInterval interval) { - /* Count walks */ - long initializedWalks = walkManager.getTotalWalks(); - long activeWalks = walkManager.getNumOfActiveWalks(); - - System.out.println("====================================="); - System.out.println("Active walks: " + activeWalks + ", initialized=" + initializedWalks); - System.out.println("====================================="); - - walkManager.populateSchedulerForInterval(ctx.getScheduler(), interval); - walkManager.setBucketConsumer(this); - } - - public void endInterval(GraphChiContext ctx, VertexInterval interval) {} - - public static void main(String[] args) 
throws Exception { - - String baseFilename = args[0]; - - if (args.length > 1) { - int nShards = Integer.parseInt(args[1]); - int nSources = Integer.parseInt(args[2]); - int walksPerSource = Integer.parseInt(args[3]); - int maxHops = Integer.parseInt(args[4]); - int firstSource = Integer.parseInt(args[5]); - String companionAddress = args[6]; - boolean weightedGraph = (1 == Integer.parseInt(args[7])); - - - System.out.println("Walks will start from vertices " + firstSource + " -- " + (firstSource + nSources - 1) ); - System.out.println("Going to start " + walksPerSource + " walks per source."); - System.out.println("Max hops: " + maxHops); - System.out.println("Companion: " + companionAddress); - System.out.println("Weighted: " + weightedGraph); - - /* Delete vertex data */ - File vertexDataFile = new File(ChiFilenames.getFilenameOfVertexData(baseFilename, new IntConverter(), false)); - if (vertexDataFile.exists()) { - vertexDataFile.delete(); - } - - /* Initialize GraphChi engine */ - GraphChiEngine engine = new GraphChiEngine(baseFilename, nShards); - engine.setEdataConverter(weightedGraph ? new FloatConverter() : null); - engine.setModifiesInedges(false); - engine.setModifiesOutedges(false); - engine.setEnableScheduler(true); - engine.setOnlyAdjacency(!weightedGraph); - engine.setDisableInedges(true); - - int memoryBudget = 1200; - if (System.getProperty("membudget") != null) memoryBudget = Integer.parseInt(System.getProperty("membudget")); - - System.out.println("Memory budget: " + memoryBudget); - engine.setMemoryBudgetMb(memoryBudget); - engine.setEnableDeterministicExecution(false); - engine.setAutoLoadNext(false); - engine.setVertexDataConverter(null); - engine.setMaxWindow(10000000); // Handle maximum 10M vertices a time. 
- - long t1 = System.currentTimeMillis(); - - /* Initialize application object */ - DrunkardMobWithCompanion mob = new DrunkardMobWithCompanion(companionAddress, weightedGraph); - - /* Initialize Random walks */ - int nVertices = engine.numVertices(); - mob.walkManager = new WalkManager(nVertices, nSources); - - for(int i=0; i < nSources; i++) { - mob.walkManager.addWalkBatch(i + firstSource, walksPerSource); - } - - System.out.println("Initializing walks..."); - mob.walkManager.initializeWalks(); - - mob.initCompanion(); - - System.out.println("Configured " + mob.walkManager.getTotalWalks() + " walks in " + - (System.currentTimeMillis() - t1) + " ms"); - - - /* Run */ - engine.run(mob, maxHops + 1); - - // TODO: ensure that we have sent all walks! - mob.spinUntilFinish(); - - mob.companion.outputDistributions(new File(baseFilename).getName() + "_" + firstSource); - - } - - } -} diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/DrunkardCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/DrunkardCompanion.java index 3b64411d..308bc10d 100644 --- a/src/main/java/edu/cmu/graphchi/walks/distributions/DrunkardCompanion.java +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/DrunkardCompanion.java @@ -1,7 +1,7 @@ package edu.cmu.graphchi.walks.distributions; import edu.cmu.graphchi.ChiLogger; -import edu.cmu.graphchi.walks.WalkManager; +import edu.cmu.graphchi.walks.WalkArray; import edu.cmu.graphchi.util.IdCount; import edu.cmu.graphchi.util.IntegerBuffer; @@ -25,13 +25,13 @@ * Done partially during internship at Twitter, Fall 2012 * @author Aapo Kyrola, akyrola@cs.cmu.edu */ -public class DrunkardCompanion extends UnicastRemoteObject implements RemoteDrunkardCompanion { +public abstract class DrunkardCompanion extends UnicastRemoteObject implements RemoteDrunkardCompanion { protected static class WalkSubmission { - int[] walks; + WalkArray walks; int[] atVertices; - private WalkSubmission(int[] walks, int[] atVertices) { + private 
WalkSubmission(WalkArray walks, int[] atVertices) { this.walks = walks; this.atVertices = atVertices; } @@ -158,7 +158,7 @@ public void run() { WalkSubmission subm = pendingQueue.poll(2000, TimeUnit.MILLISECONDS); if (subm != null) { _processWalks(subm.walks, subm.atVertices); - unpurgedWalks += subm.walks.length; + unpurgedWalks += subm.walks.size(); } if (sourceVertexIds != null) { if (unpurgedWalks > sourceVertexIds.length * 10 || (subm == null && unpurgedWalks > 100000)) { @@ -270,31 +270,7 @@ public void run() { }, 5000, 60000); } - - - - - protected void _processWalks(int[] walks, int[] atVertices) { - long t1 = System.currentTimeMillis(); - for(int i=0; i < walks.length; i++) { - int w = walks[i]; - int atVertex = atVertices[i]; - int sourceIdx = WalkManager.sourceIdx(w); - - if (atVertex == sourceVertexIds[sourceIdx]) { - continue; - } - - synchronized (buffers[sourceIdx]) { - buffers[sourceIdx].add(atVertex); - } - } - - long tt = (System.currentTimeMillis() - t1); - if (tt > 1000) { - logger.info("Processing " + walks.length + " took " + tt + " ms."); - } - } + protected abstract void _processWalks(WalkArray walkArray, int[] atVertices); @Override public IdCount[] getTop(int vertexId, int nTop) throws RemoteException { @@ -320,7 +296,7 @@ protected void drainBuffer(int sourceIdx) { } @Override - public void processWalks(final int[] walks, final int[] atVertices) throws RemoteException { + public void processWalks(final WalkArray walks, final int[] atVertices) throws RemoteException { try { pendingQueue.put(new WalkSubmission(walks, atVertices)); int pending = pendingQueue.size(); @@ -392,7 +368,9 @@ public static void main(String[] args) throws Exception { } catch (Exception err) { logger.info("Registry already created?"); } - Naming.rebind(bindAddress, new DrunkardCompanion(4, (long) (Runtime.getRuntime().maxMemory() * 0.75))); + // TODO? Not sure what the main class is used for; just for testing? This may need to be + // put into the subclass. 
+ Naming.rebind(bindAddress, new IntDrunkardCompanion(4, (long) (Runtime.getRuntime().maxMemory() * 0.75))); logger.info("Prune fraction: " + pruneFraction); } diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/IntDrunkardCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/IntDrunkardCompanion.java new file mode 100644 index 00000000..1673e0f8 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/IntDrunkardCompanion.java @@ -0,0 +1,42 @@ +package edu.cmu.graphchi.walks.distributions; + +import edu.cmu.graphchi.walks.IntWalkManager; +import edu.cmu.graphchi.walks.WalkArray; +import edu.cmu.graphchi.walks.IntWalkArray; + +import java.rmi.RemoteException; + +public class IntDrunkardCompanion extends DrunkardCompanion { + private IntWalkManager manager; + + public IntDrunkardCompanion( final int numThreads, final long maxMemoryBytes) + throws RemoteException { + super(numThreads, maxMemoryBytes); + // TODO: may be better to pass this in... + manager = new IntWalkManager(0, 0); + } + + @Override + protected void _processWalks(WalkArray walkArray, int[] atVertices) { + int[] walks = ((IntWalkArray)walkArray).getArray(); + long t1 = System.currentTimeMillis(); + for(int i=0; i < walks.length; i++) { + int w = walks[i]; + int atVertex = atVertices[i]; + int sourceIdx = manager.sourceIdx(w); + + if (atVertex == sourceVertexIds[sourceIdx]) { + continue; + } + + synchronized (buffers[sourceIdx]) { + buffers[sourceIdx].add(atVertex); + } + } + + long tt = (System.currentTimeMillis() - t1); + if (tt > 1000) { + logger.info("Processing " + walks.length + " took " + tt + " ms."); + } + } +} diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/LongDrunkardCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/LongDrunkardCompanion.java new file mode 100644 index 00000000..a20cee01 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/LongDrunkardCompanion.java @@ -0,0 +1,42 @@ +package 
edu.cmu.graphchi.walks.distributions; + +import edu.cmu.graphchi.walks.LongWalkManager; +import edu.cmu.graphchi.walks.WalkArray; +import edu.cmu.graphchi.walks.LongWalkArray; + +import java.rmi.RemoteException; + +public class LongDrunkardCompanion extends DrunkardCompanion { + private LongWalkManager manager; + + public LongDrunkardCompanion( final int numThreads, final long maxMemoryBytes) + throws RemoteException { + super(numThreads, maxMemoryBytes); + // TODO: may be better to pass this in... + manager = new LongWalkManager(0, 0); + } + + @Override + protected void _processWalks(WalkArray walkArray, int[] atVertices) { + long[] walks = ((LongWalkArray)walkArray).getArray(); + long t1 = System.currentTimeMillis(); + for(int i=0; i < walks.length; i++) { + long w = walks[i]; + int atVertex = atVertices[i]; + int sourceIdx = manager.sourceIdx(w); + + if (atVertex == sourceVertexIds[sourceIdx]) { + continue; + } + + synchronized (buffers[sourceIdx]) { + buffers[sourceIdx].add(atVertex); + } + } + + long tt = (System.currentTimeMillis() - t1); + if (tt > 1000) { + logger.info("Processing " + walks.length + " took " + tt + " ms."); + } + } +} diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/RemoteDrunkardCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/RemoteDrunkardCompanion.java index 6365523b..c5c0afd9 100644 --- a/src/main/java/edu/cmu/graphchi/walks/distributions/RemoteDrunkardCompanion.java +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/RemoteDrunkardCompanion.java @@ -1,6 +1,7 @@ package edu.cmu.graphchi.walks.distributions; import edu.cmu.graphchi.util.IdCount; +import edu.cmu.graphchi.walks.WalkArray; import java.rmi.Remote; import java.rmi.RemoteException; @@ -20,7 +21,7 @@ public interface RemoteDrunkardCompanion extends Remote { void setSources(int[] sources) throws RemoteException; - void processWalks(int[] walks, int[] atVertices) throws RemoteException; + void processWalks(WalkArray walks, int[] atVertices) 
throws RemoteException; void outputDistributions(String outputFile) throws RemoteException; diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java new file mode 100644 index 00000000..6c3b70e7 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java @@ -0,0 +1,430 @@ +package edu.cmu.graphchi.walks.distributions; + +import edu.cmu.graphchi.ChiLogger; +import edu.cmu.graphchi.walks.WalkArray; +import edu.cmu.graphchi.walks.LongWalkArray; +import edu.cmu.graphchi.walks.distributions.DiscreteDistribution; +import edu.cmu.graphchi.walks.distributions.RemoteDrunkardCompanion; +import edu.cmu.graphchi.util.IdCount; +import edu.cmu.graphchi.util.IntegerBuffer; + +import java.io.*; +import java.rmi.Naming; +import java.rmi.RemoteException; +import java.rmi.registry.LocateRegistry; +import java.rmi.server.UnicastRemoteObject; +import java.text.NumberFormat; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Logger; + + +/** + * A DrunkardCompanion object that has two keys to get to a DiscreteDistribution, instead of one. + * Where DrunkardCompanion represents a matrix of values (one key to get to a DiscreteDistribution + * vector), this represents a rank 3 tensor (two keys to get to a DiscreteDistribution). This is + * suitable for collecting more complicated statistics than DrunkardCompanion, though the current + * implementation is perhaps a little slower than it could be, using nested hash maps instead of a + * more efficient data structure. 
+ */ +public abstract class TwoKeyCompanion extends UnicastRemoteObject + implements RemoteDrunkardCompanion { + + protected static class WalkSubmission { + WalkArray walks; + int[] atVertices; + + private WalkSubmission(WalkArray walks, int[] atVertices) { + this.walks = walks; + this.atVertices = atVertices; + } + } + + protected static final int BUFFER_CAPACITY = 128; + protected static final int BUFFER_MAX = 128; + + boolean isLowInMemory = false; + + // Using hash maps of hash maps isn't the most efficient thing to do here, but it'll do for + // now. + protected ConcurrentHashMap> distributions; + protected ConcurrentHashMap> buffers; + protected ConcurrentHashMap> distrLocks; + protected AtomicInteger outstanding = new AtomicInteger(0); + + protected ExecutorService parallelExecutor; + protected long maxMemoryBytes; + + protected LinkedBlockingQueue pendingQueue = new LinkedBlockingQueue(); + + protected static Logger logger = ChiLogger.getLogger("pathcompanion"); + protected Timer timer = new Timer(true); + + /** + * Prints estimate of memory usage + */ + private long memoryAuditReport() { + long companionOverHeads = 0; + + long bufferMem = 0; + long maxMem = 0; + int bufferCount = 0; + for (ConcurrentHashMap map : buffers.values()) { + companionOverHeads += 4; + for(IntegerBuffer buf : map.values()) { + bufferCount += 1; + companionOverHeads += 4; + long est = buf.memorySizeEst(); + bufferMem += est; + maxMem = Math.max(maxMem, est); + } + } + + long distributionMem = 0; + long maxDistMem = 0; + long avoidMem = 0; + int distCount = 0; + for (ConcurrentHashMap map : distributions.values()) { + companionOverHeads += 4; + for(DiscreteDistribution dist : map.values()) { + distCount += 1; + companionOverHeads += 4; + long est = dist.memorySizeEst(); + distributionMem += est; + maxDistMem = Math.max(est, maxDistMem); + avoidMem += dist.avoidCount() * 6; + } + } + + NumberFormat nf = NumberFormat.getInstance(Locale.US); + + logger.info("======= MEMORY REPORT 
======"); + logger.info("Companion internal: " + nf.format(companionOverHeads / 1024. / 1024.) + " mb"); + + logger.info("Buffer mem: " + nf.format(bufferMem / 1024. / 1024.) + " mb"); + logger.info("Avg bytes per buffer: " + + nf.format(bufferMem * 1.0 / bufferCount / 1024.) + " kb"); + logger.info("Max buffer was: " + nf.format(maxMem / 1024.) + "kb"); + + logger.info("Distribution mem: " + nf.format(distributionMem / 1024. / 1024.) + " mb"); + logger.info("- of which avoids: " + nf.format(avoidMem / 1024. / 1024.) + " mb"); + + logger.info("Avg bytes per distribution: " + + nf.format((distributionMem * 1.0 / distCount / 1024.)) + " kb"); + logger.info("Max distribution: " + nf.format(maxDistMem / 1024.) + " kb"); + + long totalMem = companionOverHeads + bufferMem + distributionMem; + logger.info("** Total: " + nf.format(totalMem / 1024. / 1024. / 1024.) + + " GB (low-mem limit " + + Runtime.getRuntime().maxMemory() * 0.75 / 1024. / 1024. / 1024. + "GB)" ); + isLowInMemory = totalMem > maxMemoryBytes; + + if (isLowInMemory) { + compactMemoryUsage(); + } + + return totalMem; + } + + /** + * Removes tails from distributions to save memory + */ + private void compactMemoryUsage() { + long before=0; + long after=0; + + for (Integer firstKey : distributions.keySet()) { + ConcurrentHashMap map = distributions.get(firstKey); + for (Integer secondKey : map.keySet()) { + DiscreteDistribution prevDist, newDist; + synchronized (distrLocks.get(firstKey).get(secondKey)) { + prevDist = map.get(secondKey); + newDist = prevDist.filteredAndShift(2); + map.put(secondKey, newDist); + } + before += prevDist.memorySizeEst(); + after += newDist.memorySizeEst(); + } + } + + logger.info("** Compacted: " + (before / 1024. / 1024. / 1024.) + " GB --> " + + (after / 1024. / 1024. / 1024.) 
+ " GB"); + } + + + /** + * Creates the TwoKeyCompanion object + * @param numThreads number of worker threads (4 is common) + * @param maxMemoryBytes maximum amount of memory to use for storing the distributions + */ + public TwoKeyCompanion(int numThreads, long maxMemoryBytes) throws RemoteException { + this.maxMemoryBytes = maxMemoryBytes; + parallelExecutor = + Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + + buffers = new ConcurrentHashMap>(); + distrLocks = new ConcurrentHashMap>(); + distributions = new ConcurrentHashMap>(); + + + for(int threadId=0; threadId < numThreads; threadId++) { + Thread processingThread = new Thread(new ProcessingThread(threadId, numThreads)); + processingThread.setDaemon(true); + processingThread.start(); + } + } + + private class ProcessingThread implements Runnable { + private int id; + private int numThreads; + + public ProcessingThread(int id, int numThreads) { + this.id = id; + this.numThreads = numThreads; + } + @Override + public void run() { + try { + long unpurgedWalks = 0; + while(true) { + WalkSubmission subm = pendingQueue.poll(2000, TimeUnit.MILLISECONDS); + if (subm != null) { + _processWalks(subm.walks, subm.atVertices); + unpurgedWalks += subm.walks.size(); + } + if (distributions != null) { + if (unpurgedWalks > distributions.size() * 10 || + (subm == null && unpurgedWalks > 100000)) { + logger.fine("Purge:" + unpurgedWalks); + unpurgedWalks = 0; + + // Loop to see what to drain. Every thread looks for + // different buffers. 
+ for (Integer firstKey : buffers.keySet()) { + ConcurrentHashMap map = + buffers.get(firstKey); + for (Integer secondKey : map.keySet()) { + if ((firstKey + secondKey) % numThreads != id) { + continue; + } + // Drain asynchronously + outstanding.incrementAndGet(); + final IntegerBuffer toDrain = map.get(secondKey); + final int first = firstKey; + final int second = secondKey; + + synchronized (toDrain) { + map.put(secondKey, new IntegerBuffer(BUFFER_CAPACITY)); + } + parallelExecutor.submit(new Runnable() { public void run() { + try { + int[] d = toDrain.toIntArray(); + Arrays.sort(d); + DiscreteDistribution dist = new DiscreteDistribution(d); + mergeWith(first, second, dist); + } catch (Exception err ) { + err.printStackTrace(); + } finally { + outstanding.decrementAndGet(); + } + }}); + } + } + } + } + } + } catch (Exception err) { + if (!(err instanceof InterruptedException)) { + err.printStackTrace(); + } + } + } + } + + protected void ensureExists(int firstKey, int secondKey) { + ConcurrentHashMap map = distrLocks.get(firstKey); + if (map == null) { + ConcurrentHashMap new_map = new ConcurrentHashMap(); + map = distrLocks.putIfAbsent(firstKey, new_map); + if (map == null) { + map = new_map; + } + } + Object lock = map.get(secondKey); + if (lock == null) { + Object new_lock = new Object(); + lock = map.putIfAbsent(secondKey, new_lock); + if (lock == null) { + synchronized(new_lock) { + ConcurrentHashMap dmap = + distributions.get(firstKey); + if (dmap == null) { + dmap = new ConcurrentHashMap(); + distributions.put(firstKey, dmap); + } + dmap.put(secondKey, new DiscreteDistribution()); + ConcurrentHashMap bmap = buffers.get(firstKey); + if (bmap == null) { + bmap = new ConcurrentHashMap(); + buffers.put(firstKey, bmap); + } + bmap.put(secondKey, new IntegerBuffer(BUFFER_CAPACITY)); + } + } else { + synchronized(lock) { + // We're just waiting for the other thread to release the lock, so that we can + // get the buffer without crashing later. 
Another thread actually added it, + // but we have to wait for them. + } + } + } + } + + private void mergeWith(int firstKey, int secondKey, DiscreteDistribution distr) { + ensureExists(firstKey, secondKey); + synchronized (distrLocks.get(firstKey).get(secondKey)) { + DiscreteDistribution mergeInto = distributions.get(firstKey).get(secondKey); + DiscreteDistribution merged = DiscreteDistribution.merge(mergeInto, distr); + distributions.get(firstKey).put(secondKey, merged); + } + } + + @Override + public void setAvoidList(int sourceIdx, int[] avoidList) throws RemoteException { + // We don't need this, so this is a no-op + } + + @Override + public IdCount[] getTop(int vertexId, int nTop) throws RemoteException { + // Not really useful for us + return null; + } + + @Override + public void setSources(int[] sources) throws RemoteException { + // We don't use an array of source indices, so we just take the opportunity to initialize + // our objects. + + // Restart timer + timer.cancel(); + timer = new Timer(true); + + timer.schedule(new TimerTask() { + @Override + public void run() { + memoryAuditReport(); + } + }, 5000, 60000); + } + + protected void _processWalks(WalkArray walkArray, int[] atVertices) { + long[] walks = ((LongWalkArray)walkArray).getArray(); + long t1 = System.currentTimeMillis(); + for(int i=0; i < walks.length; i++) { + long w = walks[i]; + if (ignoreWalk(w)) { + continue; + } + int atVertex = atVertices[i]; + int firstKey = getFirstKey(w, atVertex); + int secondKey = getSecondKey(w, atVertex); + int value = getValue(w, atVertex); + + ensureExists(firstKey, secondKey); + IntegerBuffer buffer = buffers.get(firstKey).get(secondKey); + synchronized (buffer) { + buffer.add(value); + } + } + + long tt = (System.currentTimeMillis() - t1); + if (tt > 1000) { + logger.info("Processing " + walks.length + " took " + tt + " ms."); + } + } + + protected boolean ignoreWalk(long walk) { + if (walk == 0) { + return true; + } + return false; + } + + protected 
abstract int getFirstKey(long walk, int atVertex); + + protected abstract int getSecondKey(long walk, int atVertex); + + protected abstract int getValue(long walk, int atVertex); + + protected void drainBuffer(int firstKey, int secondKey) { + IntegerBuffer buffer = buffers.get(firstKey).get(secondKey); + int[] arr; + synchronized (buffer) { + arr = buffer.toIntArray(); + buffers.get(firstKey).put(secondKey, new IntegerBuffer(BUFFER_CAPACITY)); + } + Arrays.sort(arr); + DiscreteDistribution dist = new DiscreteDistribution(arr); + mergeWith(firstKey, secondKey, dist); + } + + @Override + public void processWalks(final WalkArray walks, final int[] atVertices) throws RemoteException { + try { + pendingQueue.put(new WalkSubmission(walks, atVertices)); + int pending = pendingQueue.size(); + if (pending > 50 && pending % 20 == 0) { + logger.info("Warning, pending queue size: " + pending); + } + } catch (Exception err) { + err.printStackTrace(); + } + } + + protected void waitForFinish() { + logger.info("Waiting for processing to finish"); + while (pendingQueue.size() > 0) { + logger.info("..."); + try { + Thread.sleep(500); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + while(outstanding.get() > 0) { + logger.info("..."); + try { + Thread.sleep(500); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + + @Override + public abstract void outputDistributions(String outputFile) throws RemoteException; + + @Override + public void outputDistributions(String outputFile, int nTop) throws RemoteException { + outputDistributions(outputFile); + } + + public void close() { + parallelExecutor.shutdown(); + timer.cancel(); + clearMemory(); + } + + protected void clearMemory() { + distributions.clear(); + buffers.clear(); + distrLocks.clear(); + } +} diff --git a/test/edu/cmu/graphchi/walks/TestWalkManager.java b/test/edu/cmu/graphchi/walks/TestWalkManager.java index b79974de..1849059a 100644 --- 
a/test/edu/cmu/graphchi/walks/TestWalkManager.java +++ b/test/edu/cmu/graphchi/walks/TestWalkManager.java @@ -13,60 +13,111 @@ public class TestWalkManager { + // There's a lot of duplicated code in here to separately test the int and long versions of + // WalkManager; that could probably be fixed, to just test the parts that are necessary for + // each one... TODO + @Test - public void testWalkEncodings() { - WalkManager wmgr = new WalkManager(1000, 10000); + public void testIntWalkEncodings() { + IntWalkManager wmgr = new IntWalkManager(1000, 10000); int x = wmgr.encode(3, true, 114); System.out.println("X = " + x); - boolean hop = wmgr.hop(x); + boolean trackBit = wmgr.trackBit(x); int src = wmgr.sourceIdx(x); int off = wmgr.off(x); assertEquals(3, src); - assertEquals(true, hop); + assertEquals(true, trackBit); assertEquals(114, off); x = wmgr.encode(16777200, false, 126); - hop = wmgr.hop(x); + trackBit = wmgr.trackBit(x); src = wmgr.sourceIdx(x); off = wmgr.off(x); assertEquals(16777200, src); - assertEquals(false, hop); + assertEquals(false, trackBit); assertEquals(126, off); for(int v=0; v<15000000; v+=29) { for (int o=0; o<128; o++) { - x = WalkManager.encode(v, true, o); - int y = WalkManager.encode(v, false, o); - assertEquals(v, WalkManager.sourceIdx(x)); - assertEquals(v, WalkManager.sourceIdx(y)); + x = wmgr.encode(v, true, o); + int y = wmgr.encode(v, false, o); + assertEquals(v, wmgr.sourceIdx(x)); + assertEquals(v, wmgr.sourceIdx(y)); - assertEquals(o, WalkManager.off(x)); - assertEquals(o, WalkManager.off(y)); + assertEquals(o, wmgr.off(x)); + assertEquals(o, wmgr.off(y)); - assertEquals(true, WalkManager.hop(x)); - assertEquals(false, WalkManager.hop(y)); + assertEquals(true, wmgr.trackBit(x)); + assertEquals(false, wmgr.trackBit(y)); } } x = wmgr.encode(16367, true, 0); - hop = wmgr.hop(x); + trackBit = wmgr.trackBit(x); src = wmgr.sourceIdx(x); off = wmgr.off(x); assertEquals(16367, src); - assertEquals(true, hop); + assertEquals(true, 
trackBit); assertEquals(0, off); } + @Test + public void testLongWalkEncodings() { + LongWalkManager wmgr = new LongWalkManager(1000, 10000); + long x = wmgr.encode(3, true, 114); + + System.out.println("X = " + x); + + boolean trackBit = wmgr.trackBit(x); + int src = wmgr.sourceIdx(x); + int off = wmgr.off(x); + assertEquals(3, src); + assertEquals(true, trackBit); + assertEquals(114, off); + + x = wmgr.encode(16777200, false, 126); + trackBit = wmgr.trackBit(x); + src = wmgr.sourceIdx(x); + off = wmgr.off(x); + assertEquals(16777200, src); + assertEquals(false, trackBit); + assertEquals(126, off); + + + for(int v=0; v<15000000; v+=29) { + for (int o=0; o<128; o++) { + x = wmgr.encode(v, true, o); + long y = wmgr.encode(v, false, o); + assertEquals(v, wmgr.sourceIdx(x)); + assertEquals(v, wmgr.sourceIdx(y)); + + assertEquals(o, wmgr.off(x)); + assertEquals(o, wmgr.off(y)); + + assertEquals(true, wmgr.trackBit(x)); + assertEquals(false, wmgr.trackBit(y)); + } + } + + + x = wmgr.encode(16367, true, 0); + trackBit = wmgr.trackBit(x); + src = wmgr.sourceIdx(x); + off = wmgr.off(x); + assertEquals(16367, src); + assertEquals(true, trackBit); + assertEquals(0, off); + } @Test - public void testWalkManager() throws IOException { + public void testIntWalkManager() throws IOException { int nvertices = 33333; - WalkManager wmgr = new WalkManager(nvertices, 40000); + IntWalkManager wmgr = new IntWalkManager(nvertices, 40000); int tot = 0; for(int j=877; j < 3898; j++) { wmgr.addWalkBatch(j, (j % 100) + 10); @@ -80,12 +131,12 @@ public void testWalkManager() throws IOException { // Now get two snapshots WalkSnapshot snapshot1 = wmgr.grabSnapshot(890, 1300); for(int j=890; j <= 1300; j++) { - int[] vertexwalks = snapshot1.getWalksAtVertex(j, true); - assertEquals((j % 100) + 10, WalkManager.getWalkLength(vertexwalks)); + WalkArray vertexwalks = snapshot1.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); - for(int w : vertexwalks) { + 
for(int w : ((IntWalkArray)vertexwalks).getArray()) { if (w != -1) - assertEquals(false, wmgr.hop(w)); + assertEquals(false, wmgr.trackBit(w)); } } assertEquals(890, snapshot1.getFirstVertex()); @@ -94,26 +145,26 @@ public void testWalkManager() throws IOException { // Next snapshot should be empty WalkSnapshot snapshot2 = wmgr.grabSnapshot(890, 1300); for(int j=890; j <= 1300; j++) { - int[] vertexwalks = snapshot2.getWalksAtVertex(j, true); + WalkArray vertexwalks = snapshot2.getWalksAtVertex(j, true); assertNull(vertexwalks); } WalkSnapshot snapshot3 = wmgr.grabSnapshot(877, 889); for(int j=877; j <= 889; j++) { - int[] vertexwalks = snapshot3.getWalksAtVertex(j, true); - assertEquals((j % 100) + 10, WalkManager.getWalkLength(vertexwalks)); + WalkArray vertexwalks = snapshot3.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); } WalkSnapshot snapshot4 = wmgr.grabSnapshot(877, 889); for(int j=877; j <= 889; j++) { - int[] vertexwalks = snapshot4.getWalksAtVertex(j, true); + WalkArray vertexwalks = snapshot4.getWalksAtVertex(j, true); assertNull(vertexwalks); } WalkSnapshot snapshot5 = wmgr.grabSnapshot(1301, 3898); for(int j=1301; j < 3898; j++) { - int[] vertexwalks = snapshot5.getWalksAtVertex(j, true); - assertEquals((j % 100) + 10, WalkManager.getWalkLength(vertexwalks)); + WalkArray vertexwalks = snapshot5.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); } @@ -122,26 +173,108 @@ public void testWalkManager() throws IOException { WalkSnapshot snapshot6 = wmgr.grabSnapshot(1301, 3898); for(int j=1301; j < 3898; j++) { - int[] vertexwalks = snapshot6.getWalksAtVertex(j, true); + WalkArray vertexwalks = snapshot6.getWalksAtVertex(j, true); assertNull(vertexwalks); } /* Then update some walks */ - wmgr.updateWalk(88, 22098, true); - wmgr.updateWalk(41, 76, false); + int w = wmgr.encode(41, false, 0); + wmgr.moveWalk(w, 76, false); + w = wmgr.encode(88, false, 0); + wmgr.moveWalk(w, 
22098, true); WalkSnapshot snapshot7 = wmgr.grabSnapshot(76, 22098); - int[] w1 = snapshot7.getWalksAtVertex(76, true); - assertEquals(1, WalkManager.getWalkLength(w1)); - int w = w1[0]; + WalkArray w1 = snapshot7.getWalksAtVertex(76, true); + assertEquals(1, wmgr.getWalkLength(w1)); + w = ((IntWalkArray)w1).getArray()[0]; assertEquals(41, wmgr.sourceIdx(w)); - assertEquals(false, wmgr.hop(w)); + assertEquals(false, wmgr.trackBit(w)); - int[] w2 = snapshot7.getWalksAtVertex(22098, true); - w = w2[0]; + WalkArray w2 = snapshot7.getWalksAtVertex(22098, true); + w = ((IntWalkArray)w2).getArray()[0]; assertEquals(88, wmgr.sourceIdx(w)); - assertEquals(true, wmgr.hop(w)); - + assertEquals(true, wmgr.trackBit(w)); } + @Test + public void testLongWalkManager() throws IOException { + int nvertices = 33333; + LongWalkManager wmgr = new LongWalkManager(nvertices, 40000); + int tot = 0; + for(int j=877; j < 3898; j++) { + wmgr.addWalkBatch(j, (j % 100) + 10); + tot += (j % 100) + 10; + } + + wmgr.initializeWalks(); + + assertEquals(tot, wmgr.getTotalWalks()); + + // Now get two snapshots + WalkSnapshot snapshot1 = wmgr.grabSnapshot(890, 1300); + for(int j=890; j <= 1300; j++) { + WalkArray vertexwalks = snapshot1.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); + + for(long w : ((LongWalkArray)vertexwalks).getArray()) { + if (w != -1) + assertEquals(false, wmgr.trackBit(w)); + } + } + assertEquals(890, snapshot1.getFirstVertex()); + assertEquals(1300, snapshot1.getLastVertex()); + + // Next snapshot should be empty + WalkSnapshot snapshot2 = wmgr.grabSnapshot(890, 1300); + for(int j=890; j <= 1300; j++) { + WalkArray vertexwalks = snapshot2.getWalksAtVertex(j, true); + assertNull(vertexwalks); + } + + WalkSnapshot snapshot3 = wmgr.grabSnapshot(877, 889); + for(int j=877; j <= 889; j++) { + WalkArray vertexwalks = snapshot3.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); + } + + WalkSnapshot 
snapshot4 = wmgr.grabSnapshot(877, 889); + for(int j=877; j <= 889; j++) { + WalkArray vertexwalks = snapshot4.getWalksAtVertex(j, true); + assertNull(vertexwalks); + } + + WalkSnapshot snapshot5 = wmgr.grabSnapshot(1301, 3898); + for(int j=1301; j < 3898; j++) { + WalkArray vertexwalks = snapshot5.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); + } + + + // wmgr.dumpToFile(snapshot5, "tmp/snapshot5"); + + + WalkSnapshot snapshot6 = wmgr.grabSnapshot(1301, 3898); + for(int j=1301; j < 3898; j++) { + WalkArray vertexwalks = snapshot6.getWalksAtVertex(j, true); + assertNull(vertexwalks); + } + + /* Then update some walks */ + long w = wmgr.encode(41, false, 0); + wmgr.moveWalk(w, 76, false); + w = wmgr.encode(88, false, 0); + wmgr.moveWalk(w, 22098, true); + + WalkSnapshot snapshot7 = wmgr.grabSnapshot(76, 22098); + WalkArray w1 = snapshot7.getWalksAtVertex(76, true); + assertEquals(1, wmgr.getWalkLength(w1)); + w = ((LongWalkArray)w1).getArray()[0]; + assertEquals(41, wmgr.sourceIdx(w)); + assertEquals(false, wmgr.trackBit(w)); + + WalkArray w2 = snapshot7.getWalksAtVertex(22098, true); + w = ((LongWalkArray)w2).getArray()[0]; + assertEquals(88, wmgr.sourceIdx(w)); + assertEquals(true, wmgr.trackBit(w)); + } } diff --git a/test/edu/cmu/graphchi/walks/TestWalkManagerWithPaths.java b/test/edu/cmu/graphchi/walks/TestWalkManagerWithPaths.java deleted file mode 100644 index 8aafd6f7..00000000 --- a/test/edu/cmu/graphchi/walks/TestWalkManagerWithPaths.java +++ /dev/null @@ -1,121 +0,0 @@ -package edu.cmu.graphchi.walks; - -import org.junit.Test; - -import java.io.IOException; - -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertNull; - -/** - * @author Aapo Kyrola, akyrola@cs.cmu.edu, akyrola@twitter.com - */ -public class TestWalkManagerWithPaths { - - @Test - public void testWalkEncodings() { - WalkManagerForPaths wmgr = new WalkManagerForPaths(1000); - long x = wmgr.encode(3, 2, 
284); - int hop = wmgr.hop(x); - int id = wmgr.walkId(x); - int off = wmgr.off(x); - assertEquals(3, id); - assertEquals(2, hop); - assertEquals(284, off); - - x = wmgr.encode(878, 0, 999); - hop = wmgr.hop(x); - id = wmgr.walkId(x); - off = wmgr.off(x); - assertEquals(878, id); - assertEquals(0, hop); - assertEquals(999, off); - - x = wmgr.encode(1987000001, 8, 0); - hop = wmgr.hop(x); - id = wmgr.walkId(x); - off = wmgr.off(x); - assertEquals(1987000001, id); - assertEquals(8, hop); - assertEquals(0, off); - } - - - - @Test - public void testWalkManagerWithPaths() throws IOException { - int nvertices = 33333; - WalkManagerForPaths wmgr = new WalkManagerForPaths(nvertices); - int tot = 0; - for(int j=877; j < 3898; j++) { - wmgr.addWalkBatch(j, (j % 100) + 10); - tot += (j % 100) + 10; - } - - wmgr.initializeWalks(); - - assertEquals(tot, wmgr.getTotalWalks()); - - // Now get two snapshots - WalkSnapshotForPaths snapshot1 = wmgr.grabSnapshot(890, 1300); - for(int j=890; j <= 1300; j++) { - long[] vertexwalks = snapshot1.getWalksAtVertex(j); - assertEquals((j % 100) + 10, vertexwalks.length); - - for(long w : vertexwalks) { - if (w != -1) - assertEquals(0, wmgr.hop(w)); - } - } - assertEquals(890, snapshot1.getFirstVertex()); - assertEquals(1300, snapshot1.getLastVertex()); - - // Next snapshot should be empty - WalkSnapshotForPaths snapshot2 = wmgr.grabSnapshot(890, 1300); - for(int j=890; j <= 1300; j++) { - long[] vertexwalks = snapshot2.getWalksAtVertex(j); - assertNull(vertexwalks); - } - - WalkSnapshotForPaths snapshot3 = wmgr.grabSnapshot(877, 889); - for(int j=877; j <= 889; j++) { - long[] vertexwalks = snapshot3.getWalksAtVertex(j); - assertEquals((j % 100) + 10, vertexwalks.length); - } - - WalkSnapshotForPaths snapshot4 = wmgr.grabSnapshot(877, 889); - for(int j=877; j <= 889; j++) { - long[] vertexwalks = snapshot4.getWalksAtVertex(j); - assertNull(vertexwalks); - } - - WalkSnapshotForPaths snapshot5 = wmgr.grabSnapshot(1301, 3898); - for(int j=1301; j 
< 3898; j++) { - long[] vertexwalks = snapshot5.getWalksAtVertex(j); - assertEquals((j % 100) + 10, vertexwalks.length); - } - // wmgr.dumpToFile(snapshot5, "snapshot5"); - - - WalkSnapshotForPaths snapshot6 = wmgr.grabSnapshot(1301, 3898); - for(int j=1301; j < 3898; j++) { - long[] vertexwalks = snapshot6.getWalksAtVertex(j); - assertNull(vertexwalks); - } - - /* Then update some walks */ - wmgr.updateWalk(88, 22098, 5); - wmgr.updateWalk(41, 76, 3); - - WalkSnapshotForPaths snapshot7 = wmgr.grabSnapshot(76, 22098); - long[] w1 = snapshot7.getWalksAtVertex(76); - assertEquals(1, w1.length); - long w = w1[0]; - assertEquals(3, wmgr.hop(w)); - - long[] w2 = snapshot7.getWalksAtVertex(22098); - w = w2[0]; - assertEquals(5, wmgr.hop(w)); - - } -}