From f7dae5a71b54febfe4ae62dcbeca96435a787583 Mon Sep 17 00:00:00 2001 From: Aapo Kyrola Date: Wed, 23 Oct 2013 19:00:07 -0700 Subject: [PATCH 01/29] small optimization, thanks to Puneet Jain --- .../cmu/graphchi/preprocessing/FastSharder.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java b/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java index 8cbad3fb..4bb340de 100644 --- a/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java +++ b/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java @@ -604,7 +604,7 @@ public void setValue(byte[] array, Object val) { File blockFile = new File(ChiFilenames.getFilenameShardEdataBlock(edataFileName, blockIdx, blockSize)); OutputStream blockOs = (CompressedIO.isCompressionEnabled() ? new DeflaterOutputStream(new BufferedOutputStream(new FileOutputStream(blockFile))) : - new FileOutputStream(blockFile)); + new FileOutputStream(blockFile)); long len = Math.min(blockSize, edatasize - idx); byte[] block = new byte[(int)len]; @@ -657,11 +657,13 @@ private static int partition(long arr[], byte[] values, int sizeOf, int left, in } static void quickSort(long arr[], byte[] values, int sizeOf, int left, int right) { - int index = partition(arr, values, sizeOf, left, right); - if (left < index - 1) - quickSort(arr, values, sizeOf, left, index - 1); - if (index < right) - quickSort(arr, values, sizeOf, index, right); + if (left < right) { + int index = partition(arr, values, sizeOf, left, right); + if (left < index - 1) + quickSort(arr, values, sizeOf, left, index - 1); + if (index < right) + quickSort(arr, values, sizeOf, index, right); + } } From 1d7e4b1161fe1dd15ddd1b0e3c5579a0d892823e Mon Sep 17 00:00:00 2001 From: Aapo Kyrola Date: Thu, 13 Mar 2014 23:04:12 -0700 Subject: [PATCH 02/29] fixed bug with the matrix market parser -- thanks Victoria for spotting it! 
--- src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java | 2 +- .../java/edu/cmu/graphchi/queries/demo/FriendsOfFriends.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java b/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java index 4bb340de..aaca8e3b 100644 --- a/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java +++ b/src/main/java/edu/cmu/graphchi/preprocessing/FastSharder.java @@ -754,7 +754,7 @@ public void shard(InputStream inputStream, GraphInputFormat format) throws IOExc /* Vertex - ids on the right side of the bipartite graph have id numLeft + originalId */ try { String lastTok = tok[tok.length - 1]; - this.addEdge(Integer.parseInt(tok[0]) - 1, numLeft + Integer.parseInt(tok[1]), lastTok); + this.addEdge(Integer.parseInt(tok[0]) - 1, numLeft + Integer.parseInt(tok[1]) - 1, lastTok); } catch (NumberFormatException nfe) { logger.severe("Could not parse line: " + ln); throw nfe; diff --git a/src/main/java/edu/cmu/graphchi/queries/demo/FriendsOfFriends.java b/src/main/java/edu/cmu/graphchi/queries/demo/FriendsOfFriends.java index 378bb839..2f4f12a8 100644 --- a/src/main/java/edu/cmu/graphchi/queries/demo/FriendsOfFriends.java +++ b/src/main/java/edu/cmu/graphchi/queries/demo/FriendsOfFriends.java @@ -157,7 +157,7 @@ public String recommendFriends(int vertexId, int fanOut) throws IOException { private String namify(Integer value) throws IOException { File f = new File(baseFilename + "_names.dat"); if (!f.exists()) { - System.out.println("didn't find name file: " + f.getPath()); + // System.out.println("didn't find name file: " + f.getPath()); return value+""; } int i = value * 16; From 9b760717087c30d10f571b6c02f3ecc15ca1b225 Mon Sep 17 00:00:00 2001 From: Aapo Kyrola Date: Tue, 15 Apr 2014 15:19:29 -0700 Subject: [PATCH 03/29] Update README.md --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index 
d967f9b6..4490ef1b 100644 --- a/README.md +++ b/README.md @@ -132,8 +132,3 @@ Java and .NET applications. Take a look at YourKit's leading software products: akyrola@cs.cmu.edu - - - -[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/GraphChi/graphchi-java/trend.png)](https://bitdeli.com/free "Bitdeli Badge") - From 2e2a0a8eaf80b08b37f206beb88039c36fe30667 Mon Sep 17 00:00:00 2001 From: Aapo Kyrola Date: Wed, 16 Apr 2014 21:35:53 -0700 Subject: [PATCH 04/29] Made floatpair fields final to prevent modifying. If setting edge or vertex value, need to use setValue() explicitly --- src/main/java/com/twitter/pers/bipartite/HITSSmallMem.java | 5 ++--- src/main/java/com/twitter/pers/bipartite/SALSASmallMem.java | 2 +- src/main/java/edu/cmu/graphchi/datablocks/FloatPair.java | 5 +++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/twitter/pers/bipartite/HITSSmallMem.java b/src/main/java/com/twitter/pers/bipartite/HITSSmallMem.java index a93c7ae1..4e23623a 100644 --- a/src/main/java/com/twitter/pers/bipartite/HITSSmallMem.java +++ b/src/main/java/com/twitter/pers/bipartite/HITSSmallMem.java @@ -83,7 +83,7 @@ public void update(ChiVertex vertex, GraphChiContext context) FloatPair curValue = vertex.getValue(); if (side == LEFTSIDE && vertex.numOutEdges() > 0) { - curValue.first = newValue; + curValue = new FloatPair(newValue, curValue.second); synchronized (this) { leftSideSqrSum += newValue * newValue; } @@ -140,8 +140,7 @@ public void endIteration(GraphChiContext ctx) { leftNorm = (float) Math.sqrt(leftSideSqrSum); VertexTransformer.transform((int) ctx.getNumVertices(), graphName, new FloatPairConverter(), new VertexTransformCallBack() { public FloatPair map(int vertexId, FloatPair value) { - value.first /= leftNorm; - return value; + return new FloatPair(value.first/leftNorm, value.second); } }); diff --git a/src/main/java/com/twitter/pers/bipartite/SALSASmallMem.java b/src/main/java/com/twitter/pers/bipartite/SALSASmallMem.java index 
03bd6d3e..2b4fb2d0 100644 --- a/src/main/java/com/twitter/pers/bipartite/SALSASmallMem.java +++ b/src/main/java/com/twitter/pers/bipartite/SALSASmallMem.java @@ -77,7 +77,7 @@ public void update(ChiVertex vertex, GraphChiContext context) FloatPair curValue = vertex.getValue(); if (side == LEFTSIDE && vertex.numOutEdges() > 0) { - curValue.first = newValue; + curValue = new FloatPair(newValue, curValue.second); // Write value to outedges float broadcastValue = newValue / vertex.numOutEdges(); for(int i=0; i < vertex.numOutEdges(); i++) { diff --git a/src/main/java/edu/cmu/graphchi/datablocks/FloatPair.java b/src/main/java/edu/cmu/graphchi/datablocks/FloatPair.java index aba68569..dacec76a 100644 --- a/src/main/java/edu/cmu/graphchi/datablocks/FloatPair.java +++ b/src/main/java/edu/cmu/graphchi/datablocks/FloatPair.java @@ -3,11 +3,12 @@ /** * Represents a 2-tuple of floats. * Access the tuple elements by pair.first, pair.second. + * * @author Aapo Kyrola */ public class FloatPair { - public float first; - public float second; + public final float first; + public final float second; public FloatPair(float first, float second) { this.first = first; From e14b634f76fdec16898c0e6e61fa51554a27d26f Mon Sep 17 00:00:00 2001 From: jerryye Date: Wed, 16 Apr 2014 19:35:35 -0700 Subject: [PATCH 05/29] Added weighted pagerank. 
--- .../cmu/graphchi/apps/WeightedPagerank.java | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java diff --git a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java new file mode 100644 index 00000000..ea2e6d77 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java @@ -0,0 +1,125 @@ +package edu.cmu.graphchi.apps; + +import edu.cmu.graphchi.*; +import edu.cmu.graphchi.datablocks.FloatConverter; +import edu.cmu.graphchi.engine.GraphChiEngine; +import edu.cmu.graphchi.engine.VertexInterval; +import edu.cmu.graphchi.io.CompressedIO; +import edu.cmu.graphchi.preprocessing.EdgeProcessor; +import edu.cmu.graphchi.preprocessing.FastSharder; +import edu.cmu.graphchi.preprocessing.VertexIdTranslate; +import edu.cmu.graphchi.preprocessing.VertexProcessor; +import edu.cmu.graphchi.util.IdFloat; +import edu.cmu.graphchi.util.Toplist; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.TreeSet; +import java.util.logging.Logger; + +/** + * Example application: PageRank (http://en.wikipedia.org/wiki/Pagerank) + * Iteratively computes a pagerank for each vertex by averaging the pageranks + * of in-neighbors pageranks. + * @author akyrola + */ +public class WeightedPagerank implements GraphChiProgram { + + private static Logger logger = ChiLogger.getLogger("pagerank"); + + public void update(ChiVertex vertex, GraphChiContext context) { + if (context.getIteration() == 0) { + /* Initialize on first iteration */ + vertex.setValue(1.0f); + } else { + /* On other iterations, set my value to be the weighted + average of my in-coming neighbors pageranks. + */ + float sum = 0.f; + for(int i=0; i(graphName, numShards, new VertexProcessor() { + public Float receiveVertexValue(int vertexId, String token) { + return (token == null ? 
0.0f : Float.parseFloat(token)); + } + }, new EdgeProcessor() { + public Float receiveEdge(int from, int to, String token) { + return (token == null ? 0.0f : Float.parseFloat(token)); + } + }, new FloatConverter(), new FloatConverter()); + } + + /** + * Usage: java edu.cmu.graphchi.demo.PageRank graph-name num-shards filetype(edgelist|adjlist) + * For specifying the number of shards, 20-50 million edges/shard is often a good configuration. + */ + public static void main(String[] args) throws Exception { + String baseFilename = args[0]; + int nShards = Integer.parseInt(args[1]); + String fileType = (args.length >= 3 ? args[2] : null); + + CompressedIO.disableCompression(); + + /* Create shards */ + FastSharder sharder = createSharder(baseFilename, nShards); + if (baseFilename.equals("pipein")) { // Allow piping graph in + sharder.shard(System.in, fileType); + } else { + if (!new File(ChiFilenames.getFilenameIntervals(baseFilename, nShards)).exists()) { + sharder.shard(new FileInputStream(new File(baseFilename)), fileType); + } else { + logger.info("Found shards -- no need to preprocess"); + } + } + + /* Run GraphChi */ + GraphChiEngine engine = new GraphChiEngine(baseFilename, nShards); + engine.setEdataConverter(new FloatConverter()); + engine.setVertexDataConverter(new FloatConverter()); + engine.setModifiesInedges(false); // Important optimization + + engine.run(new WeightedPagerank(), 4); + + logger.info("Ready."); + + /* Output results */ + int i = 0; + VertexIdTranslate trans = engine.getVertexIdTranslate(); + TreeSet top20 = Toplist.topListFloat(baseFilename, engine.numVertices(), 20); + for(IdFloat vertexRank : top20) { + System.out.println(++i + ": " + trans.backward(vertexRank.getVertexId()) + " = " + vertexRank.getValue()); + } + } +} From b39309f48a48aba52c5f1daa7d9aa52dbdb73421 Mon Sep 17 00:00:00 2001 From: jerryye Date: Wed, 16 Apr 2014 20:41:40 -0700 Subject: [PATCH 06/29] src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java --- 
.../cmu/graphchi/apps/WeightedPagerank.java | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java index ea2e6d77..7f8c11bd 100644 --- a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java +++ b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java @@ -2,6 +2,8 @@ import edu.cmu.graphchi.*; import edu.cmu.graphchi.datablocks.FloatConverter; +import edu.cmu.graphchi.datablocks.FloatPair; +import edu.cmu.graphchi.datablocks.FloatPairConverter; import edu.cmu.graphchi.engine.GraphChiEngine; import edu.cmu.graphchi.engine.VertexInterval; import edu.cmu.graphchi.io.CompressedIO; @@ -24,11 +26,10 @@ * of in-neighbors pageranks. * @author akyrola */ -public class WeightedPagerank implements GraphChiProgram { +public class WeightedPagerank implements GraphChiProgram { + private static Logger logger = ChiLogger.getLogger("weighted_pagerank"); - private static Logger logger = ChiLogger.getLogger("pagerank"); - - public void update(ChiVertex vertex, GraphChiContext context) { + public void update(ChiVertex vertex, GraphChiContext context) { if (context.getIteration() == 0) { /* Initialize on first iteration */ vertex.setValue(1.0f); @@ -38,7 +39,8 @@ public void update(ChiVertex vertex, GraphChiContext context) { */ float sum = 0.f; for(int i=0; i vertex, GraphChiContext context) { /* Write my value (divided by my out-degree) to my out-edges so neighbors can read it. */ float outValue = vertex.getValue() / vertex.numOutEdges(); for(int i=0; i(graphName, numShards, new VertexProcessor() { + return new FastSharder(graphName, numShards, new VertexProcessor() { public Float receiveVertexValue(int vertexId, String token) { return (token == null ? 0.0f : Float.parseFloat(token)); } - }, new EdgeProcessor() { - public Float receiveEdge(int from, int to, String token) { - return (token == null ? 
0.0f : Float.parseFloat(token)); + }, new EdgeProcessor() { + public FloatPair receiveEdge(int from, int to, String token) { + return (new FloatPair(Float.parseFloat(token), 0.0f)); } - }, new FloatConverter(), new FloatConverter()); + }, new FloatConverter(), new FloatPairConverter()); } /** @@ -105,8 +107,8 @@ public static void main(String[] args) throws Exception { } /* Run GraphChi */ - GraphChiEngine engine = new GraphChiEngine(baseFilename, nShards); - engine.setEdataConverter(new FloatConverter()); + GraphChiEngine engine = new GraphChiEngine(baseFilename, nShards); + engine.setEdataConverter(new FloatPairConverter()); engine.setVertexDataConverter(new FloatConverter()); engine.setModifiesInedges(false); // Important optimization From d1b75cdf9cc36e56dc11837d142f8f35ad9b25ff Mon Sep 17 00:00:00 2001 From: jerryye Date: Wed, 16 Apr 2014 21:04:21 -0700 Subject: [PATCH 07/29] Changed how updated edge weights are saved. --- src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java index 7f8c11bd..6befe1aa 100644 --- a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java +++ b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java @@ -48,7 +48,9 @@ public void update(ChiVertex vertex, GraphChiContext context) /* Write my value (divided by my out-degree) to my out-edges so neighbors can read it. */ float outValue = vertex.getValue() / vertex.numOutEdges(); for(int i=0; i Date: Thu, 17 Apr 2014 00:44:52 -0700 Subject: [PATCH 08/29] Implemented computation of pagerank with edge weights. 
--- .gitignore | 5 ++++- .../edu/cmu/graphchi/apps/WeightedPagerank.java | 15 ++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 0a8d3539..d0a07d50 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,7 @@ # Package Files # *.jar *.war -*.ear \ No newline at end of file +*.ear +.idea +target +walkmanager.log diff --git a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java index 6befe1aa..0ea6a5af 100644 --- a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java +++ b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java @@ -39,17 +39,22 @@ public void update(ChiVertex vertex, GraphChiContext context) */ float sum = 0.f; for(int i=0; i(graphName, numShards, new VertexProcessor() { public Float receiveVertexValue(int vertexId, String token) { - return (token == null ? 0.0f : Float.parseFloat(token)); + return (token == null ? 0.f : Float.parseFloat(token)); } }, new EdgeProcessor() { public FloatPair receiveEdge(int from, int to, String token) { - return (new FloatPair(Float.parseFloat(token), 0.0f)); + return (new FloatPair(Float.parseFloat(token), 0.f)); } }, new FloatConverter(), new FloatPairConverter()); } From a98397f1ed16a64ba74f2c152ee59488d508f6f3 Mon Sep 17 00:00:00 2001 From: jerryye Date: Thu, 17 Apr 2014 01:30:04 -0700 Subject: [PATCH 09/29] added PigWeightedPagerank --- .../cmu/graphchi/apps/WeightedPagerank.java | 2 +- .../apps/pig/PigWeightedPagerank.java | 166 ++++++++++++++++++ 2 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 src/main/java/edu/cmu/graphchi/apps/pig/PigWeightedPagerank.java diff --git a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java index 0ea6a5af..f7940bd7 100644 --- a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java +++ b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java @@ -85,7 +85,7 @@ public 
Float receiveVertexValue(int vertexId, String token) { } }, new EdgeProcessor() { public FloatPair receiveEdge(int from, int to, String token) { - return (new FloatPair(Float.parseFloat(token), 0.f)); + return new FloatPair(Float.parseFloat(token), 0.f); } }, new FloatConverter(), new FloatPairConverter()); } diff --git a/src/main/java/edu/cmu/graphchi/apps/pig/PigWeightedPagerank.java b/src/main/java/edu/cmu/graphchi/apps/pig/PigWeightedPagerank.java new file mode 100644 index 00000000..a153808e --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/apps/pig/PigWeightedPagerank.java @@ -0,0 +1,166 @@ +package edu.cmu.graphchi.apps.pig; + +import edu.cmu.graphchi.ChiVertex; +import edu.cmu.graphchi.GraphChiContext; +import edu.cmu.graphchi.GraphChiProgram; +import edu.cmu.graphchi.datablocks.FloatConverter; +import edu.cmu.graphchi.datablocks.FloatPair; +import edu.cmu.graphchi.datablocks.FloatPairConverter; +import edu.cmu.graphchi.engine.GraphChiEngine; +import edu.cmu.graphchi.engine.VertexInterval; +import edu.cmu.graphchi.hadoop.PigGraphChiBase; +import edu.cmu.graphchi.preprocessing.EdgeProcessor; +import edu.cmu.graphchi.preprocessing.FastSharder; +import edu.cmu.graphchi.preprocessing.VertexProcessor; +import edu.cmu.graphchi.vertexdata.VertexAggregator; +import edu.cmu.graphchi.vertexdata.VertexIdValue; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; + +import java.io.IOException; +import java.util.Iterator; +import java.util.logging.Logger; + +/** + * Example application: PageRank (http://en.wikipedia.org/wiki/Pagerank) + * Iteratively computes a pagerank for each vertex by averaging the pageranks + * of in-neighbors pageranks. + * + * This version can be used with Pig in a Hadoop cluster. + * + * Example PIG script for running this: + * + *
+ *     REGISTER graphchi-java-0.2-jar-with-dependencies.jar;
+ *
+ *     pagerank = LOAD 'graphs/soc-LiveJournal1.txt' USING edu.cmu.graphchi.apps.pig.PigWeightedPagerank as (vertex:int, rank:float);
+ *
+ *     STORE pagerank INTO 'pagerank-livejournal';
+ * 
+ * + * (To get the livejournal graph, visit: http://snap.stanford.edu/data/soc-LiveJournal1.html) + * + * @see edu.cmu.graphchi.hadoop.PigGraphChiBase + * @author Aapo Kyrola, akyrola@cs.cmu.edu + */ +public class PigWeightedPagerank extends PigGraphChiBase implements GraphChiProgram { + + private static Logger logger = Logger.getLogger("weighted_pagerank"); + + public void update(ChiVertex vertex, GraphChiContext context) { + if (context.getIteration() == 0) { + /* Initialize on first iteration */ + vertex.setValue(1.0f); + } else { + /* On other iterations, set my value to be the weighted + average of my in-coming neighbors pageranks. + */ + float sum = 0.f; + for(int i=0; i> vertexIterator; + + + @Override + /** + * Pig column names + */ + protected String getSchemaString() { + return "(vertex:int, weight:float)"; + } + + @Override + protected int getNumShards() { + return 12; // Unfortunately, currently hard-coded. + } + + @Override + /** + * Runs the GraphChi program + */ + protected void runGraphChi() throws Exception { + /* Run GraphChi */ + GraphChiEngine engine = new GraphChiEngine(getGraphName(), getNumShards()); + engine.setEdataConverter(new FloatPairConverter()); + engine.setVertexDataConverter(new FloatConverter()); + engine.setModifiesInedges(false); // Important optimization + + engine.run(this, 4); + + logger.info("Ready."); + + /* Create iterator for the vertex values */ + this.vertexIterator = VertexAggregator.vertexIterator(engine.numVertices(), getGraphName(), new FloatConverter(), + engine.getVertexIdTranslate()); + } + + @Override + /** + * Constructs "sharder", which takes an edge list and + * converts it to internal binary representation of GraphChi. + */ + protected FastSharder createSharder(String graphName, int numShards) throws IOException { + return new FastSharder(graphName, numShards, new VertexProcessor() { + public Float receiveVertexValue(int vertexId, String token) { + return (token == null ? 
0.0f : Float.parseFloat(token)); + } + }, new EdgeProcessor() { + public FloatPair receiveEdge(int from, int to, String token) { + return new FloatPair(Float.parseFloat(token), 0.f); + } + }, new FloatConverter(), new FloatPairConverter()); + } + + @Override + /** + * Generates the output to the Pig script, tuple by tuple + */ + protected Tuple getNextResult(TupleFactory tupleFactory) throws ExecException { + if (vertexIterator.hasNext()) { + Tuple t = tupleFactory.newTuple(2); + VertexIdValue val = vertexIterator.next(); + t.set(0, val.getVertexId()); + t.set(1, val.getValue()); + return t; + } else { + return null; + } + } +} From 15d2d54c83cadaacd1429afb1ede5ff87ecc6cd7 Mon Sep 17 00:00:00 2001 From: jerryye Date: Thu, 17 Apr 2014 01:30:56 -0700 Subject: [PATCH 10/29] Added newline. --- src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java index f7940bd7..62d1dd26 100644 --- a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java +++ b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java @@ -27,6 +27,7 @@ * @author akyrola */ public class WeightedPagerank implements GraphChiProgram { + private static Logger logger = ChiLogger.getLogger("weighted_pagerank"); public void update(ChiVertex vertex, GraphChiContext context) { From 905363ee466cd3be3bc122c4413046615a0e1dd6 Mon Sep 17 00:00:00 2001 From: Aapo Kyrola Date: Thu, 17 Apr 2014 09:00:15 -0700 Subject: [PATCH 11/29] Modified weighted pageranks to use the new immutable FloatPair --- .../java/edu/cmu/graphchi/apps/WeightedPagerank.java | 9 +++------ .../edu/cmu/graphchi/apps/pig/PigWeightedPagerank.java | 9 ++++----- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java index 62d1dd26..a44abe34 100644 --- 
a/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java +++ b/src/main/java/edu/cmu/graphchi/apps/WeightedPagerank.java @@ -21,10 +21,8 @@ import java.util.logging.Logger; /** - * Example application: PageRank (http://en.wikipedia.org/wiki/Pagerank) - * Iteratively computes a pagerank for each vertex by averaging the pageranks - * of in-neighbors pageranks. - * @author akyrola + * Weighted Pagerank. + * Contributed by Jerry Ye, 2014. */ public class WeightedPagerank implements GraphChiProgram { @@ -55,8 +53,7 @@ public void update(ChiVertex vertex, GraphChiContext context) for(int i=0; iPig in a Hadoop cluster. * @@ -42,7 +42,7 @@ * (To get the livejournal graph, visit: http://snap.stanford.edu/data/soc-LiveJournal1.html) * * @see edu.cmu.graphchi.hadoop.PigGraphChiBase - * @author Aapo Kyrola, akyrola@cs.cmu.edu + * @author Jerry Ye */ public class PigWeightedPagerank extends PigGraphChiBase implements GraphChiProgram { @@ -73,8 +73,7 @@ public void update(ChiVertex vertex, GraphChiContext context) for(int i=0; i Date: Fri, 18 Apr 2014 15:33:40 -0700 Subject: [PATCH 12/29] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4490ef1b..286e2dd1 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,13 @@ Version 0.2 ## News * Performance has been improved by parallelizing shard loading better (Oct 22, 2013) -* GraphChi was moved to GitHub from Google Code (July 24). Please report/fix any broken links. 
* GraphChi's Java version has a new cool random walk simulation engine: https://github.com/GraphChi/graphchi-java/wiki/Personalized-Pagerank-with-DrunkardMob +### Survey +If you use GraphChi, please fill the form to tell about your experience (it is short!): +https://docs.google.com/forms/d/1E4jjAqQiW76hKAjXpjL1SbkBauC3_V5BfCXQU6yz-I4/edit # Introduction From 4d1a54cc8cf3bf82e3b23e3e2ed88f514a6c3206 Mon Sep 17 00:00:00 2001 From: Aapo Kyrola Date: Mon, 12 May 2014 19:06:41 -0400 Subject: [PATCH 13/29] removed survey link --- README.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/README.md b/README.md index 286e2dd1..823fdb37 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,7 @@ Version 0.2 * Performance has been improved by parallelizing shard loading better (Oct 22, 2013) * GraphChi's Java version has a new cool random walk simulation engine: https://github.com/GraphChi/graphchi-java/wiki/Personalized-Pagerank-with-DrunkardMob - -### Survey - -If you use GraphChi, please fill the form to tell about your experience (it is short!): -https://docs.google.com/forms/d/1E4jjAqQiW76hKAjXpjL1SbkBauC3_V5BfCXQU6yz-I4/edit - - + # Introduction Project for developing the Java version of GraphChi ( http://www.graphchi.org ), the disk-based graph computation engine. To learn more about GraphChi, visit the C++ version's project page: https://github.com/GraphChi/graphchi-cpp From 3412f2749a00661d4d0a12baa269a29eef578465 Mon Sep 17 00:00:00 2001 From: slowknight Date: Fri, 15 Aug 2014 15:59:16 -0700 Subject: [PATCH 14/29] Added a kcore decomposition app (KCoreDecomposer.java) and a indirected-2-directed graph converter (GraphTransformer.java). 
--- .../graphchi/apps/kcore/GraphTransformer.java | 152 ++++++++++ .../graphchi/apps/kcore/KCoreDecomposer.java | 270 ++++++++++++++++++ 2 files changed, 422 insertions(+) create mode 100644 src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java create mode 100644 src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java diff --git a/src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java b/src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java new file mode 100644 index 00000000..16e5b400 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java @@ -0,0 +1,152 @@ +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.logging.Logger; + +import edu.cmu.graphchi.ChiFilenames; +import edu.cmu.graphchi.ChiLogger; +import edu.cmu.graphchi.ChiVertex; +import edu.cmu.graphchi.GraphChiContext; +import edu.cmu.graphchi.GraphChiProgram; +import edu.cmu.graphchi.datablocks.IntConverter; +import edu.cmu.graphchi.engine.GraphChiEngine; +import edu.cmu.graphchi.engine.VertexInterval; +import edu.cmu.graphchi.io.CompressedIO; +import edu.cmu.graphchi.preprocessing.EdgeProcessor; +import edu.cmu.graphchi.preprocessing.FastSharder; +import edu.cmu.graphchi.preprocessing.VertexProcessor; + +/** + * Converts an indirected input graph into a directed by checking that each edge has a complimentary edge + * in the opposite direction, and adding those complimentary edges when applicable. + * + * Note: You may change output and input directory path based on your needs. 
+ * + * @author Wissam Khaouid, wissamk@uvic.ca, 2014 + */ + +public class GraphTransformer implements GraphChiProgram { + + protected static int nEdgesAdded = 0; + + protected static BufferedWriter bw; + + private static Logger logger = ChiLogger.getLogger("GraphConverter"); + + public static void startWriting(File file, boolean append) throws IOException { + FileWriter fw = new FileWriter(file, append); + bw = new BufferedWriter(fw); + } + + public static void stopWriting() throws IOException { + bw.close(); + } + + public void update(ChiVertex vertex, GraphChiContext context) { + + ArrayList outNeighbors = new ArrayList(); + + for(int i = 0; i < vertex.numOutEdges(); i++) { + outNeighbors.add(vertex.outEdge(i).getVertexId()); + } + + for(int i = 0; i < vertex.numInEdges(); i++) { + if( !outNeighbors.contains(vertex.inEdge(i).getVertexId()) ) { + try { + bw.write("\n" + vertex.getId() + "\t" + vertex.inEdge(i).getVertexId()); + nEdgesAdded ++; + } catch (IOException e) { + e.printStackTrace(); + } + } + } + + } + + public void beginIteration(GraphChiContext ctx) {} + + public void endIteration(GraphChiContext ctx) {} + + public void beginInterval(GraphChiContext ctx, VertexInterval interval) {} + + public void endInterval(GraphChiContext ctx, VertexInterval interval) {} + + public void beginSubInterval(GraphChiContext ctx, VertexInterval interval) {} + + public void endSubInterval(GraphChiContext ctx, VertexInterval interval) {} + + protected static FastSharder createSharder(String graphName, int numShards) throws IOException { + return new FastSharder(graphName, numShards, new VertexProcessor() { + public Integer receiveVertexValue(int vertexId, String token) { + return 0; + } + }, new EdgeProcessor() { + public Integer receiveEdge(int from, int to, String token) { + return 0; + } + }, new IntConverter(), new IntConverter()); + } + + public static void main(String[] args) throws IOException { + + /** + * + * + * java -Xmx2048m -cp bin:gchi-libs/* -Dnum_threads=8 
GraphTransformer filename nbrOfShards filetype memoryBudget + */ + + String inputDirectory = "./datasets/", + outputDirectory = "./output/"; + + String fileName = args[0]; + int nShards = Integer.parseInt(args[1]); + String fileType = args[2]; + int memBudget = (args.length >= 4 ? Integer.parseInt(args[3]) : null); + + CompressedIO.disableCompression(); + + String inputFilePath = inputDirectory + fileName; + + /* Making shards */ + FastSharder sharder = createSharder(inputFilePath, nShards); + if (inputFilePath.equals("pipein")) { // Allow piping graph in + sharder.shard(System.in, fileType); + } else { + if (!new File(ChiFilenames.getFilenameIntervals(inputFilePath, nShards)).exists()) { + sharder.shard(new FileInputStream(new File(inputFilePath)), fileType); + } else { + logger.info("Found shards -- no need to preprocess"); + } + } + + /* Complementary edges will be appended to the input graph file throughout execution */ + startWriting(new File(inputFilePath), true); + + /* Running GraphChi */ + GraphChiEngine engine = new GraphChiEngine(inputFilePath, nShards); + engine.setMemoryBudgetMb(memBudget); + engine.setSkipZeroDegreeVertices(true); + engine.setEnableScheduler(true); + engine.setEdataConverter(new IntConverter()); + engine.setVertexDataConverter(new IntConverter()); + + engine.run(new GraphTransformer(), 1); + + stopWriting(); + + /* Write report file */ + startWriting(new File(outputDirectory + "gtransformer-report-" + fileName), false); + bw.write("Total edges added: " + nEdgesAdded + "\n"); + stopWriting(); + + logger.info("Success!"); + + } + +} + + + diff --git a/src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java b/src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java new file mode 100644 index 00000000..248ff107 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java @@ -0,0 +1,270 @@ +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; 
+import java.io.IOException; +import java.util.Collections; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.logging.Logger; + +import edu.cmu.graphchi.ChiFilenames; +import edu.cmu.graphchi.ChiLogger; +import edu.cmu.graphchi.ChiVertex; +import edu.cmu.graphchi.GraphChiContext; +import edu.cmu.graphchi.GraphChiProgram; +import edu.cmu.graphchi.datablocks.IntConverter; +import edu.cmu.graphchi.engine.GraphChiEngine; +import edu.cmu.graphchi.engine.VertexInterval; +import edu.cmu.graphchi.io.CompressedIO; +import edu.cmu.graphchi.preprocessing.EdgeProcessor; +import edu.cmu.graphchi.preprocessing.FastSharder; +import edu.cmu.graphchi.preprocessing.VertexIdTranslate; +import edu.cmu.graphchi.preprocessing.VertexProcessor; +import edu.cmu.graphchi.util.IdInt; +import edu.cmu.graphchi.util.Toplist; + +/** K-core decomposition algorithm + * + * Outputs: a file containing key-value pairs: vertexId, coreness + * + * How does it work ? + * 1 - Initializes vertex values to their degrees then those values are communicated to neighbors. + * 2 - for each vertex v, an upper-bound is computed on its coreness based on the values received from neighbors. + * 3 - if the upper-bound is better than its current value, v updates its value with the upper-bound. + * 4 - Steps 2 and 3 are repeated until no more value updates are occurring. + * + * For correct results, run your input graph through GraphTransformer first. + * Also, make sure to delete the preprocessed shard files created by GraphTransformer prior to running KCoreDecomposer. + * + * KCoreDecomposer is inspired from the algorithm presented in the following paper: + * Distributed K-Core Decomposition + * Alberto Montresor, Francesco De Pellegrini, Daniele Miorandi + * http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6189336 + * + * Note: You may change output and input directory path based on your needs. 
+ * + * @author Wissam Khaouid, wissamk@uvic.ca, 2014 + */ + +public class KCoreDecomposer implements GraphChiProgram { + + public static final int INFINITY = 999999999; + + protected int vertexValuesUpdated; + protected static int nVertexes = 0; + + private static int nIterations = 0; + protected static BufferedWriter bw; + + private static Logger logger = ChiLogger.getLogger("kCoreDecomposition"); + + public static void startWriting(File file, boolean append) throws IOException { + FileWriter fw = new FileWriter(file, append); + bw = new BufferedWriter(fw); + } + + public static void stopWriting() throws IOException { + bw.close(); + } + + public void update(ChiVertex vertex, GraphChiContext context) { + + int iteration = context.getIteration(), + numOutEdges = vertex.numOutEdges(); + + if( iteration == 0 ) { + vertex.setValue(numOutEdges); + broadcastValue(vertex, numOutEdges); + nVertexes ++; + vertexValuesUpdated ++; + } + + else { + + int topDrawer = vertex.getValue() + 1, + topDrawerCount = 0, + localEstimate = 0; + + SortedMap inEdgeValueCounts = Collections.synchronizedSortedMap(new TreeMap(Collections.reverseOrder())); + + for(int i = 0; i <= vertex.numOutEdges(); i++) { + inEdgeValueCounts.put(i, 0); + } + + for(int i = 0; i < vertex.numInEdges(); i++) { + int inEdgeValue = vertex.inEdge(i).getValue(); + if( inEdgeValue >= topDrawer ) { + topDrawerCount ++; + } + else { + + try { + int currentValue = inEdgeValueCounts.get(inEdgeValue); + inEdgeValueCounts.put(inEdgeValue, currentValue + 1); + } + catch(Exception e) { + e.printStackTrace(); + System.exit(0); + } + + } + } + + inEdgeValueCounts.put(topDrawer, topDrawerCount); + + localEstimate = computeLeastValue(inEdgeValueCounts); + + if( localEstimate < vertex.getValue() ) { + vertex.setValue(localEstimate); + broadcastValue(vertex, localEstimate); + vertexValuesUpdated ++; + } + } + + context.getScheduler().addTask(vertex.getId()); + + } + + /** + * Computes the greatest x among a list of values, such 
that at least x values are greater than x + * For now, the array is instantiated and filled up elsewhere + */ + public int computeLeastValue(SortedMap map) { + int cumulCount = 0; + int key, count; + for(Map.Entry entry : map.entrySet()) { + key = entry.getKey(); + count = entry.getValue(); + cumulCount += count; + if(cumulCount >= key) { + return key; + } + } + return 1; + } + + /** + * Broadcasts a value to the neighbors by writing it to the out-edges + */ + + public void broadcastValue(ChiVertex vertex, int value) { + for(int i = 0; i < vertex.numOutEdges(); i++) { + vertex.outEdge(i).setValue(value); + } + } + + /** + * Invoked with the start of a new iteration + */ + public void beginIteration(GraphChiContext ctx) { + vertexValuesUpdated = 0; + } + + /** + * Invoked at the end of every iteration + */ + public void endIteration(GraphChiContext ctx) { + System.out.println(vertexValuesUpdated + " updates."); + System.out.println("iteration " + ctx.getIteration() + " ends."); + + nIterations ++; + if( vertexValuesUpdated == 0 ) { + System.out.println("no updates in this round. No more rounds .. 
KCore-montresor terminates!"); + ctx.getScheduler().removeAllTasks(); + } + } + + public void beginInterval(GraphChiContext ctx, VertexInterval interval) {} + + public void endInterval(GraphChiContext ctx, VertexInterval interval) {} + + public void beginSubInterval(GraphChiContext ctx, VertexInterval interval) {} + + public void endSubInterval(GraphChiContext ctx, VertexInterval interval) {} + + protected static FastSharder createSharder(String graphName, int numShards) throws IOException { + return new FastSharder(graphName, numShards, new VertexProcessor() { + public Integer receiveVertexValue(int vertexId, String token) { + return 0; + } + }, new EdgeProcessor() { + public Integer receiveEdge(int from, int to, String token) { + return 0; + } + }, new IntConverter(), new IntConverter()); + } + + public static void main(String[] args) throws IOException { + + /** Run from command line (Example) + * java -Xmx2048m -cp bin:gchi-libs/* -Dnum_threads=4 KCoreDecomposition filename nbrOfShards filetype memoryBudget + * + * Assuming GraphChi jar files are saved in ./gchi-libs/ + */ + + String inputDirectory = "./datasets/", + outputDirectory = "./output/"; + + String fileName = args[0]; + int nShards = Integer.parseInt(args[1]); + String fileType = args[2]; + int memBudget = (args.length >= 4 ? 
Integer.parseInt(args[3]) : null); + + CompressedIO.disableCompression(); + + String inputFilePath = inputDirectory + fileName; + + /* Preprocessing graph : Making shards */ + + FastSharder sharder = createSharder(inputFilePath, nShards); + if (inputFilePath.equals("pipein")) { // Allow piping graph in + sharder.shard(System.in, fileType); + } else { + if (!new File(ChiFilenames.getFilenameIntervals(inputFilePath, nShards)).exists()) { + sharder.shard(new FileInputStream(new File(inputFilePath)), fileType); + } else { + logger.info("Found shards -- no need to preprocess"); + } + } + + /* Running GraphChi */ + + GraphChiEngine engine = new GraphChiEngine(inputFilePath, nShards); + engine.setMemoryBudgetMb(memBudget); + engine.setSkipZeroDegreeVertices(true); + engine.setEnableScheduler(true); + engine.setEdataConverter(new IntConverter()); + engine.setVertexDataConverter(new IntConverter()); + + engine.run(new KCoreDecomposer(), INFINITY); + + logger.info("Ready."); + + + /* Outputting Core Values */ + + startWriting(new File(outputDirectory + "out-cores-" + fileName), false); + bw.write(nVertexes + "\n"); + + VertexIdTranslate trans = engine.getVertexIdTranslate(); + TreeSet topToBottom = Toplist.topListInt(inputFilePath, engine.numVertices(), engine.numVertices()); + + for(IdInt walker : topToBottom) { + float coreValue = walker.getValue(); + bw.write(trans.backward(walker.getVertexId()) + ", " + String.valueOf((int)coreValue) + "\n"); + } + + stopWriting(); + + System.out.println("Vertexes Processed: " + engine.numVertices()); + System.out.println("Edges Processed: " + engine.numEdges()) ; + + System.out.println("nIterations: " + nIterations); + System.out.println("Success!"); + + } + +} From 7f4b37cc0db3f9b35366bcbb92589fc64fb1f4a8 Mon Sep 17 00:00:00 2001 From: Aapo Kyrola Date: Mon, 18 Aug 2014 23:48:39 +0100 Subject: [PATCH 15/29] minor style issues --- .../graphchi/apps/kcore/GraphTransformer.java | 120 ++++--- .../graphchi/apps/kcore/KCoreDecomposer.java | 
314 +++++++++--------- 2 files changed, 215 insertions(+), 219 deletions(-) diff --git a/src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java b/src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java index 16e5b400..ca7fbee5 100644 --- a/src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java +++ b/src/main/java/edu/cmu/graphchi/apps/kcore/GraphTransformer.java @@ -1,3 +1,5 @@ +package edu.cmu.graphchi.apps.kcore; + import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; @@ -22,62 +24,60 @@ /** * Converts an indirected input graph into a directed by checking that each edge has a complimentary edge * in the opposite direction, and adding those complimentary edges when applicable. - * + * * Note: You may change output and input directory path based on your needs. - * + * * @author Wissam Khaouid, wissamk@uvic.ca, 2014 */ public class GraphTransformer implements GraphChiProgram { - - protected static int nEdgesAdded = 0; - - protected static BufferedWriter bw; - - private static Logger logger = ChiLogger.getLogger("GraphConverter"); - - public static void startWriting(File file, boolean append) throws IOException { - FileWriter fw = new FileWriter(file, append); - bw = new BufferedWriter(fw); - } - - public static void stopWriting() throws IOException { - bw.close(); - } - - public void update(ChiVertex vertex, GraphChiContext context) { - - ArrayList outNeighbors = new ArrayList(); - - for(int i = 0; i < vertex.numOutEdges(); i++) { - outNeighbors.add(vertex.outEdge(i).getVertexId()); - } - - for(int i = 0; i < vertex.numInEdges(); i++) { - if( !outNeighbors.contains(vertex.inEdge(i).getVertexId()) ) { - try { - bw.write("\n" + vertex.getId() + "\t" + vertex.inEdge(i).getVertexId()); - nEdgesAdded ++; - } catch (IOException e) { - e.printStackTrace(); - } - } - } - - } - - public void beginIteration(GraphChiContext ctx) {} - - public void endIteration(GraphChiContext ctx) {} - - public void 
beginInterval(GraphChiContext ctx, VertexInterval interval) {} + + protected static int nEdgesAdded = 0; + + protected static BufferedWriter bw; + + private static Logger logger = ChiLogger.getLogger("GraphConverter"); + + public static void startWriting(File file, boolean append) throws IOException { + FileWriter fw = new FileWriter(file, append); + bw = new BufferedWriter(fw); + } + + public static void stopWriting() throws IOException { + bw.close(); + } + + public void update(ChiVertex vertex, GraphChiContext context) { + ArrayList outNeighbors = new ArrayList(); + + for(int i = 0; i < vertex.numOutEdges(); i++) { + outNeighbors.add(vertex.outEdge(i).getVertexId()); + } + + for(int i = 0; i < vertex.numInEdges(); i++) { + if (!outNeighbors.contains(vertex.inEdge(i).getVertexId()) ) { + try { + bw.write("\n" + vertex.getId() + "\t" + vertex.inEdge(i).getVertexId()); + nEdgesAdded ++; + } catch (IOException e) { + e.printStackTrace(); + } + } + } + } + + public void beginIteration(GraphChiContext ctx) {} + + public void endIteration(GraphChiContext ctx) {} + + public void beginInterval(GraphChiContext ctx, VertexInterval interval) {} public void endInterval(GraphChiContext ctx, VertexInterval interval) {} public void beginSubInterval(GraphChiContext ctx, VertexInterval interval) {} public void endSubInterval(GraphChiContext ctx, VertexInterval interval) {} - + protected static FastSharder createSharder(String graphName, int numShards) throws IOException { return new FastSharder(graphName, numShards, new VertexProcessor() { public Integer receiveVertexValue(int vertexId, String token) { @@ -89,25 +89,23 @@ public Integer receiveEdge(int from, int to, String token) { } }, new IntConverter(), new IntConverter()); } - + public static void main(String[] args) throws IOException { - - /** - * - * - * java -Xmx2048m -cp bin:gchi-libs/* -Dnum_threads=8 GraphTransformer filename nbrOfShards filetype memoryBudget - */ - - String inputDirectory = "./datasets/", - 
outputDirectory = "./output/"; - - String fileName = args[0]; + + /** + * java -Xmx2048m -cp bin:gchi-libs/* -Dnum_threads=8 edu.cmu.graphchi.apps.kcore.GraphTransformer filename nbrOfShards filetype memoryBudget + */ + + String inputDirectory = "./datasets/"; + String outputDirectory = "./output/"; + + String fileName = args[0]; int nShards = Integer.parseInt(args[1]); String fileType = args[2]; int memBudget = (args.length >= 4 ? Integer.parseInt(args[3]) : null); - + CompressedIO.disableCompression(); - + String inputFilePath = inputDirectory + fileName; /* Making shards */ @@ -134,7 +132,7 @@ public static void main(String[] args) throws IOException { engine.setVertexDataConverter(new IntConverter()); engine.run(new GraphTransformer(), 1); - + stopWriting(); /* Write report file */ @@ -143,7 +141,7 @@ public static void main(String[] args) throws IOException { stopWriting(); logger.info("Success!"); - + } } diff --git a/src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java b/src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java index 248ff107..f6a313ec 100644 --- a/src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java +++ b/src/main/java/edu/cmu/graphchi/apps/kcore/KCoreDecomposer.java @@ -1,3 +1,5 @@ +package edu.cmu.graphchi.apps.kcore; + import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; @@ -26,165 +28,162 @@ import edu.cmu.graphchi.util.IdInt; import edu.cmu.graphchi.util.Toplist; -/** K-core decomposition algorithm - * +/** + * K-core decomposition algorithm + * * Outputs: a file containing key-value pairs: vertexId, coreness - * + * * How does it work ? * 1 - Initializes vertex values to their degrees then those values are communicated to neighbors. * 2 - for each vertex v, an upper-bound is computed on its coreness based on the values received from neighbors. * 3 - if the upper-bound is better than its current value, v updates its value with the upper-bound. 
* 4 - Steps 2 and 3 are repeated until no more value updates are occurring. - * + * * For correct results, run your input graph through GraphTransformer first. * Also, make sure to delete the preprocessed shard files created by GraphTransformer prior to running KCoreDecomposer. - * + * * KCoreDecomposer is inspired from the algorithm presented in the following paper: * Distributed K-Core Decomposition * Alberto Montresor, Francesco De Pellegrini, Daniele Miorandi * http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6189336 - * + * * Note: You may change output and input directory path based on your needs. - * + * * @author Wissam Khaouid, wissamk@uvic.ca, 2014 */ public class KCoreDecomposer implements GraphChiProgram { - - public static final int INFINITY = 999999999; - - protected int vertexValuesUpdated; - protected static int nVertexes = 0; - - private static int nIterations = 0; - protected static BufferedWriter bw; - - private static Logger logger = ChiLogger.getLogger("kCoreDecomposition"); - - public static void startWriting(File file, boolean append) throws IOException { - FileWriter fw = new FileWriter(file, append); - bw = new BufferedWriter(fw); - } - - public static void stopWriting() throws IOException { - bw.close(); - } - - public void update(ChiVertex vertex, GraphChiContext context) { - - int iteration = context.getIteration(), - numOutEdges = vertex.numOutEdges(); - - if( iteration == 0 ) { - vertex.setValue(numOutEdges); - broadcastValue(vertex, numOutEdges); - nVertexes ++; - vertexValuesUpdated ++; - } - - else { - - int topDrawer = vertex.getValue() + 1, - topDrawerCount = 0, - localEstimate = 0; - - SortedMap inEdgeValueCounts = Collections.synchronizedSortedMap(new TreeMap(Collections.reverseOrder())); - - for(int i = 0; i <= vertex.numOutEdges(); i++) { - inEdgeValueCounts.put(i, 0); - } - - for(int i = 0; i < vertex.numInEdges(); i++) { - int inEdgeValue = vertex.inEdge(i).getValue(); - if( inEdgeValue >= topDrawer ) { - topDrawerCount 
++; - } - else { - - try { - int currentValue = inEdgeValueCounts.get(inEdgeValue); - inEdgeValueCounts.put(inEdgeValue, currentValue + 1); - } - catch(Exception e) { - e.printStackTrace(); - System.exit(0); - } - - } - } - - inEdgeValueCounts.put(topDrawer, topDrawerCount); - - localEstimate = computeLeastValue(inEdgeValueCounts); - - if( localEstimate < vertex.getValue() ) { - vertex.setValue(localEstimate); - broadcastValue(vertex, localEstimate); - vertexValuesUpdated ++; - } - } - - context.getScheduler().addTask(vertex.getId()); - - } - - /** + + public static final int INFINITY = Integer.MAX_VALUE; + + protected int vertexValuesUpdated; + protected static int nVertexes = 0; + + private static int nIterations = 0; + protected static BufferedWriter bw; + + private static Logger logger = ChiLogger.getLogger("kCoreDecomposition"); + + public static void startWriting(File file, boolean append) throws IOException { + FileWriter fw = new FileWriter(file, append); + bw = new BufferedWriter(fw); + } + + public static void stopWriting() throws IOException { + bw.close(); + } + + public void update(ChiVertex vertex, GraphChiContext context) { + + int iteration = context.getIteration(); + int numOutEdges = vertex.numOutEdges(); + + if (iteration == 0) { + vertex.setValue(numOutEdges); + broadcastValue(vertex, numOutEdges); + nVertexes++; + vertexValuesUpdated++; + } else { + int topDrawer = vertex.getValue() + 1, + topDrawerCount = 0, + localEstimate = 0; + + SortedMap inEdgeValueCounts = + Collections.synchronizedSortedMap( + new TreeMap(Collections.reverseOrder())); + + for(int i = 0; i <= vertex.numOutEdges(); i++) { + inEdgeValueCounts.put(i, 0); + } + + for(int i = 0; i < vertex.numInEdges(); i++) { + int inEdgeValue = vertex.inEdge(i).getValue(); + if( inEdgeValue >= topDrawer ) { + topDrawerCount ++; + } else { + try { + int currentValue = inEdgeValueCounts.get(inEdgeValue); + inEdgeValueCounts.put(inEdgeValue, currentValue + 1); + } + catch(Exception e) { + 
e.printStackTrace(); + System.exit(0); + } + + } + } + + inEdgeValueCounts.put(topDrawer, topDrawerCount); + localEstimate = computeLeastValue(inEdgeValueCounts); + + if( localEstimate < vertex.getValue() ) { + vertex.setValue(localEstimate); + broadcastValue(vertex, localEstimate); + vertexValuesUpdated ++; + } + } + + context.getScheduler().addTask(vertex.getId()); + + } + + /** * Computes the greatest x among a list of values, such that at least x values are greater than x * For now, the array is instantiated and filled up elsewhere */ public int computeLeastValue(SortedMap map) { - int cumulCount = 0; - int key, count; - for(Map.Entry entry : map.entrySet()) { - key = entry.getKey(); - count = entry.getValue(); - cumulCount += count; - if(cumulCount >= key) { - return key; - } - } - return 1; + int cumulCount = 0; + int key, count; + for(Map.Entry entry : map.entrySet()) { + key = entry.getKey(); + count = entry.getValue(); + cumulCount += count; + if(cumulCount >= key) { + return key; + } + } + return 1; } - + /** * Broadcasts a value to the neighbors by writing it to the out-edges */ - - public void broadcastValue(ChiVertex vertex, int value) { - for(int i = 0; i < vertex.numOutEdges(); i++) { - vertex.outEdge(i).setValue(value); - } - } - - /** - * Invoked with the start of a new iteration - */ - public void beginIteration(GraphChiContext ctx) { - vertexValuesUpdated = 0; - } - - /** - * Invoked at the end of every iteration - */ - public void endIteration(GraphChiContext ctx) { - System.out.println(vertexValuesUpdated + " updates."); - System.out.println("iteration " + ctx.getIteration() + " ends."); - - nIterations ++; - if( vertexValuesUpdated == 0 ) { - System.out.println("no updates in this round. No more rounds .. 
KCore-montresor terminates!"); - ctx.getScheduler().removeAllTasks(); - } - } - - public void beginInterval(GraphChiContext ctx, VertexInterval interval) {} + + public void broadcastValue(ChiVertex vertex, int value) { + for(int i = 0; i < vertex.numOutEdges(); i++) { + vertex.outEdge(i).setValue(value); + } + } + + /** + * Invoked with the start of a new iteration + */ + public void beginIteration(GraphChiContext ctx) { + vertexValuesUpdated = 0; + } + + /** + * Invoked at the end of every iteration + */ + public void endIteration(GraphChiContext ctx) { + System.out.println(vertexValuesUpdated + " updates."); + System.out.println("iteration " + ctx.getIteration() + " ends."); + + nIterations ++; + if( vertexValuesUpdated == 0 ) { + System.out.println("no updates in this round. No more rounds .. KCore-montresor terminates!"); + ctx.getScheduler().removeAllTasks(); + } + } + + public void beginInterval(GraphChiContext ctx, VertexInterval interval) {} public void endInterval(GraphChiContext ctx, VertexInterval interval) {} public void beginSubInterval(GraphChiContext ctx, VertexInterval interval) {} public void endSubInterval(GraphChiContext ctx, VertexInterval interval) {} - + protected static FastSharder createSharder(String graphName, int numShards) throws IOException { return new FastSharder(graphName, numShards, new VertexProcessor() { public Integer receiveVertexValue(int vertexId, String token) { @@ -196,29 +195,29 @@ public Integer receiveEdge(int from, int to, String token) { } }, new IntConverter(), new IntConverter()); } - + public static void main(String[] args) throws IOException { - - /** Run from command line (Example) - * java -Xmx2048m -cp bin:gchi-libs/* -Dnum_threads=4 KCoreDecomposition filename nbrOfShards filetype memoryBudget - * - * Assuming GraphChi jar files are saved in ./gchi-libs/ - */ - - String inputDirectory = "./datasets/", - outputDirectory = "./output/"; - - String fileName = args[0]; + + /** Run from command line (Example) + * java 
-Xmx2048m -cp bin:gchi-libs/* -Dnum_threads=4 edu.cmu.graphchi.apps.kcore.KCoreDecomposition filename nbrOfShards filetype memoryBudget + * + * Assuming GraphChi jar files are saved in ./gchi-libs/ + */ + + String inputDirectory = "./datasets/"; + String outputDirectory = "./output/"; + + String fileName = args[0]; int nShards = Integer.parseInt(args[1]); String fileType = args[2]; int memBudget = (args.length >= 4 ? Integer.parseInt(args[3]) : null); - + CompressedIO.disableCompression(); - + String inputFilePath = inputDirectory + fileName; /* Preprocessing graph : Making shards */ - + FastSharder sharder = createSharder(inputFilePath, nShards); if (inputFilePath.equals("pipein")) { // Allow piping graph in sharder.shard(System.in, fileType); @@ -231,40 +230,39 @@ public static void main(String[] args) throws IOException { } /* Running GraphChi */ - - GraphChiEngine engine = new GraphChiEngine(inputFilePath, nShards); + GraphChiEngine engine = + new GraphChiEngine(inputFilePath, nShards); engine.setMemoryBudgetMb(memBudget); engine.setSkipZeroDegreeVertices(true); engine.setEnableScheduler(true); engine.setEdataConverter(new IntConverter()); engine.setVertexDataConverter(new IntConverter()); - + engine.run(new KCoreDecomposer(), INFINITY); logger.info("Ready."); - - + /* Outputting Core Values */ - startWriting(new File(outputDirectory + "out-cores-" + fileName), false); bw.write(nVertexes + "\n"); - + VertexIdTranslate trans = engine.getVertexIdTranslate(); - TreeSet topToBottom = Toplist.topListInt(inputFilePath, engine.numVertices(), engine.numVertices()); - + TreeSet topToBottom = Toplist.topListInt(inputFilePath, + engine.numVertices(), engine.numVertices()); + for(IdInt walker : topToBottom) { - float coreValue = walker.getValue(); - bw.write(trans.backward(walker.getVertexId()) + ", " + String.valueOf((int)coreValue) + "\n"); + float coreValue = walker.getValue(); + bw.write(trans.backward(walker.getVertexId()) + ", " + String.valueOf((int)coreValue) + 
"\n"); } - + stopWriting(); - + System.out.println("Vertexes Processed: " + engine.numVertices()); System.out.println("Edges Processed: " + engine.numEdges()) ; - + System.out.println("nIterations: " + nIterations); System.out.println("Success!"); - + } } From 6029073f85ecd8530aefd4c985e29b34b502f497 Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Tue, 26 Aug 2014 14:13:43 -0700 Subject: [PATCH 16/29] Added my changes that allow for long walks instead of just int walks --- .../randomwalks/PersonalizedPageRank.java | 17 +- .../recommendations/MovieRecommender.java | 23 +- .../apps/recommendations/TwitterWTF.java | 18 +- .../edu/cmu/graphchi/walks/BucketsToSend.java | 13 + .../cmu/graphchi/walks/DrunkardContext.java | 24 - .../cmu/graphchi/walks/DrunkardDriver.java | 169 ++----- .../cmu/graphchi/walks/DrunkardFactory.java | 8 + .../edu/cmu/graphchi/walks/DrunkardJob.java | 23 +- .../cmu/graphchi/walks/DrunkardMobEngine.java | 32 +- .../graphchi/walks/GrabbedBucketConsumer.java | 3 +- .../graphchi/walks/IntDrunkardContext.java | 29 ++ .../cmu/graphchi/walks/IntDrunkardDriver.java | 122 +++++ .../graphchi/walks/IntDrunkardFactory.java | 16 + .../graphchi/walks/IntLocalWalkBuffer.java | 36 ++ .../edu/cmu/graphchi/walks/IntWalkArray.java | 18 + .../cmu/graphchi/walks/IntWalkManager.java | 381 +++++++++++++++ .../cmu/graphchi/walks/LocalWalkBuffer.java | 62 +-- .../graphchi/walks/LongDrunkardContext.java | 29 ++ .../graphchi/walks/LongDrunkardDriver.java | 122 +++++ .../graphchi/walks/LongDrunkardFactory.java | 16 + .../graphchi/walks/LongLocalWalkBuffer.java | 36 ++ .../edu/cmu/graphchi/walks/LongWalkArray.java | 18 + .../cmu/graphchi/walks/LongWalkManager.java | 384 +++++++++++++++ .../edu/cmu/graphchi/walks/WalkArray.java | 5 + .../edu/cmu/graphchi/walks/WalkManager.java | 363 +------------- .../graphchi/walks/WalkManagerForPaths.java | 258 ---------- .../cmu/graphchi/walks/WalkPathAnalyzer.java | 102 ---- .../edu/cmu/graphchi/walks/WalkSnapshot.java | 5 +- 
.../graphchi/walks/WalkSnapshotForPaths.java | 14 - .../graphchi/walks/WalkUpdateFunction.java | 20 +- .../walks/deprecated/DrunkardMob.java | 179 ------- .../walks/deprecated/DrunkardMobForPaths.java | 187 -------- .../deprecated/DrunkardMobWithCompanion.java | 451 ------------------ .../distributions/DrunkardCompanion.java | 42 +- .../distributions/IntDrunkardCompanion.java | 42 ++ .../distributions/LongDrunkardCompanion.java | 42 ++ .../RemoteDrunkardCompanion.java | 3 +- .../walks/distributions/TwoKeyCompanion.java | 415 ++++++++++++++++ .../cmu/graphchi/walks/TestWalkManager.java | 211 ++++++-- .../walks/TestWalkManagerWithPaths.java | 121 ----- 40 files changed, 2082 insertions(+), 1977 deletions(-) create mode 100644 src/main/java/edu/cmu/graphchi/walks/BucketsToSend.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/DrunkardFactory.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/IntDrunkardContext.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/IntDrunkardDriver.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/IntDrunkardFactory.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/IntLocalWalkBuffer.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/IntWalkArray.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/IntWalkManager.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/LongDrunkardContext.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/LongDrunkardFactory.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/LongLocalWalkBuffer.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/LongWalkArray.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/LongWalkManager.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/WalkArray.java delete mode 100644 src/main/java/edu/cmu/graphchi/walks/WalkManagerForPaths.java delete mode 100644 
src/main/java/edu/cmu/graphchi/walks/WalkPathAnalyzer.java delete mode 100644 src/main/java/edu/cmu/graphchi/walks/WalkSnapshotForPaths.java delete mode 100644 src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMob.java delete mode 100644 src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobForPaths.java delete mode 100644 src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobWithCompanion.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/distributions/IntDrunkardCompanion.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/distributions/LongDrunkardCompanion.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java delete mode 100644 test/edu/cmu/graphchi/walks/TestWalkManagerWithPaths.java diff --git a/src/main/java/edu/cmu/graphchi/apps/randomwalks/PersonalizedPageRank.java b/src/main/java/edu/cmu/graphchi/apps/randomwalks/PersonalizedPageRank.java index e2a5aaf3..61be9745 100644 --- a/src/main/java/edu/cmu/graphchi/apps/randomwalks/PersonalizedPageRank.java +++ b/src/main/java/edu/cmu/graphchi/apps/randomwalks/PersonalizedPageRank.java @@ -7,7 +7,13 @@ import edu.cmu.graphchi.walks.DrunkardContext; import edu.cmu.graphchi.walks.DrunkardJob; import edu.cmu.graphchi.walks.DrunkardMobEngine; +import edu.cmu.graphchi.walks.IntDrunkardContext; +import edu.cmu.graphchi.walks.IntDrunkardFactory; +import edu.cmu.graphchi.walks.IntWalkArray; import edu.cmu.graphchi.walks.WalkUpdateFunction; +import edu.cmu.graphchi.walks.WalkArray; +import edu.cmu.graphchi.walks.WeightedHopper; +import edu.cmu.graphchi.walks.distributions.IntDrunkardCompanion; import edu.cmu.graphchi.walks.distributions.DrunkardCompanion; import edu.cmu.graphchi.walks.distributions.RemoteDrunkardCompanion; import org.apache.commons.cli.*; @@ -39,7 +45,8 @@ public class PersonalizedPageRank implements WalkUpdateFunction(baseFilename, nShards); + this.drunkardMobEngine = new DrunkardMobEngine(baseFilename, nShards, + new 
IntDrunkardFactory()); this.companionUrl = companionUrl; this.firstSource = firstSource; @@ -55,7 +62,7 @@ private void execute(int numIters) throws Exception { */ RemoteDrunkardCompanion companion; if (companionUrl.equals("local")) { - companion = new DrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); + companion = new IntDrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); } else { companion = (RemoteDrunkardCompanion) Naming.lookup(companionUrl); } @@ -91,10 +98,12 @@ private void execute(int numIters) throws Exception { * WalkUpdateFunction interface implementations */ @Override - public void processWalksAtVertex(int[] walks, + public void processWalksAtVertex(WalkArray walkArray, ChiVertex vertex, - DrunkardContext drunkardContext, + DrunkardContext drunkardContext_, Random randomGenerator) { + int[] walks = ((IntWalkArray)walkArray).getArray(); + IntDrunkardContext drunkardContext = (IntDrunkardContext) drunkardContext_; int numWalks = walks.length; int numOutEdges = vertex.numOutEdges(); diff --git a/src/main/java/edu/cmu/graphchi/apps/recommendations/MovieRecommender.java b/src/main/java/edu/cmu/graphchi/apps/recommendations/MovieRecommender.java index b2c40a81..00bcbaa4 100644 --- a/src/main/java/edu/cmu/graphchi/apps/recommendations/MovieRecommender.java +++ b/src/main/java/edu/cmu/graphchi/apps/recommendations/MovieRecommender.java @@ -10,9 +10,13 @@ import edu.cmu.graphchi.walks.DrunkardContext; import edu.cmu.graphchi.walks.DrunkardJob; import edu.cmu.graphchi.walks.DrunkardMobEngine; +import edu.cmu.graphchi.walks.IntDrunkardContext; +import edu.cmu.graphchi.walks.IntDrunkardFactory; +import edu.cmu.graphchi.walks.IntWalkArray; +import edu.cmu.graphchi.walks.WalkArray; import edu.cmu.graphchi.walks.WalkUpdateFunction; import edu.cmu.graphchi.walks.WeightedHopper; -import edu.cmu.graphchi.walks.distributions.DrunkardCompanion; +import edu.cmu.graphchi.walks.distributions.IntDrunkardCompanion; import org.apache.commons.cli.*; @@ -51,11 
+55,11 @@ protected void execute() throws Exception { logger.info("Computed ALS, now random walks"); /* Initialize drunkardmob */ - DrunkardMobEngine drunkardMobEngine = new DrunkardMobEngine(baseFilename, nShards); + DrunkardMobEngine drunkardMobEngine = new DrunkardMobEngine(baseFilename, nShards, new IntDrunkardFactory()); DrunkardJob positiveJob = drunkardMobEngine.addJob("positive", EdgeDirection.IN_AND_OUT_EDGES, - new PositiveWalkUpdate(), new DrunkardCompanion(2, Runtime.getRuntime().maxMemory() / 8)); + new PositiveWalkUpdate(), new IntDrunkardCompanion(2, Runtime.getRuntime().maxMemory() / 8)); DrunkardJob negativeJob = drunkardMobEngine.addJob("negative", EdgeDirection.IN_AND_OUT_EDGES, - new NegativeWalkUpdate(), new DrunkardCompanion(2, Runtime.getRuntime().maxMemory() / 8)); + new NegativeWalkUpdate(), new IntDrunkardCompanion(2, Runtime.getRuntime().maxMemory() / 8)); drunkardMobEngine.setEdataConverter(new FloatConverter()); @@ -117,14 +121,15 @@ protected void execute() throws Exception { protected static class PositiveWalkUpdate implements WalkUpdateFunction { @Override - public void processWalksAtVertex(int[] walks, ChiVertex vertex, DrunkardContext drunkardContext, Random randomGenerator) { - hopToHighRatings(walks, vertex, drunkardContext, randomGenerator); + public void processWalksAtVertex(WalkArray walkArray, ChiVertex vertex, DrunkardContext drunkardContext, Random randomGenerator) { + int[] walks = ((IntWalkArray)walkArray).getArray(); + hopToHighRatings(walks, vertex, (IntDrunkardContext)drunkardContext, randomGenerator); } // Have some weight for <= 3 ratings to avoid divide by zeroes. 
private static final float weightedRating[] = {0.0f, 0.00001f, 0.00001f, 0.0001f, 100.0f, 1000.0f}; - protected static void hopToHighRatings(int[] walks, ChiVertex vertex, DrunkardContext drunkardContext, Random randomGenerator) { + protected static void hopToHighRatings(int[] walks, ChiVertex vertex, IntDrunkardContext drunkardContext, Random randomGenerator) { int[] hops = WeightedHopper.generateRandomHopsAliasMethod(randomGenerator, vertex, walks.length, EdgeDirection.IN_AND_OUT_EDGES, new WeightedHopper.EdgeWeightMap() { @@ -164,7 +169,9 @@ public int[] getNotTrackedVertices(ChiVertex vertex) { protected class NegativeWalkUpdate extends PositiveWalkUpdate { @Override - public void processWalksAtVertex(int[] walks, ChiVertex vertex, DrunkardContext drunkardContext, Random randomGenerator) { + public void processWalksAtVertex(WalkArray walkArray, ChiVertex vertex, DrunkardContext drunkardContext_, Random randomGenerator) { + int[] walks = ((IntWalkArray)walkArray).getArray(); + IntDrunkardContext drunkardContext = (IntDrunkardContext) drunkardContext_; // Movie vertex - do same as the positive if (vertex.numInEdges() > 0 || drunkardContext.getIteration() > 0) { hopToHighRatings(walks, vertex, drunkardContext, randomGenerator); diff --git a/src/main/java/edu/cmu/graphchi/apps/recommendations/TwitterWTF.java b/src/main/java/edu/cmu/graphchi/apps/recommendations/TwitterWTF.java index a7496fd2..f33737e7 100644 --- a/src/main/java/edu/cmu/graphchi/apps/recommendations/TwitterWTF.java +++ b/src/main/java/edu/cmu/graphchi/apps/recommendations/TwitterWTF.java @@ -11,8 +11,13 @@ import edu.cmu.graphchi.walks.DrunkardContext; import edu.cmu.graphchi.walks.DrunkardJob; import edu.cmu.graphchi.walks.DrunkardMobEngine; +import edu.cmu.graphchi.walks.IntDrunkardContext; +import edu.cmu.graphchi.walks.IntDrunkardFactory; +import edu.cmu.graphchi.walks.IntWalkArray; +import edu.cmu.graphchi.walks.WalkArray; import edu.cmu.graphchi.walks.WalkUpdateFunction; -import 
edu.cmu.graphchi.walks.distributions.DrunkardCompanion; +import edu.cmu.graphchi.walks.WeightedHopper; +import edu.cmu.graphchi.walks.distributions.IntDrunkardCompanion; import edu.cmu.graphchi.walks.distributions.RemoteDrunkardCompanion; import org.apache.commons.cli.*; @@ -65,7 +70,8 @@ public class TwitterWTF implements WalkUpdateFunction { public TwitterWTF(String companionUrl, String baseFilename, int nShards, int firstSource, int numSources, int walksPerSource) throws Exception{ this.baseFilename = baseFilename; - this.drunkardMobEngine = new DrunkardMobEngine(baseFilename, nShards); + this.drunkardMobEngine = new DrunkardMobEngine(baseFilename, nShards, + new IntDrunkardFactory()); this.numShards = nShards; this.companionUrl = companionUrl; @@ -82,7 +88,7 @@ private void execute(int numIters) throws Exception { */ final RemoteDrunkardCompanion companion; if (companionUrl.equals("local")) { - companion = new DrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); + companion = new IntDrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); } else { companion = (RemoteDrunkardCompanion) Naming.lookup(companionUrl); } @@ -206,10 +212,12 @@ private void computeRecs(RemoteDrunkardCompanion companion, int circleOfTrustSiz * WalkUpdateFunction interface implementations */ @Override - public void processWalksAtVertex(int[] walks, + public void processWalksAtVertex(WalkArray walkArray, ChiVertex vertex, - DrunkardContext drunkardContext, + DrunkardContext drunkardContext_, Random randomGenerator) { + int[] walks = ((IntWalkArray)walkArray).getArray(); + IntDrunkardContext drunkardContext = (IntDrunkardContext) drunkardContext_; int numWalks = walks.length; int numOutEdges = vertex.numOutEdges(); diff --git a/src/main/java/edu/cmu/graphchi/walks/BucketsToSend.java b/src/main/java/edu/cmu/graphchi/walks/BucketsToSend.java new file mode 100644 index 00000000..dfc37a18 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/BucketsToSend.java @@ -0,0 +1,13 @@ 
+package edu.cmu.graphchi.walks; + +public class BucketsToSend { + public final int firstVertex; + public final WalkArray walks; + public final int length; + + BucketsToSend(int firstVertex, WalkArray walks, int length) { + this.firstVertex = firstVertex; + this.walks = walks; + this.length = length; + } +} diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardContext.java b/src/main/java/edu/cmu/graphchi/walks/DrunkardContext.java index 70fc9553..ab460f4e 100644 --- a/src/main/java/edu/cmu/graphchi/walks/DrunkardContext.java +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardContext.java @@ -22,33 +22,9 @@ public interface DrunkardContext { int getIteration(); - /** - * Moves walk to next vertex - * @param walk walk identified - * @param destinationVertex vertex id to move hte walk to - * @param trackBit set to true if this walk should be tracked, otherwise false - */ - void forwardWalkTo(int walk, int destinationVertex, boolean trackBit); - - void resetWalk(int walk, boolean trackBit); - - /** - * Reads the track-bit of a walk identifier. - * @param walk - * @return - */ - boolean getTrackBit(int walk); - - /** - * Returns true if walk was started from the vertex - */ - boolean isWalkStartedFromVertex(int walk); - /** * Object for translating from internal to original vertex ids * @return */ VertexIdTranslate getVertexIdTranslate(); - - void resetAll(int[] walks); } diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java b/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java index a85fc6ef..cded89ed 100644 --- a/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java @@ -18,12 +18,12 @@ * Class to encapsulate the graphchi program running the show. * Due to several optimizations, it is quite complicated! 
*/ -public class DrunkardDriver implements GrabbedBucketConsumer { +public abstract class DrunkardDriver implements GrabbedBucketConsumer { private WalkSnapshot curWalkSnapshot; - private final DrunkardJob job; - private static Logger logger = ChiLogger.getLogger("drunkard-driver"); + protected final DrunkardJob job; + protected static Logger logger = ChiLogger.getLogger("drunkard-driver"); - private LinkedBlockingQueue bucketQueue = new LinkedBlockingQueue(); + protected LinkedBlockingQueue bucketQueue = new LinkedBlockingQueue(); private boolean finished = false; private Thread dumperThread; private AtomicLong pendingWalksToSubmit = new AtomicLong(0); @@ -39,72 +39,45 @@ public class DrunkardDriver implements GrabbedBuck // Setup thread for sending walks to the companion (i.e tracker) // Launch a thread to send to the companion - dumperThread = new Thread(new Runnable() { - public void run() { - int[] walks = new int[256 * 1024]; - int[] vertices = new int[256 * 1024]; - int idx = 0; - - while(!finished || bucketQueue.size() > 0) { - BucketsToSend bucket = null; - try { - bucket = bucketQueue.poll(1000, TimeUnit.MILLISECONDS); - } catch (InterruptedException e) { - } - if (bucket != null) { - pendingWalksToSubmit.addAndGet(-bucket.length); - for(int i=0; i= walks.length) { - try { - job.getCompanion().processWalks(walks, vertices); - } catch (Exception err) { - err.printStackTrace(); - } - idx = 0; - } - - } - } - } + dumperThread = new Thread(createDumperThread()); + dumperThread.start(); + } + + protected abstract DumperThread createDumperThread(); - // Send rest + protected abstract class DumperThread implements Runnable { + + public void run() { + while(!finished || bucketQueue.size() > 0) { + BucketsToSend bucket = null; try { - int[] tmpWalks = new int[idx]; - int[] tmpVertices = new int[idx]; - System.arraycopy(walks, 0, tmpWalks, 0, idx); - System.arraycopy(vertices, 0, tmpVertices, 0, idx); - job.getCompanion().processWalks(tmpWalks, tmpVertices); - } 
catch (Exception err) { - err.printStackTrace(); + bucket = bucketQueue.poll(1000, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + } + if (bucket != null) { + pendingWalksToSubmit.addAndGet(-bucket.length); + for(int i=0; i vertex, final GraphChiContext context, - final LocalWalkBuffer localBuf) { - + protected abstract DrunkardContext createDrunkardContext(int vertexId, GraphChiContext context, + LocalWalkBuffer localBuf); + public void update(ChiVertex vertex, + final GraphChiContext context, final LocalWalkBuffer localBuf) { try { // Flow control while (pendingWalksToSubmit.get() > job.getWalkManager().getTotalWalks() / 40) { @@ -116,7 +89,7 @@ public void update(ChiVertex vertex, final GraphCh } boolean firstIteration = (context.getIteration() == 0); - int[] walksAtMe = curWalkSnapshot.getWalksAtVertex(vertex.getId(), true); + WalkArray walksAtMe = curWalkSnapshot.getWalksAtVertex(vertex.getId(), true); // Very dirty memory management curWalkSnapshot.clear(vertex.getId()); @@ -130,61 +103,12 @@ public void update(ChiVertex vertex, final GraphCh job.getCompanion().setAvoidList(mySourceIdx, callback.getNotTrackedVertices(vertex)); } } - if (walksAtMe == null || walksAtMe.length == 0) { - return; - } + if (walksAtMe == null || walksAtMe.size() == 0) return; Random randomGenerator = localBuf.random; - final boolean isSource = job.getWalkManager().isSource(vertex.getId()); - final int mySourceIndex = (isSource ? 
job.getWalkManager().getVertexSourceIdx(vertex.getId()) : -1); - - callback.processWalksAtVertex(walksAtMe, vertex, new DrunkardContext() { - @Override - public boolean isSource() { - return isSource; - } - - @Override - public int sourceIndex() { - return mySourceIndex; - } - - @Override - public int getIteration() { - return context.getIteration(); - } - - @Override - public void forwardWalkTo(int walk, int destinationVertex, boolean trackBit) { - localBuf.add(WalkManager.sourceIdx(walk), destinationVertex, trackBit); - } - - @Override - public void resetWalk(int walk, boolean trackBit) { - forwardWalkTo(walk, job.getWalkManager().getSourceVertex(WalkManager.sourceIdx(walk)), false); - } - - @Override - public boolean getTrackBit(int walk) { - return WalkManager.hop(walk); - } - - @Override - public boolean isWalkStartedFromVertex(int walk) { - return mySourceIndex == WalkManager.sourceIdx(walk); - } - - @Override - public VertexIdTranslate getVertexIdTranslate() { - return getVertexIdTranslate(); - } - - @Override - public void resetAll(int[] walks) { - for(int w : walks) resetWalk(w, false); - } - }, randomGenerator); + DrunkardContext drunkardContext = createDrunkardContext(vertex.getId(), context, localBuf); + callback.processWalksAtVertex(walksAtMe, vertex, drunkardContext, randomGenerator); } catch (RemoteException re) { throw new RuntimeException(re); } @@ -195,9 +119,6 @@ public void initWalks() throws RemoteException{ job.getCompanion().setSources(job.getWalkManager().getSources()); } - - - public void beginIteration(GraphChiContext ctx) { if (ctx.getIteration() == 0) { ctx.getScheduler().removeAllTasks(); @@ -262,8 +183,6 @@ public void endSubInterval(GraphChiContext ctx, final VertexInterval interval) { } } - - public void beginInterval(GraphChiContext ctx, VertexInterval interval) { /* Count walks */ long initializedWalks = job.getWalkManager().getTotalWalks(); @@ -279,7 +198,7 @@ public void beginInterval(GraphChiContext ctx, VertexInterval interval) 
{ public void endInterval(GraphChiContext ctx, VertexInterval interval) {} - public void consume(int firstVertexInBucket, int[] walkBucket, int len) { + public void consume(int firstVertexInBucket, WalkArray walkBucket, int len) { try { pendingWalksToSubmit.addAndGet(len); bucketQueue.put(new BucketsToSend(firstVertexInBucket, walkBucket, len)); @@ -287,18 +206,4 @@ public void consume(int firstVertexInBucket, int[] walkBucket, int len) { e.printStackTrace(); } } - - private static class BucketsToSend { - int firstVertex; - int[] walks; - int length; - - BucketsToSend(int firstVertex, int[] walks, int length) { - this.firstVertex = firstVertex; - this.walks = walks; - this.length = length; - } - } - } - diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardFactory.java b/src/main/java/edu/cmu/graphchi/walks/DrunkardFactory.java new file mode 100644 index 00000000..26e40da2 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardFactory.java @@ -0,0 +1,8 @@ +package edu.cmu.graphchi.walks; + +public interface DrunkardFactory { + public DrunkardDriver createDrunkardDriver(DrunkardJob job, + WalkUpdateFunction callback); + public WalkManager createWalkManager(int numVertices, int numSources); + public LocalWalkBuffer createLocalWalkBuffer(); +} diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardJob.java b/src/main/java/edu/cmu/graphchi/walks/DrunkardJob.java index e62ce8f8..13e62ced 100644 --- a/src/main/java/edu/cmu/graphchi/walks/DrunkardJob.java +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardJob.java @@ -11,16 +11,15 @@ public class DrunkardJob { private String name; private WalkManager walkManager; private RemoteDrunkardCompanion companion; - private int numVertices; + private DrunkardFactory factory; + protected int numVertices; - public DrunkardJob(String name, RemoteDrunkardCompanion companion, int numVertices) { + public DrunkardJob(String name, RemoteDrunkardCompanion companion, int numVertices, + DrunkardFactory factory) { this.name = 
name; this.numVertices = numVertices; this.companion = companion; - } - - protected WalkManager createWalkManager(int numSources) { - return new WalkManager(numVertices, numSources); + this.factory = factory; } /** @@ -30,13 +29,13 @@ protected WalkManager createWalkManager(int numSources) { * @param walksPerSource how many walks to start from each source */ public void configureSourceRangeInternalIds(int firstSourceId, int numSources, int walksPerSource) { - if (this.walkManager != null) { + if (walkManager != null) { throw new IllegalStateException("You can configure walks only once!"); } - this.walkManager = createWalkManager(numSources); + walkManager = factory.createWalkManager(numVertices, numSources); for(int i=firstSourceId; i < firstSourceId + numSources; i++) { - this.walkManager.addWalkBatch(i, walksPerSource); + walkManager.addWalkBatch(i, walksPerSource); } } @@ -46,13 +45,13 @@ public void configureSourceRangeInternalIds(int firstSourceId, int numSources, i * @param walksPerSource */ public void configureWalkSources(List walkSources, int walksPerSource) { - if (this.walkManager != null) { + if (walkManager != null) { throw new IllegalStateException("You can configure walks only once!"); } - this.walkManager = createWalkManager(walkSources.size()); + walkManager = factory.createWalkManager(numVertices, walkSources.size()); Collections.sort(walkSources); for(int src : walkSources) { - this.walkManager.addWalkBatch(src, walksPerSource); + walkManager.addWalkBatch(src, walksPerSource); } } diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardMobEngine.java b/src/main/java/edu/cmu/graphchi/walks/DrunkardMobEngine.java index b8f933f2..dbe80217 100644 --- a/src/main/java/edu/cmu/graphchi/walks/DrunkardMobEngine.java +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardMobEngine.java @@ -20,15 +20,27 @@ */ public class DrunkardMobEngine { - protected GraphChiEngine engine; - protected List drivers; + private GraphChiEngine engine; + private List drivers; - 
protected static Logger logger = ChiLogger.getLogger("drunkardmob-engine"); + private static Logger logger = ChiLogger.getLogger("drunkardmob-engine"); + private DrunkardFactory factory; - - public DrunkardMobEngine(String baseFilename, int nShards) throws IOException { + /** + * Create the engine + * @param factory we allow walks to be represented either as ints or as longs (if more + * information needs to be stored, e.g. to retrieve path information from the walks). In order + * to avoid autoboxing, we do a little bit of fancy footwork here. The caller must pass in an + * IntDrunkardFactory or a LongDrunkardFactory, and then when processing WalkArrays and + * DrunkardContexts, they must be cast to IntWalkArrays or LongWalkArrays (and Contexts, and + * whatever else) in order to get the actual values out. This way we can keep the primitive + * typing while still sharing as much code as possible between the int and the long processing. + */ + public DrunkardMobEngine(String baseFilename, int nShards, + DrunkardFactory factory) throws IOException { createGraphChiEngine(baseFilename, nShards); this.drivers = new ArrayList(); + this.factory = factory; // Disable all edge directions by default engine.setDisableInedges(true); @@ -37,7 +49,7 @@ public DrunkardMobEngine(String baseFilename, int nShards) throws IOException { engine.setModifiesOutedges(false); } - protected void createGraphChiEngine(String baseFilename, int nShards) throws IOException { + private void createGraphChiEngine(String baseFilename, int nShards) throws IOException { this.engine = new GraphChiEngine(baseFilename, nShards); this.engine.setOnlyAdjacency(true); this.engine.setVertexDataConverter(null); @@ -88,7 +100,7 @@ public void setVertexDataConverter(BytesToValueConverter vertexD */ public DrunkardJob addJob(String jobName, EdgeDirection edgeDirection, WalkUpdateFunction callback, - RemoteDrunkardCompanion companion) throws IOException { + RemoteDrunkardCompanion companion) { /* Configure engine 
parameters */ switch(edgeDirection) { @@ -107,8 +119,8 @@ public DrunkardJob addJob(String jobName, EdgeDirection edgeDirection, /** * Create job object and the driver-object. */ - DrunkardJob job = new DrunkardJob(jobName, companion, engine.numVertices()); - drivers.add(new DrunkardDriver(job, callback)); + DrunkardJob job = new DrunkardJob(jobName, companion, engine.numVertices(), factory); + drivers.add(factory.createDrunkardDriver(job, callback)); return job; } @@ -169,7 +181,7 @@ public void update(ChiVertex vertex, GraphChiConte if (context.getThreadLocal() == null) { ArrayList multiplexedLocalBuffers = new ArrayList(drivers.size()); for(DrunkardDriver driver: drivers) { - LocalWalkBuffer buf = new LocalWalkBuffer(); + LocalWalkBuffer buf = factory.createLocalWalkBuffer(); driver.addLocalBuffer(buf); multiplexedLocalBuffers.add(buf); } diff --git a/src/main/java/edu/cmu/graphchi/walks/GrabbedBucketConsumer.java b/src/main/java/edu/cmu/graphchi/walks/GrabbedBucketConsumer.java index db5e0678..d183c7b7 100644 --- a/src/main/java/edu/cmu/graphchi/walks/GrabbedBucketConsumer.java +++ b/src/main/java/edu/cmu/graphchi/walks/GrabbedBucketConsumer.java @@ -1,6 +1,5 @@ package edu.cmu.graphchi.walks; - public interface GrabbedBucketConsumer { - void consume(int firstVertexInBucket, int[] walkBucket, int len); + void consume(int firstVertexInBucket, WalkArray walkBucket, int len); } diff --git a/src/main/java/edu/cmu/graphchi/walks/IntDrunkardContext.java b/src/main/java/edu/cmu/graphchi/walks/IntDrunkardContext.java new file mode 100644 index 00000000..4ad6d54a --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/IntDrunkardContext.java @@ -0,0 +1,29 @@ +package edu.cmu.graphchi.walks; + +/** + * @author Aapo Kyrola + */ +public interface IntDrunkardContext extends DrunkardContext { + + /** + * Moves walk to next vertex + * @param walk walk identified + * @param destinationVertex vertex id to move hte walk to + * @param trackBit set to true if this walk should be 
tracked, otherwise false + */ + void forwardWalkTo(int walk, int destinationVertex, boolean trackBit); + + void resetWalk(int walk, boolean trackBit); + + /** + * Reads the track-bit of a walk identifier. + * @param walk + * @return + */ + boolean getTrackBit(int walk); + + /** + * Returns true if walk was started from the vertex + */ + boolean isWalkStartedFromVertex(int walk); +} diff --git a/src/main/java/edu/cmu/graphchi/walks/IntDrunkardDriver.java b/src/main/java/edu/cmu/graphchi/walks/IntDrunkardDriver.java new file mode 100644 index 00000000..e51fb61c --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/IntDrunkardDriver.java @@ -0,0 +1,122 @@ +package edu.cmu.graphchi.walks; + +import edu.cmu.graphchi.*; +import edu.cmu.graphchi.preprocessing.VertexIdTranslate; + +/** + * Class to encapsulate the graphchi program running the show. + * Due to several optimizations, it is quite complicated! + */ +public class IntDrunkardDriver + extends DrunkardDriver implements GrabbedBucketConsumer { + + IntDrunkardDriver(final DrunkardJob job, + WalkUpdateFunction callback) { + super(job, callback); + } + + @Override + protected IntDumperThread createDumperThread() { + return new IntDumperThread(); + } + + protected class IntDumperThread extends DrunkardDriver.DumperThread { + private int[] walks = new int[256 * 1024]; + private int[] vertices = new int[256 * 1024]; + private int idx = 0; + + @Override + protected void processWalks(BucketsToSend bucket, int i) { + IntWalkManager manager = (IntWalkManager) job.getWalkManager(); + IntWalkArray bucketWalks = (IntWalkArray) bucket.walks; + int w = bucketWalks.getArray()[i]; + int v = manager.off(w) + bucket.firstVertex; + + + // Skip walks with the track-bit (hop-bit) not set + boolean trackBit = manager.trackBit(w); + + if (!trackBit) { + return; + } + + walks[idx] = w; + vertices[idx] = v; + idx++; + + if (idx >= walks.length) { + try { + job.getCompanion().processWalks(new IntWalkArray(walks), vertices); + } catch 
(Exception err) { + err.printStackTrace(); + } + idx = 0; + } + } + + @Override + protected void sendRest() { + // Send rest + try { + int[] tmpWalks = new int[idx]; + int[] tmpVertices = new int[idx]; + System.arraycopy(walks, 0, tmpWalks, 0, idx); + System.arraycopy(vertices, 0, tmpVertices, 0, idx); + job.getCompanion().processWalks(new IntWalkArray(tmpWalks), tmpVertices); + } catch (Exception err) { + err.printStackTrace(); + } + } + } + + @Override + protected DrunkardContext createDrunkardContext(int vertexId, final GraphChiContext context, + final LocalWalkBuffer localBuf_) { + final IntWalkManager manager = (IntWalkManager) job.getWalkManager(); + final boolean isSource = manager.isSource(vertexId); + final int mySourceIndex = (isSource ? manager.getVertexSourceIdx(vertexId) : -1); + final IntLocalWalkBuffer localBuf = (IntLocalWalkBuffer) localBuf_; + return new IntDrunkardContext() { + @Override + public boolean isSource() { + return isSource; + } + + @Override + public int sourceIndex() { + return mySourceIndex; + } + + @Override + public int getIteration() { + return context.getIteration(); + } + + @Override + public void forwardWalkTo(int walk, int destinationVertex, boolean trackBit) { + localBuf.add(walk, destinationVertex, trackBit); + } + + @Override + public void resetWalk(int walk, boolean trackBit) { + forwardWalkTo(walk, manager.getSourceVertex(walk), trackBit); + } + + @Override + public boolean getTrackBit(int walk) { + return manager.trackBit(walk); + } + + @Override + public boolean isWalkStartedFromVertex(int walk) { + return mySourceIndex == manager.sourceIdx(walk); + } + + @Override + public VertexIdTranslate getVertexIdTranslate() { + return getVertexIdTranslate(); + } + }; + } +} + diff --git a/src/main/java/edu/cmu/graphchi/walks/IntDrunkardFactory.java b/src/main/java/edu/cmu/graphchi/walks/IntDrunkardFactory.java new file mode 100644 index 00000000..8621557e --- /dev/null +++ 
b/src/main/java/edu/cmu/graphchi/walks/IntDrunkardFactory.java @@ -0,0 +1,16 @@ +package edu.cmu.graphchi.walks; + +public class IntDrunkardFactory + implements DrunkardFactory { + public DrunkardDriver createDrunkardDriver(DrunkardJob job, + WalkUpdateFunction callback) { + return new IntDrunkardDriver(job, callback); + } + public WalkManager createWalkManager(int numVertices, int numSources) { + return new IntWalkManager(numVertices, numSources); + } + public LocalWalkBuffer createLocalWalkBuffer() { + return new IntLocalWalkBuffer(); + } +} + diff --git a/src/main/java/edu/cmu/graphchi/walks/IntLocalWalkBuffer.java b/src/main/java/edu/cmu/graphchi/walks/IntLocalWalkBuffer.java new file mode 100644 index 00000000..d29b4b7a --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/IntLocalWalkBuffer.java @@ -0,0 +1,36 @@ +package edu.cmu.graphchi.walks; + +class IntLocalWalkBuffer extends LocalWalkBuffer { + int[] walks; + + IntLocalWalkBuffer() { + super(); + walks = new int[DEFAULT_SIZE]; + } + + public void add(int walk, int destination, boolean trackBit) { + if (idx == walks.length) { + int[] tmp = walks; + walks = new int[tmp.length * 2]; + System.arraycopy(tmp, 0, walks, 0, tmp.length); + + expandArrays(); + } + walkBufferDests[idx] = destination; + walks[idx] = walk; + trackBits[idx] = trackBit; + idx++; + } + + @Override + public void purge(WalkManager walkManager) { + IntWalkManager manager = (IntWalkManager) walkManager; + for(int i=0; i> 8) & 0xffffff; + } + + public boolean trackBit(int walk) { + return ((walk & 1) != 0); + } + + public int off(int walk) { + return (walk >> 1) & 0x7f; + } + + /** + * Resets the bucket offset to reflect the new destination vertex, and also resets the track + * bit, according to the parameters. 
Note that those are the _only_ things re-encoded by this + * method, as those are the only things this method has access to; if other parts of the walk + * need to be changed, that must be taken care of in the WalkUpdateFunction _before_ forwarding + * the walk. + */ + public int reencodeWalk(int walk, int toVertex, boolean trackBit) { + int bucket = toVertex / bucketSize; + return encode(sourceIdx(walk), trackBit, toVertex % bucketSize); + } + + /** + * @param sourceId + * @param toVertex + * @param trackBit true if odd, false if even hop + */ + public void moveWalk(int walk, int toVertex, boolean trackBit) { + int bucket = toVertex / bucketSize; + synchronized (bucketLocks[bucket]) { + moveWalkUnsafe(walk, toVertex, trackBit); + } + } + + public void moveWalkUnsafe(int walk, int toVertex, boolean trackBit) { + // Re-encode the walk to reflect the movement + walk = reencodeWalk(walk, toVertex, trackBit); + + // Move the walk to the new bucket for processing + int bucket = toVertex / bucketSize; + int idx = walkIndices[bucket]; + if (idx == 0) { + walks[bucket] = new int[initialSize]; + } else { + if (idx == walks[bucket].length) { + int[] newBucket = new int[walks[bucket].length * 3 / 2]; + System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); + walks[bucket] = newBucket; + } + } + walks[bucket][idx] = walk; + walkIndices[bucket]++; + } + + @Override + protected void expandCapacity(int bucket, int additional) { + if (walks[bucket] != null) { + int desiredLength = walks[bucket].length + additional; + if (walks[bucket].length < desiredLength) { + int[] newBucket = new int[desiredLength]; + System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); + walks[bucket] = newBucket; + } + } else { + walks[bucket] = new int[additional]; + } + } + + @Override + public void initializeWalks() { + walks = new int[1 + numVertices / bucketSize][]; + bucketLocks = new Object[walks.length]; + for(int i=0; i 0 && i >= -offt && i + offt < 
snapshots.length) + snapshots[i + offt] = new int[snapshotSizes[i]]; + } + + for(int i=0; i < len; i++) { + int w = bucketToConsume[i]; + int vertex = bucketFirstVertex + off(w); + + if (vertex >= fromVertex && vertex <= toVertexInclusive) { + int snapshotOff = vertex - fromVertex; + int localOff = vertex - bucketFirstVertex; + snapshots[snapshotOff][snapshotIdxs[localOff]] = w; + snapshotIdxs[localOff]++; + } else { + // add back + moveWalk(w, vertex, trackBit(w)); + } + } + } + snapshotInitBits[localBucketIdx] = true; + } + } + if (bucketConsumer != null && bucketToConsume != null && len > 0) { + bucketConsumer.consume(bucketIdx * bucketSize, new IntWalkArray(bucketToConsume), len); + if (len > 1000000) { + log((bucketIdx * bucketSize) + " - " + ((bucketIdx+1)) * bucketSize + ", " + len); + } + } + _timer.stop(); + int[] array = snapshots[vertexId - fromVertex]; + if (array == null) { + return null; + } else { + return new IntWalkArray(snapshots[vertexId - fromVertex]); + } + } + } + + @Override + public int getFirstVertex() { + return fromVertex; + } + + @Override + public int getLastVertex() { + return toVertexInclusive; + } + }; + } + + /** Dump to file all walks with more than 0 hop */ + @Override + public void dumpToFile(WalkSnapshot snapshot, String filename) throws IOException { + final TimerContext _timer = dumpTimer.time(); + synchronized (filename.intern()) { + DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(new File(filename), true))); + for(int i=snapshot.getFirstVertex(); i <= snapshot.getLastVertex(); i++) { + int[] ws = ((IntWalkArray)snapshot.getWalksAtVertex(i, false)).getArray(); + if (ws != null) { + for(int j=0; j < ws.length; j++) { + int w = ws[j]; + int source = sources[sourceIdx(w)]; + dos.writeInt(source); + dos.writeInt(i); + } + } + } + dos.flush(); + dos.close(); + } + _timer.stop(); + } + + public int getSourceVertex(int walk) { + return sources[sourceIdx(walk)]; + } + + @Override + public 
void populateSchedulerForInterval(Scheduler scheduler, VertexInterval interval) { + final TimerContext _timer = schedulePopulate.time(); + int fromBucket = interval.getFirstVertex() / bucketSize; + int toBucket = interval.getLastVertex() / bucketSize; + + for(int bucketIdx=fromBucket; bucketIdx <= toBucket; bucketIdx++) { + int vertexBase = bucketIdx * bucketSize; + int[] bucket = walks[bucketIdx]; + + if (bucket != null) { + BitSet alreadySeen = new BitSet(bucketSize); + int counter = 0; + for(int j=0; j dests = new ArrayList(); - ArrayList hops = new ArrayList(); - - - + boolean[] trackBits; int idx = 0; - LocalWalkBuffer() { - walkBufferDests = new int[65536]; - walkSourcesAndHops = new int[65536]; - } + int DEFAULT_SIZE = 65536; + Random random; - public void add(int src, int dst, boolean hop) { - if (idx == walkSourcesAndHops.length) { - dests.add(walkBufferDests); - hops.add(walkSourcesAndHops); - walkBufferDests = new int[1000000]; - walkSourcesAndHops = new int[1000000]; - idx = 0; - } - walkBufferDests[idx] = dst; - walkSourcesAndHops[idx] = (hop ? -1 : 1) * (1 + src); // Note +1 so zero will be handled correctly - idx++; + LocalWalkBuffer() { + walkBufferDests = new int[DEFAULT_SIZE]; + trackBits = new boolean[DEFAULT_SIZE]; + random = new Random(); } - public void purge(WalkManager walkManager) { - dests.add(walkBufferDests); - hops.add(walkSourcesAndHops); + protected void expandArrays() { + int[] tmp = walkBufferDests; + walkBufferDests = new int[tmp.length * 2]; + System.arraycopy(tmp, 0, walkBufferDests, 0, tmp.length); - for(int k=0; k < hops.size(); k++) { - int[] d = dests.get(k); - int[] h = hops.get(k); - int len = (k == hops.size() - 1 ? 
idx : d.length); - for(int i=0; i < len; i++) { - int dst = d[i]; - int src = h[i]; - boolean hop = src < 0; - if (src < 0) src = -src; - src = src - 1; // Note, -1 - walkManager.updateWalkUnsafe(src, dst, hop); - } - } - hops = null; - dests = null; - walkSourcesAndHops = null; - walkBufferDests = null; + boolean[] tmpB = trackBits; + trackBits = new boolean[tmpB.length * 2]; + System.arraycopy(tmpB, 0, trackBits, 0, tmpB.length); } + + public abstract void purge(WalkManager walkManager); } diff --git a/src/main/java/edu/cmu/graphchi/walks/LongDrunkardContext.java b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardContext.java new file mode 100644 index 00000000..fa4c6c19 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardContext.java @@ -0,0 +1,29 @@ +package edu.cmu.graphchi.walks; + +/** + * @author Aapo Kyrola + */ +public interface LongDrunkardContext extends DrunkardContext { + + /** + * Moves walk to next vertex + * @param walk walk identified + * @param destinationVertex vertex id to move hte walk to + * @param trackBit set to true if this walk should be tracked, otherwise false + */ + void forwardWalkTo(long walk, int destinationVertex, boolean trackBit); + + void resetWalk(long walk, boolean trackBit); + + /** + * Reads the track-bit of a walk identifier. + * @param walk + * @return + */ + boolean getTrackBit(long walk); + + /** + * Returns true if walk was started from the vertex + */ + boolean isWalkStartedFromVertex(long walk); +} diff --git a/src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java new file mode 100644 index 00000000..c9d1c70c --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java @@ -0,0 +1,122 @@ +package edu.cmu.graphchi.walks; + +import edu.cmu.graphchi.*; +import edu.cmu.graphchi.preprocessing.VertexIdTranslate; + +/** + * Class to encapsulate the graphchi program running the show. 
+ * Due to several optimizations, it is quite complicated! + */ +public class LongDrunkardDriver + extends DrunkardDriver implements GrabbedBucketConsumer { + + public LongDrunkardDriver(final DrunkardJob job, + WalkUpdateFunction callback) { + super(job, callback); + } + + @Override + protected LongDumperThread createDumperThread() { + return new LongDumperThread(); + } + + protected class LongDumperThread extends DrunkardDriver.DumperThread { + protected long[] walks = new long[256 * 1024]; + protected int[] vertices = new int[256 * 1024]; + protected int idx = 0; + + @Override + protected void processWalks(BucketsToSend bucket, int i) { + LongWalkArray bucketWalks = (LongWalkArray) bucket.walks; + long w = bucketWalks.getArray()[i]; + LongWalkManager manager = (LongWalkManager) job.getWalkManager(); + int v = manager.off(w) + bucket.firstVertex; + + + // Skip walks with the track-bit (hop-bit) not set + boolean trackBit = manager.trackBit(w); + + if (!trackBit) { + return; + } + + walks[idx] = w; + vertices[idx] = v; + idx++; + + if (idx >= walks.length) { + try { + job.getCompanion().processWalks(new LongWalkArray(walks), vertices); + } catch (Exception err) { + err.printStackTrace(); + } + idx = 0; + } + } + + @Override + protected void sendRest() { + // Send rest + try { + long[] tmpWalks = new long[idx]; + int[] tmpVertices = new int[idx]; + System.arraycopy(walks, 0, tmpWalks, 0, idx); + System.arraycopy(vertices, 0, tmpVertices, 0, idx); + job.getCompanion().processWalks(new LongWalkArray(tmpWalks), tmpVertices); + } catch (Exception err) { + err.printStackTrace(); + } + } + } + + @Override + protected DrunkardContext createDrunkardContext(int vertexId, final GraphChiContext context, + final LocalWalkBuffer localBuf_) { + final LongWalkManager manager = (LongWalkManager) job.getWalkManager(); + final boolean isSource = manager.isSource(vertexId); + final int mySourceIndex = (isSource ? 
manager.getVertexSourceIdx(vertexId) : -1); + final LongLocalWalkBuffer localBuf = (LongLocalWalkBuffer) localBuf_; + return new LongDrunkardContext() { + @Override + public boolean isSource() { + return isSource; + } + + @Override + public int sourceIndex() { + return mySourceIndex; + } + + @Override + public int getIteration() { + return context.getIteration(); + } + + @Override + public void forwardWalkTo(long walk, int destinationVertex, boolean trackBit) { + localBuf.add(walk, destinationVertex, trackBit); + } + + @Override + public void resetWalk(long walk, boolean trackBit) { + forwardWalkTo(walk, manager.getSourceVertex(walk), trackBit); + } + + @Override + public boolean getTrackBit(long walk) { + return manager.trackBit(walk); + } + + @Override + public boolean isWalkStartedFromVertex(long walk) { + return mySourceIndex == manager.sourceIdx(walk); + } + + @Override + public VertexIdTranslate getVertexIdTranslate() { + return getVertexIdTranslate(); + } + }; + } +} + diff --git a/src/main/java/edu/cmu/graphchi/walks/LongDrunkardFactory.java b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardFactory.java new file mode 100644 index 00000000..f1136832 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardFactory.java @@ -0,0 +1,16 @@ +package edu.cmu.graphchi.walks; + +public class LongDrunkardFactory + implements DrunkardFactory { + public DrunkardDriver createDrunkardDriver(DrunkardJob job, + WalkUpdateFunction callback) { + return new LongDrunkardDriver(job, callback); + } + public WalkManager createWalkManager(int numVertices, int numSources) { + return new LongWalkManager(numVertices, numSources); + } + public LocalWalkBuffer createLocalWalkBuffer() { + return new LongLocalWalkBuffer(); + } +} + diff --git a/src/main/java/edu/cmu/graphchi/walks/LongLocalWalkBuffer.java b/src/main/java/edu/cmu/graphchi/walks/LongLocalWalkBuffer.java new file mode 100644 index 00000000..4605fc4e --- /dev/null +++ 
b/src/main/java/edu/cmu/graphchi/walks/LongLocalWalkBuffer.java @@ -0,0 +1,36 @@ +package edu.cmu.graphchi.walks; + +class LongLocalWalkBuffer extends LocalWalkBuffer { + long[] walks; + + LongLocalWalkBuffer() { + super(); + walks = new long[DEFAULT_SIZE]; + } + + public void add(long walk, int destination, boolean trackBit) { + if (idx == walks.length) { + long[] tmp = walks; + walks = new long[tmp.length * 2]; + System.arraycopy(tmp, 0, walks, 0, tmp.length); + + expandArrays(); + } + walkBufferDests[idx] = destination; + walks[idx] = walk; + trackBits[idx] = trackBit; + idx++; + } + + @Override + public void purge(WalkManager walkManager) { + LongWalkManager manager = (LongWalkManager) walkManager; + for(int i=0; i> 8) & 0xffffff; + } + + public boolean trackBit(long walk) { + return ((walk & 1) != 0); + } + + public int off(long walk) { + return (int) (walk >> 1) & 0x7f; + } + + /** + * Resets the bucket offset to reflect the new destination vertex, and also resets the track + * bit, according to the parameters. Note that those are the _only_ things re-encoded by this + * method, as those are the only things this method has access to; if other parts of the walk + * need to be changed, that must be taken care of in the WalkUpdateFunction _before_ forwarding + * the walk. 
+ */ + protected long reencodeWalk(long walk, int toVertex, boolean trackBit) { + int bucket = toVertex / bucketSize; + return encode(sourceIdx(walk), trackBit, toVertex % bucketSize); + } + + /** + * @param sourceId + * @param toVertex + * @param trackBit true if odd, false if even hop + */ + public void moveWalk(long walk, int toVertex, boolean trackBit) { + int bucket = toVertex / bucketSize; + synchronized (bucketLocks[bucket]) { + moveWalkUnsafe(walk, toVertex, trackBit); + } + } + + public void moveWalkUnsafe(long walk, int toVertex, boolean trackBit) { + // Reincode the walk to reflect the movement + walk = reencodeWalk(walk, toVertex, trackBit); + + // Move the walk to the new bucket for processing + int bucket = toVertex / bucketSize; + int idx = walkIndices[bucket]; + if (idx == 0) { + walks[bucket] = new long[initialSize]; + } else { + if (idx == walks[bucket].length) { + long[] newBucket = new long[walks[bucket].length * 3 / 2]; + System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); + walks[bucket] = newBucket; + } + } + walks[bucket][idx] = walk; + walkIndices[bucket]++; + } + + @Override + protected void expandCapacity(int bucket, int additional) { + if (walks[bucket] != null) { + int desiredLength = walks[bucket].length + additional; + if (walks[bucket].length < desiredLength) { + long[] newBucket = new long[desiredLength]; + System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); + walks[bucket] = newBucket; + } + } else { + walks[bucket] = new long[additional]; + } + } + + @Override + public void initializeWalks() { + walks = new long[1 + numVertices / bucketSize][]; + bucketLocks = new Object[walks.length]; + for(int i=0; i 0 && i >= -offt && i + offt < snapshots.length) + snapshots[i + offt] = new long[snapshotSizes[i]]; + } + + for(int i=0; i < len; i++) { + long w = bucketToConsume[i]; + int vertex = bucketFirstVertex + off(w); + + if (vertex >= fromVertex && vertex <= toVertexInclusive) { + int snapshotOff = 
vertex - fromVertex; + int localOff = vertex - bucketFirstVertex; + snapshots[snapshotOff][snapshotIdxs[localOff]] = w; + snapshotIdxs[localOff]++; + } else { + // add back + moveWalk(w, vertex, trackBit(w)); + } + } + } + snapshotInitBits[localBucketIdx] = true; + } + } + if (bucketConsumer != null && bucketToConsume != null && len > 0) { + bucketConsumer.consume(bucketIdx * bucketSize, new LongWalkArray(bucketToConsume), len); + if (len > 1000000) { + log((bucketIdx * bucketSize) + " - " + ((bucketIdx+1)) * bucketSize + ", " + len); + } + } + _timer.stop(); + long[] array = snapshots[vertexId - fromVertex]; + if (array == null) { + return null; + } else { + return new LongWalkArray(snapshots[vertexId - fromVertex]); + } + } + } + + @Override + public int getFirstVertex() { + return fromVertex; + } + + @Override + public int getLastVertex() { + return toVertexInclusive; + } + }; + } + + /** Dump to file all walks with more than 0 hop */ + @Override + public void dumpToFile(WalkSnapshot snapshot, String filename) throws IOException { + final TimerContext _timer = dumpTimer.time(); + synchronized (filename.intern()) { + DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(new File(filename), true))); + for(int i=snapshot.getFirstVertex(); i <= snapshot.getLastVertex(); i++) { + long[] ws = ((LongWalkArray)snapshot.getWalksAtVertex(i, false)).getArray(); + if (ws != null) { + for(int j=0; j < ws.length; j++) { + long w = ws[j]; + int source = sources[sourceIdx(w)]; + dos.writeLong(source); + dos.writeInt(i); + } + } + } + dos.flush(); + dos.close(); + } + _timer.stop(); + } + + public int getSourceVertex(long walk) { + return sources[sourceIdx(walk)]; + } + + @Override + public void populateSchedulerForInterval(Scheduler scheduler, VertexInterval interval) { + final TimerContext _timer = schedulePopulate.time(); + int fromBucket = interval.getFirstVertex() / bucketSize; + int toBucket = interval.getLastVertex() / bucketSize; + + 
for(int bucketIdx=fromBucket; bucketIdx <= toBucket; bucketIdx++) { + int vertexBase = bucketIdx * bucketSize; + long[] bucket = walks[bucketIdx]; + + if (bucket != null) { + BitSet alreadySeen = new BitSet(bucketSize); + int counter = 0; + for(int j=0; j MAX_SOURCES) throw new IllegalArgumentException("Max sources: " + numSources); sources = new int[numSources]; @@ -60,8 +59,12 @@ public WalkManager(int numVertices, int numSources) { } } + /** + * Sets MAX_SOURCES and bucketSize, which may be different for different subclasses. + */ + protected abstract void setSourceAndBucketBits(); - private void log(String s) { + protected void log(String s) { try { log.write(s + "\n"); log.flush(); @@ -113,142 +116,9 @@ public synchronized int addWalkBatch(int vertex, int numWalks) { return sourceSeqIdx - 1; } + protected abstract void expandCapacity(int bucket, int additional); - /** - * Encode a walk. Note, as sourceIdx is the highest order bits, the - * walks can be sorted by source simply by sorting the list. - * @param sourceId index of the rousce vertex - * @param hop true if odd, false if even - * @param off bucket offset - * @return - */ - static int encode(int sourceId, boolean hop, int off) { - assert(off < 128); - int hopbit = (hop ? 
1 : 0); - return ((sourceId & 0xffffff) << 8) | ((off & 0x7f) << 1) | hopbit; - } - - static int encodeV(int sourceId, boolean hop, int vertexId) { - return encode(sourceId, hop, vertexId % bucketSize); - } - - - public static int sourceIdx(int walk) { - return ((walk & 0xffffff00) >> 8) & 0xffffff; - } - - public static boolean hop(int walk) { - return ((walk & 1) != 0); - } - - public static int off(int walk) { - return (walk >> 1) & 0x7f; - } - - - /** - * @param sourceId - * @param toVertex - * @param hop true if odd, false if even hop - */ - public void updateWalk(int sourceId, int toVertex, boolean hop) { - int bucket = toVertex / bucketSize; - synchronized (bucketLocks[bucket]) { - updateWalkUnsafe(sourceId, toVertex, hop); - } - } - - public void updateWalkUnsafe(int sourceId, int toVertex, boolean hop) { - int bucket = toVertex / bucketSize; - int w = encode(sourceId, hop, toVertex % bucketSize); - int idx = walkIndices[bucket]; - if (idx == 0) { - walks[bucket] = new int[initialSize]; - } else { - if (idx == walks[bucket].length) { - int[] newBucket = new int[walks[bucket].length * 3 / 2]; - System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); - walks[bucket] = newBucket; - } - } - walks[bucket][idx] = w; - walkIndices[bucket]++; - } - - - - - protected void expandCapacity(int bucket, int additional) { - if (walks[bucket] != null) { - int desiredLength = walks[bucket].length + additional; - if (walks[bucket].length < desiredLength) { - int[] newBucket = new int[desiredLength]; - System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); - walks[bucket] = newBucket; - } - } else { - walks[bucket] = new int[additional]; - } - } - - public void initializeWalks() { - walks = new int[1 + numVertices / bucketSize][]; - bucketLocks = new Object[walks.length]; - for(int i=0; i 0 && i >= -offt && i + offt < snapshots.length) - snapshots[i + offt] = new int[snapshotSizes[i]]; - } - - for(int i=0; i < len; i++) { - int w = 
bucketToConsume[i]; - int vertex = bucketFirstVertex + off(w); - - if (vertex >= fromVertex && vertex <= toVertexInclusive) { - int snapshotOff = vertex - fromVertex; - int localOff = vertex - bucketFirstVertex; - snapshots[snapshotOff][snapshotIdxs[localOff]] = w; - snapshotIdxs[localOff]++; - } else { - // add back - boolean hop = hop(w); - int src = sourceIdx(w); - updateWalk(src, vertex, hop); - } - } - } - snapshotInitBits[localBucketIdx] = true; - } - } - if (bucketConsumer != null && bucketToConsume != null && len > 0) { - bucketConsumer.consume(bucketIdx * bucketSize, bucketToConsume, len); - if (len > 1000000) { - log((bucketIdx * bucketSize) + " - " + ((bucketIdx+1)) * bucketSize + ", " + len); - } - } - _timer.stop(); - return snapshots[vertexId - fromVertex]; - } - } - - @Override - public int getFirstVertex() { - return fromVertex; - } - - @Override - public int getLastVertex() { - return toVertexInclusive; - } - - - }; - - } - - public static int getWalkLength(int[] w) { - if (w == null) return 0; - return w.length; - } + public abstract WalkSnapshot grabSnapshot(final int fromVertex, final int toVertexInclusive); /** Dump to file all walks with more than 0 hop */ - public void dumpToFile(WalkSnapshot snapshot, String filename) throws IOException { - final TimerContext _timer = dumpTimer.time(); - synchronized (filename.intern()) { - DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(new File(filename), true))); - for(int i=snapshot.getFirstVertex(); i <= snapshot.getLastVertex(); i++) { - int[] ws = snapshot.getWalksAtVertex(i, false); - if (ws != null) { - for(int j=0; j < ws.length; j++) { - int w = ws[j]; - int source = sources[sourceIdx(w)]; - dos.writeInt(source); - dos.writeInt(i); - } - } - } - dos.flush(); - dos.close(); - } - _timer.stop(); - } - - public int getSourceVertex(int walk) { - return sources[sourceIdx(walk)]; - } + public abstract void dumpToFile(WalkSnapshot snapshot, String filename) 
throws IOException; public void populateSchedulerWithSources(Scheduler scheduler) { for(int i=0; i sources = new ArrayList(32678); - private ArrayList sourceWalkCounts = new ArrayList(32678); - private int totalWalks = 0; - - private long[][] walks; - private int[] walkIndices; - - private int numVertices; - private final Timer grabTimer = Metrics.defaultRegistry().newTimer(WalkManagerForPaths.class, "grab-walks", TimeUnit.SECONDS, TimeUnit.MINUTES); - private final Timer dumpTimer = Metrics.defaultRegistry().newTimer(WalkManagerForPaths.class, "dump-walks", TimeUnit.SECONDS, TimeUnit.MINUTES); - private final Timer initTimer = Metrics.defaultRegistry().newTimer(WalkManagerForPaths.class, "init-walks", TimeUnit.SECONDS, TimeUnit.MINUTES); - - - public WalkManagerForPaths(int numVertices) { - this.numVertices = numVertices; - System.out.println("Initial size for walk bucket: " + initialSize); - } - - public synchronized void addWalkBatch(int vertex, int numWalks) { - sources.add(vertex); - sourceWalkCounts.add(numWalks); - totalWalks += numWalks; - - } - - - // Note: there are some extra bits to be used here - public long encode(int id, int hop, int off) { - return ((long)id) << 32 | (((long)hop << 16) & 0x000f0000l) | ((off << 20) & 0xfff00000l); - } - - - public int hop(long walk) { - return (int) ((walk & 0x000f0000) >> 16); - } - - public int off(long walk) { - return (int) ((walk & 0xfff00000l) >> 20); - } - - public int walkId(long walk) { - return (int) (walk >> 32); - } - - - public void updateWalk(int id, int toVertex, int hop) { - int bucket = toVertex / bucketSize; - assert(hop < 16); - - - synchronized (walks[bucket]) { - int idx = walkIndices[bucket]; - if (idx == walks[bucket].length) { - long[] newBucket = new long[walks[bucket].length * 3 / 2]; - System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); - walks[bucket] = newBucket; - } - walks[bucket][idx] = encode(id, hop, toVertex % bucketSize); - walkIndices[bucket]++; - } - } - - 
public void expandCapacity(int bucket, int additional) { - int desiredLength = walks[bucket].length + additional; - - if (walks[bucket].length < desiredLength) { - long[] newBucket = new long[desiredLength]; - System.arraycopy(walks[bucket], 0, newBucket, 0, walks[bucket].length); - walks[bucket] = newBucket; - } - } - - public void initializeWalks() { - final TimerContext _timer = initTimer.time(); - walks = new long[1 + numVertices / bucketSize][]; - walkIndices = new int[walks.length]; - for(int i = 0; i < walks.length; i++) { - walks[i] = new long[initialSize]; - walkIndices[i] = 0; - } - - /* Precalculate bucket sizes for performance */ - int[] tmpsizes = new int[walks.length]; - for(int j=0; j < sources.size(); j++) { - int source = sources.get(j); - tmpsizes[source / bucketSize] += sourceWalkCounts.get(j); - } - - for(int b=0; b < walks.length; b++) { - expandCapacity(b, tmpsizes[b]); - } - - int walkId = 0; - for(int i=0; i < sources.size(); i++) { - int source = sources.get(i); - int count = sourceWalkCounts.get(i); - for(int c=0; c tmpBuckets = new ArrayList(toBucket - fromBucket + 1); - int[] tmpBucketLengths = new int[toBucket - fromBucket + 1]; - for(int b=fromBucket; b <= toBucket; b++) { - tmpBuckets.add(walks[b]); - tmpBucketLengths[b - fromBucket] = walkIndices[b]; - walks[b] = new long[initialSize]; - walkIndices[b] = 0; - } - - /* Now create data structure for fast retrieval */ - final long[][] snapshots = new long[toVertexInclusive - fromVertex + 1][]; - final int[] snapshotIdxs = new int[snapshots.length]; - - for(int i=0; i < snapshots.length; i++) { - snapshots[i] = null; - snapshotIdxs[i] = 0; - } - /* Add walks to snapshot arrays -- TODO: parallelize */ - for(int b=0; b < tmpBuckets.size(); b++) { - int bucketFirstVertex = bucketSize * (fromBucket + b); - long[] arr = tmpBuckets.get(b); - int len = tmpBucketLengths[b]; - - final int[] snapshotSizes = new int[bucketSize]; - - /* Calculate vertex-walks sizes */ - for(int i=0; i < len; i++) { 
- long w = arr[i]; - snapshotSizes[off(w)]++; - } - - int offt = bucketFirstVertex - fromVertex; - - /* Precalculate the array sizes. offt is the - offset of the bucket's first vertex from the first - vertex of the snapshot - */ - - for(int i=0; i < snapshotSizes.length; i++) { - if (snapshotSizes[i] > 0 && i >= -offt && i + offt < snapshots.length) - snapshots[i + offt] = new long[snapshotSizes[i]]; - } - - for(int i=0; i < len; i++) { - long w = arr[i]; - int hop = hop(w); - int id = walkId(w); - int vertex = bucketFirstVertex + off(w); - - if (vertex >= fromVertex && vertex <= toVertexInclusive) { - int snapshotOff = vertex - fromVertex; - if (snapshots[snapshotOff] == null) - throw new IllegalStateException(); - - if (snapshotIdxs[snapshotOff] >= snapshots[snapshotOff].length) { - throw new RuntimeException("Not possible!"); - /* Duplicate array - int[] tmp = new int[snapshots[snapshotOff].length * 2]; - System.arraycopy(snapshots[snapshotOff], 0, tmp, 0, snapshots[snapshotOff].length); - snapshots[snapshotOff] = tmp; */ - } - snapshots[snapshotOff][snapshotIdxs[snapshotOff]] = w; - snapshotIdxs[snapshotOff]++; - } else { - // add back - updateWalk(id, vertex, hop); - } - } - tmpBuckets.set(b, null); // Save memory - } - - _timer.stop(); - - /* Create the snapshot object */ - return new WalkSnapshotForPaths() { - @Override - public long[] getWalksAtVertex(int vertexId) { - return snapshots[vertexId - fromVertex]; - } - - @Override - public int getFirstVertex() { - return fromVertex; - } - - @Override - public int getLastVertex() { - return toVertexInclusive; - } - }; - - } - - - - /** Dump to file all walks with more than 0 hop */ - public void dumpToFile(WalkSnapshotForPaths snapshot, String filename) throws IOException { - final TimerContext _timer = dumpTimer.time(); - synchronized (filename.intern()) { - DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(new File(filename), true))); - for(int 
i=snapshot.getFirstVertex(); i <= snapshot.getLastVertex(); i++) { - long[] ws = snapshot.getWalksAtVertex(i); - if (ws != null) { - for(int j=0; j < ws.length; j++) { - long w = ws[j]; - /* walk-id: int, hop: short, vertex: int */ - dos.writeInt(walkId(w)); - dos.writeShort(hop(w)); - dos.writeInt(i); - } - } - } - dos.flush(); - dos.close(); - } - _timer.stop(); - } - - - public void populateSchedulerWithSources(Scheduler scheduler) { - for(int i=0; i < sources.size(); i++) { - scheduler.addTask(sources.get(i)); - } - } -} diff --git a/src/main/java/edu/cmu/graphchi/walks/WalkPathAnalyzer.java b/src/main/java/edu/cmu/graphchi/walks/WalkPathAnalyzer.java deleted file mode 100644 index 8e297e94..00000000 --- a/src/main/java/edu/cmu/graphchi/walks/WalkPathAnalyzer.java +++ /dev/null @@ -1,102 +0,0 @@ -package edu.cmu.graphchi.walks; - -import java.io.*; -import java.util.Arrays; - -/** - * Class for computing paths from the walk-files produced - * by DrunkardMobForPaths - * @author Aapo Kyrola, akyrola@cs.cmu.edu, akyrola@twitter.com - */ -public class WalkPathAnalyzer { - - private File directory; - - public WalkPathAnalyzer(File directory) { - this.directory = directory; - if (!this.directory.isDirectory()) throw new IllegalArgumentException("You must provide a directory"); - } - - /** - * Currently very dummy implementation. TODO: Make memory efficient and smarter in general. - * Just for demonstration purposes. 
- */ - public void analyze(int minWalkId, int maxWalkId, int maxHops) throws IOException { - int numberOfWalks = maxWalkId - minWalkId + 1; - Walk[] paths = new Walk[numberOfWalks]; - for(int i=0; i < paths.length; i++) { - paths[i] = new Walk(maxHops); - } - - String[] walkFiles = directory.list(new FilenameFilter() { - @Override - public boolean accept(File file, String s) { - return s.startsWith("walks_"); - } - }); - - for(String walkFile : walkFiles) { - System.out.println("Analyze: " + walkFile); - long walksInFile = new File(directory, walkFile).length() / 10; - DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream( - new File(directory, walkFile)), 1024 * 1024 * 50)); - try { - long i = 0; - while(i < walksInFile) { - if (i % 1000000 == 0) System.out.println(i + " / " + walksInFile); - i++; - - int walkId = dis.readInt(); - - short hop = dis.readShort(); - int atVertex = dis.readInt(); - if (walkId >= minWalkId && walkId <= maxWalkId) { - paths[walkId - minWalkId].addWalk(hop, atVertex); - } - } - } catch (EOFException ioe) { - continue; - } - dis.close(); - } - - for(Walk w : paths) { - System.out.println(w.getPathDescription()); - } - } - - private static class Walk { - - private long[] path; - int idx; - - private Walk(int maxHops) { - idx = 0; - path = new long[maxHops]; - } - - void addWalk(short hop, int atVertex) { - long w = atVertex | ((long)hop << 32); - if (idx < path.length) path[idx++] = w; - } - - String getPathDescription() { - /* Super-slow */ - Arrays.sort(path); // Hop is the highest order bit so sorts by hop - StringBuffer sb = new StringBuffer(); - for(long w : path) { - sb.append((w & 0xffffffffl) + "-"); - } - return sb.toString(); - } - } - - public static void main(String[] args) throws Exception { - WalkPathAnalyzer analyzer = new WalkPathAnalyzer(new File(".")); - int minWalkId = Integer.parseInt(args[0]); - int maxWalkId = Integer.parseInt(args[1]); - int maxHops = Integer.parseInt(args[2]); - - 
analyzer.analyze(minWalkId, maxWalkId, maxHops); - } -} diff --git a/src/main/java/edu/cmu/graphchi/walks/WalkSnapshot.java b/src/main/java/edu/cmu/graphchi/walks/WalkSnapshot.java index a458e436..e5d8ed54 100644 --- a/src/main/java/edu/cmu/graphchi/walks/WalkSnapshot.java +++ b/src/main/java/edu/cmu/graphchi/walks/WalkSnapshot.java @@ -5,9 +5,6 @@ */ public interface WalkSnapshot { - /** Returns walk at vertex, or null if none **/ - int[] getWalksAtVertex(int vertexId, boolean processed); - int getFirstVertex(); int getLastVertex(); @@ -17,4 +14,6 @@ public interface WalkSnapshot { public long numWalks(); public void restoreUngrabbed(); + + WalkArray getWalksAtVertex(int vertexId, boolean processed); } diff --git a/src/main/java/edu/cmu/graphchi/walks/WalkSnapshotForPaths.java b/src/main/java/edu/cmu/graphchi/walks/WalkSnapshotForPaths.java deleted file mode 100644 index 9dc0e01d..00000000 --- a/src/main/java/edu/cmu/graphchi/walks/WalkSnapshotForPaths.java +++ /dev/null @@ -1,14 +0,0 @@ -package edu.cmu.graphchi.walks; - -/** - * @author Aapo Kyrola - */ -public interface WalkSnapshotForPaths { - - /** Returns walk at vertex, or null if none **/ - long[] getWalksAtVertex(int vertexId); - - int getFirstVertex(); - - int getLastVertex(); -} diff --git a/src/main/java/edu/cmu/graphchi/walks/WalkUpdateFunction.java b/src/main/java/edu/cmu/graphchi/walks/WalkUpdateFunction.java index a7e4bcdc..1f906fae 100644 --- a/src/main/java/edu/cmu/graphchi/walks/WalkUpdateFunction.java +++ b/src/main/java/edu/cmu/graphchi/walks/WalkUpdateFunction.java @@ -10,6 +10,15 @@ */ public interface WalkUpdateFunction { + /** + * Called for each source vertex. Return an int-array of vertices to which walk visits should not + * be tracked. For example, if you are not interested about the walks to the immediate neighbors, + * you should returns an array of the vertex ids of the neighbors. 
+ * @param vertex + * @return + */ + int[] getNotTrackedVertices(ChiVertex vertex); + /** * Callback * @param walks @@ -17,17 +26,8 @@ public interface WalkUpdateFunction { * @param drunkardContext * @param randomGenerator random-generator */ - void processWalksAtVertex(int[] walks, + void processWalksAtVertex(WalkArray walks, ChiVertex vertex, DrunkardContext drunkardContext, Random randomGenerator); - - /** - * Called for each source vertex. Return an int-array of vertices to which walk visits should not - * be tracked. For example, if you are not interested about the walks to the immediate neighbors, - * you should returns an array of the vertex ids of the neighbors. - * @param vertex - * @return - */ - int[] getNotTrackedVertices(ChiVertex vertex); } diff --git a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMob.java b/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMob.java deleted file mode 100644 index d7638e9d..00000000 --- a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMob.java +++ /dev/null @@ -1,179 +0,0 @@ -package edu.cmu.graphchi.walks.deprecated; - -import edu.cmu.graphchi.ChiFilenames; -import edu.cmu.graphchi.ChiVertex; -import edu.cmu.graphchi.GraphChiContext; -import edu.cmu.graphchi.GraphChiProgram; -import edu.cmu.graphchi.datablocks.IntConverter; -import edu.cmu.graphchi.engine.GraphChiEngine; -import edu.cmu.graphchi.engine.VertexInterval; -import edu.cmu.graphchi.util.IdInt; -import edu.cmu.graphchi.util.Toplist; -import edu.cmu.graphchi.walks.WalkManager; -import edu.cmu.graphchi.walks.WalkSnapshot; - -import java.io.File; -import java.io.IOException; -import java.util.TreeSet; - -/** - * Launch millions (?) of random walks and record the - * hops for each source. This version can be used only for computing - * distribution of the source-destinations. For recording the actual - * paths, use DrunkardMobForPaths - * Done partially during authors internship at Twitter, Fall 2012. 
- * @author Aapo Kyrola, akyrola@cs.cmu.edu - */ -public class DrunkardMob implements GraphChiProgram { - - private WalkManager walkManager; - private WalkSnapshot curWalkSnapshot; - - public DrunkardMob() { - } - - private static final double RESETPROB = 0.15; - - public void update(ChiVertex vertex, GraphChiContext context) { - int[] walksAtMe = curWalkSnapshot.getWalksAtVertex(vertex.getId(), true); - if (context.getIteration() == 0) vertex.setValue(0); - if (walksAtMe == null) return; - - int walkLength = walksAtMe.length; - - int numWalks = 0; - for(int i=0; i < walkLength; i++) { - int walk = walksAtMe[i]; - boolean hop = walkManager.hop(walk); - // Choose a random destination and move the walk forward - int dst; - if (vertex.getId() != walkManager.getSourceVertex(walk)) { - numWalks++; - } - if (vertex.numOutEdges() > 0 && (context.getIteration() == 0 || Math.random() > RESETPROB)) { - dst = vertex.getRandomOutNeighbor(); - } else { - // Dead end! - dst = walkManager.getSourceVertex(walk); - } - walkManager.updateWalk(walkManager.sourceIdx(walk), dst, !hop); - context.getScheduler().addTask(dst); - - } - vertex.setValue(vertex.getValue() + numWalks); - } - - - public void beginIteration(GraphChiContext ctx) { - if (ctx.getIteration() == 0) { - ctx.getScheduler().removeAllTasks(); - walkManager.populateSchedulerWithSources(ctx.getScheduler()); - } - } - - public void endIteration(GraphChiContext ctx) {} - - /** - * At the start of interval - grab the snapshot of walks - */ - public void beginSubInterval(GraphChiContext ctx, final VertexInterval interval) { - long t = System.currentTimeMillis(); - curWalkSnapshot = walkManager.grabSnapshot(interval.getFirstVertex(), interval.getLastVertex()); - System.out.println("Grab snapshot took " + (System.currentTimeMillis() - t) + " ms."); - - String walkDir = System.getProperty("walk.dir", "."); - final String filename = walkDir + "/walks_.dat"; - if (ctx.getIteration() == 0) { // NOTE, temporary hack to save disk 
space but have the same I/O cost for testing - new File(filename).delete(); - } - - // Launch a thread to dump - final WalkSnapshot snapshot = curWalkSnapshot; - synchronized (filename.intern()) { - try { - walkManager.dumpToFile(snapshot, filename); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - - public void endSubInterval(GraphChiContext ctx, final VertexInterval interval) { - curWalkSnapshot.restoreUngrabbed(); - curWalkSnapshot = null; // Release memory - } - - public void beginInterval(GraphChiContext ctx, VertexInterval interval) {} - - public void endInterval(GraphChiContext ctx, VertexInterval interval) {} - - public static void main(String[] args) throws Exception { - - String baseFilename = args[0]; - - if (args.length > 1) { - int nShards = Integer.parseInt(args[1]); - int nSources = Integer.parseInt(args[2]); - int walksPerSource = Integer.parseInt(args[3]); - int maxHops = Integer.parseInt(args[4]); - - System.out.println("Walks will start from " + nSources + " sources."); - System.out.println("Going to start " + walksPerSource + " walks per source."); - System.out.println("Max hops: " + maxHops); - - /* Delete vertex data */ - File vertexDataFile = new File(ChiFilenames.getFilenameOfVertexData(baseFilename, new IntConverter(), false)); - if (vertexDataFile.exists()) { - vertexDataFile.delete(); - } - - /* Initialize GraphChi engine */ - GraphChiEngine engine = new GraphChiEngine(baseFilename, nShards); - engine.setEdataConverter(null); - engine.setVertexDataConverter(new IntConverter()); - engine.setModifiesInedges(false); - engine.setModifiesOutedges(false); - engine.setEnableScheduler(true); - engine.setOnlyAdjacency(true); - engine.setDisableInedges(true); - engine.setMemoryBudgetMb(1200); - engine.setUseStaticWindowSize(false); // Disable dynamic window size detection - engine.setEnableDeterministicExecution(false); - engine.setAutoLoadNext(false); - engine.setMaxWindow(2000000); // Handle maximum 2M vertices a time. 
- - long t1 = System.currentTimeMillis(); - - /* Initialize application object */ - DrunkardMob mob = new DrunkardMob(); - - /* Initialize Random walks */ - int nVertices = engine.numVertices(); - mob.walkManager = new WalkManager(nVertices, nSources); - - for(int i=0; i < nSources; i++) { - int source = 234224 + i; - mob.walkManager.addWalkBatch(source, walksPerSource); - } - mob.walkManager.initializeWalks(); - - System.out.println("Configured " + mob.walkManager.getTotalWalks() + " walks in " + - (System.currentTimeMillis() - t1) + " ms"); - - - /* Run */ - engine.run(mob, maxHops + 1); - - System.out.println("Ready. Going to output..."); - - TreeSet top20 = Toplist.topListInt(baseFilename, engine.numVertices(), 20); - int i = 0; - for(IdInt vertexRank : top20) { - System.out.println(++i + ": " + - engine.getVertexIdTranslate().backward(vertexRank.getVertexId()) + " = " + vertexRank.getValue()); - } - System.out.println("Finished."); - } - - } -} diff --git a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobForPaths.java b/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobForPaths.java deleted file mode 100644 index 6d4a8275..00000000 --- a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobForPaths.java +++ /dev/null @@ -1,187 +0,0 @@ -package edu.cmu.graphchi.walks.deprecated; - -import edu.cmu.graphchi.ChiFilenames; -import edu.cmu.graphchi.ChiVertex; -import edu.cmu.graphchi.GraphChiContext; -import edu.cmu.graphchi.GraphChiProgram; -import edu.cmu.graphchi.vertexdata.VertexAggregator; -import edu.cmu.graphchi.datablocks.IntConverter; -import edu.cmu.graphchi.engine.GraphChiEngine; -import edu.cmu.graphchi.engine.VertexInterval; -import edu.cmu.graphchi.util.IdInt; -import edu.cmu.graphchi.util.Toplist; -import edu.cmu.graphchi.walks.WalkManagerForPaths; -import edu.cmu.graphchi.walks.WalkPathAnalyzer; -import edu.cmu.graphchi.walks.WalkSnapshotForPaths; - -import java.io.File; -import java.util.TreeSet; - -/** - * Launch millions 
(?) of random walks and record each hop - * for the walks. Each walk has an unique id. This version thus - * uses twice amount of memory as the DrunkardMob which only - * can be used for computing distributions of source-destinations. - * @author Aapo Kyrola, akyrola@cs.cmu.edu - */ -public class DrunkardMobForPaths implements GraphChiProgram { - - private WalkManagerForPaths walkManager; - private WalkSnapshotForPaths curWalkSnapshot; - private int maxHops; - private String basefileName; - - public DrunkardMobForPaths(int maxHops, String basefileName) { - this.maxHops = maxHops; - this.basefileName = basefileName; - } - - public void update(ChiVertex vertex, GraphChiContext context) { - long[] walksAtMe = curWalkSnapshot.getWalksAtVertex(vertex.getId()); - if (context.getIteration() == 0) vertex.setValue(0); - if (walksAtMe == null) return; - - int numWalks = 0; - for(int i=0; i < walksAtMe.length; i++) { - long walk = walksAtMe[i]; - int hop = walkManager.hop(walk); - if (hop > 0) numWalks++; - if (hop < maxHops) { - // Choose a random destination and move the walk forward - int dst; - if (vertex.numEdges() > 0) { - dst = vertex.getRandomNeighbor(); - } else { - // Dead end! 
- continue; // Ignore this walk - } - walkManager.updateWalk(walkManager.walkId(walk), dst, hop + 1); - context.getScheduler().addTask(dst); - } - } - vertex.setValue(vertex.getValue() + numWalks); - } - - - public void beginIteration(GraphChiContext ctx) { - if (ctx.getIteration() == 0) { - ctx.getScheduler().removeAllTasks(); - walkManager.populateSchedulerWithSources(ctx.getScheduler()); - } - } - - public void endIteration(GraphChiContext ctx) { - - } - - /** - * At the start of interval - grab the snapshot of walks - */ - public void beginSubInterval(GraphChiContext ctx, final VertexInterval interval) { - long t = System.currentTimeMillis(); - curWalkSnapshot = walkManager.grabSnapshot(interval.getFirstVertex(), interval.getLastVertex()); - System.out.println("Grab snapshot took " + (System.currentTimeMillis() - t) + " ms."); - - String walkDir = System.getProperty("walk.dir", "."); - final String filename = walkDir + "/walks_" + interval.getFirstVertex() + "-" + interval.getLastVertex() + ".dat"; - if (ctx.getIteration() == 0) { // NOTE, temporary hack to save disk space but have the same I/O cost for testing - new File(filename).delete(); - } - // Launch a thread to dump - Thread dumperThread = new Thread(new Runnable() { - public void run() { - try { - walkManager.dumpToFile(curWalkSnapshot, filename); - } catch (Exception err) { - err.printStackTrace(); - } - } - }); - dumperThread.start(); - } - - public void endSubInterval(GraphChiContext ctx, final VertexInterval interval) { - curWalkSnapshot = null; // Release memory - } - - public void beginInterval(GraphChiContext ctx, VertexInterval interval) {} - - public void endInterval(GraphChiContext ctx, VertexInterval interval) {} - - public static void main(String[] args) throws Exception { - String baseFilename = args[0]; - - - if (args.length > 1) { - int nShards = Integer.parseInt(args[1]); - int nSources = Integer.parseInt(args[2]); - int walksPerSource = Integer.parseInt(args[3]); - int maxHops = 
Integer.parseInt(args[4]); - - System.out.println("Path-recording walks will start from " + nSources + " sources."); - System.out.println("Going to start " + walksPerSource + " walks per source."); - System.out.println("Max hops: " + maxHops); - - /* Delete vertex data */ - File vertexDataFile = new File(ChiFilenames.getFilenameOfVertexData(baseFilename, new IntConverter(), false)); - if (vertexDataFile.exists()) { - vertexDataFile.delete(); - } - - /* Initialize GraphChi engine */ - GraphChiEngine engine = new GraphChiEngine(baseFilename, nShards); - - engine.setEdataConverter(null); - engine.setVertexDataConverter(new IntConverter()); - engine.setModifiesInedges(false); - engine.setModifiesOutedges(false); - engine.setEnableScheduler(true); - engine.setOnlyAdjacency(true); - engine.setDisableInedges(false); // NOTE! In-edges are enabled - engine.setMemoryBudgetMb(1200); - engine.setUseStaticWindowSize(false); // Disable dynamic window size detection - engine.setEnableDeterministicExecution(false); - engine.setMaxWindow(2000000); // Handle maximum 2M vertices a time. 
- - long t1 = System.currentTimeMillis(); - - /* Initialize application object */ - DrunkardMobForPaths mob = new DrunkardMobForPaths(maxHops, baseFilename); - - /* Initialize Random walks */ - int nVertices = engine.numVertices(); - mob.walkManager = new WalkManagerForPaths(nVertices); - - /* NOTE: This starts walks from random nodes - you probably want something different */ - for(int i=0; i < nSources; i++) { - int source = (int) (Math.random() * nVertices); - mob.walkManager.addWalkBatch(source, walksPerSource); - } - mob.walkManager.initializeWalks(); - - System.out.println("Configured " + mob.walkManager.getTotalWalks() + " walks in " + - (System.currentTimeMillis() - t1) + " ms"); - - - /* Run */ - engine.run(mob, maxHops + 1); - - /* Analyze */ - WalkPathAnalyzer analyzer = new WalkPathAnalyzer(new File(".")); - analyzer.analyze(0, mob.walkManager.getTotalWalks() - 1, maxHops); - - System.out.println("Ready. Going to output..."); - - /* Output top 20 of visited vertices. */ - TreeSet top20 = Toplist.topListInt(baseFilename, engine.numVertices(), 20); - int i = 0; - for(IdInt vertexRank : top20) { - System.out.println(++i + ": " + vertexRank.getVertexId() + " = " + vertexRank.getValue()); - } - System.out.println("Finished."); - - long sumWalks = VertexAggregator.sumInt(engine.numVertices(), baseFilename); - System.out.println("Total hops (in file): " + sumWalks); - } - } -} diff --git a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobWithCompanion.java b/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobWithCompanion.java deleted file mode 100644 index 9e10d3b1..00000000 --- a/src/main/java/edu/cmu/graphchi/walks/deprecated/DrunkardMobWithCompanion.java +++ /dev/null @@ -1,451 +0,0 @@ -package edu.cmu.graphchi.walks.deprecated; - -import edu.cmu.graphchi.walks.GrabbedBucketConsumer; -import edu.cmu.graphchi.walks.WalkManager; -import edu.cmu.graphchi.walks.WalkSnapshot; -import edu.cmu.graphchi.walks.WeightedHopper; -import 
edu.cmu.graphchi.walks.distributions.DrunkardCompanion; -import edu.cmu.graphchi.walks.distributions.RemoteDrunkardCompanion; -import com.yammer.metrics.Metrics; -import com.yammer.metrics.core.Timer; -import com.yammer.metrics.core.TimerContext; -import edu.cmu.graphchi.ChiFilenames; -import edu.cmu.graphchi.ChiVertex; -import edu.cmu.graphchi.GraphChiContext; -import edu.cmu.graphchi.GraphChiProgram; -import edu.cmu.graphchi.datablocks.FloatConverter; -import edu.cmu.graphchi.datablocks.IntConverter; -import edu.cmu.graphchi.engine.GraphChiEngine; -import edu.cmu.graphchi.engine.VertexInterval; - -import java.io.File; -import java.rmi.Naming; -import java.rmi.RemoteException; -import java.util.ArrayList; -import java.util.Random; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; - -/** - * Launch millions (?) of random walks and record the - * hops for each source. Uses a remote DrunkardCompanion to - * keep track of the distribution. 
- * @author Aapo Kyrola, akyrola@cs.cmu.edu - */ -public class DrunkardMobWithCompanion implements GraphChiProgram, GrabbedBucketConsumer { - - private static final int[] DEBUGIDS = new int[] {0}; - - private WalkManager walkManager; - private WalkSnapshot curWalkSnapshot; - private final RemoteDrunkardCompanion companion; - - private final static double RESETPROB = 0.15; - private LinkedBlockingQueue bucketQueue = new LinkedBlockingQueue(); - private boolean finished = false; - private Thread dumperThread; - private final Timer purgeTimer = - Metrics.defaultRegistry().newTimer(DrunkardMobWithCompanion.class, "purge-localwalks", TimeUnit.SECONDS, TimeUnit.MINUTES); - - private boolean weighted; - - private AtomicLong pendingWalksToSubmit = new AtomicLong(0); - - public DrunkardMobWithCompanion(String companionAddress, boolean weighted) throws Exception { - this.weighted = weighted; - - if (companionAddress.equals("local")) { - companion = new DrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); - } else { - companion = (RemoteDrunkardCompanion) Naming.lookup(companionAddress); - } - System.out.println("Found companion: " + companion); - - // Launch a thread to send to the companion - dumperThread = new Thread(new Runnable() { - public void run() { - int[] walks = new int[256 * 1024]; - int[] vertices = new int[256 * 1024]; - int idx = 0; - - while(!finished || bucketQueue.size() > 0) { - BucketsToSend bucket = null; - try { - bucket = bucketQueue.poll(1000, TimeUnit.MILLISECONDS); - } catch (InterruptedException e) { - } - if (bucket != null) { - pendingWalksToSubmit.addAndGet(-bucket.length); - for(int i=0; i= walks.length) { - try { - companion.processWalks(walks, vertices); - } catch (Exception err) { - err.printStackTrace(); - } - idx = 0; - } - - } - } - } - - // Send rest - try { - int[] tmpwalks = new int[idx]; - int[] tmpvertices = new int[idx]; - System.arraycopy(walks, 0, tmpwalks, 0, idx); - System.arraycopy(vertices, 0, tmpvertices, 0, idx); - 
companion.processWalks(tmpwalks, tmpvertices); - } catch (Exception err) { - err.printStackTrace(); - } - } - }); - dumperThread.start(); - } - - private static class BucketsToSend { - int firstVertex; - int[] walks; - int length; - - BucketsToSend(int firstVertex, int[] walks, int length) { - this.firstVertex = firstVertex; - this.walks = walks; - this.length = length; - } - } - - public void consume(int firstVertexInBucket, int[] walkBucket, int len) { - try { - pendingWalksToSubmit.addAndGet(len); - bucketQueue.put(new BucketsToSend(firstVertexInBucket, walkBucket, len)); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - - private void initCompanion() throws Exception { - /* Tell companion the sources */ - companion.setSources(walkManager.getSources()); - } - - public void update(ChiVertex vertex, GraphChiContext context) { - - if (context.getThreadLocal() == null) { - LocalWalkBuffer buf = new LocalWalkBuffer(); - context.setThreadLocal(buf); - synchronized (localBuffers) { - localBuffers.add(buf); - } - } - - LocalWalkBuffer localBuf = (LocalWalkBuffer) context.getThreadLocal(); - - try { - // Flow control - while (pendingWalksToSubmit.get() > walkManager.getTotalWalks() / 40) { - System.out.println("Too many walks waiting for delivery: " + pendingWalksToSubmit.get()); - try { - Thread.sleep(2000); - } catch (InterruptedException e) { - } - } - - boolean firstIteration = (context.getIteration() == 0); - int[] walksAtMe = curWalkSnapshot.getWalksAtVertex(vertex.getId(), true); - - for(int j=0; j < DEBUGIDS.length; j++) { - if (vertex.getId() == DEBUGIDS[j]) { - System.out.println(vertex.getId() + " walks: " + walksAtMe.length); - // for(int i=0; i 0) { - - if (weighted) { - hops = (numOutEdges < 16 || walkLength < 8 ? 
WeightedHopper.generateRandomHopsOut(r, vertex, walkLength) : - WeightedHopper.generateRandomHopsAliasMethodOut(r, vertex, walkLength)); - for(int j=0; j 0 && (firstIteration || Math.random() > RESETPROB)) { - dst = nextHop; - } else { - // Dead end or reset - dst = walkManager.getSourceVertex(walk); - atleastSecondHop = false; - } - localBuf.add(src, dst, atleastSecondHop); - } - } catch (RemoteException re) { - throw new RuntimeException(re); - } - } - - - private class LocalWalkBuffer { - int[] walkBufferDests; - int[] walkSourcesAndHops; - Random random = new Random(); - - int idx = 0; - LocalWalkBuffer() { - walkBufferDests = new int[65536]; - walkSourcesAndHops = new int[65536]; - } - - private void add(int src, int dst, boolean hop) { - if (idx == walkSourcesAndHops.length) { - int[] tmp = walkSourcesAndHops; - walkSourcesAndHops = new int[tmp.length * 2]; - System.arraycopy(tmp, 0, walkSourcesAndHops, 0, tmp.length); - - tmp = walkBufferDests; - walkBufferDests = new int[tmp.length * 2]; - System.arraycopy(tmp, 0, walkBufferDests, 0, tmp.length); - } - walkBufferDests[idx] = dst; - walkSourcesAndHops[idx] = (hop ? -1 : 1) * (1 + src); // Note +1 so zero will be handled correctly - idx++; - } - - private void purge() { - for(int i=0; i 0) { - try { - System.out.println("Waiting ..." 
+ bucketQueue.size()); - Thread.sleep(500); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - try { - dumperThread.join(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - - private ArrayList localBuffers = new ArrayList(); - - /** - * At the start of interval - grab the snapshot of walks - */ - public void beginSubInterval(GraphChiContext ctx, final VertexInterval interval) { - long t = System.currentTimeMillis(); - curWalkSnapshot = walkManager.grabSnapshot(interval.getFirstVertex(), interval.getLastVertex()); - System.out.println("Grab snapshot took " + (System.currentTimeMillis() - t) + " ms."); - - while(localBuffers.size() > 0) { - try { - Thread.sleep(100); - } catch (InterruptedException e) { - } - System.out.println("Waiting for purge to finish..."); - } - } - - public void endSubInterval(GraphChiContext ctx, final VertexInterval interval) { - curWalkSnapshot.restoreUngrabbed(); - curWalkSnapshot = null; // Release memory - - /* Purge local buffers */ - /* TODO: do in separate thread */ - Thread t = new Thread(new Runnable() { - public void run() { - synchronized (localBuffers) { - final TimerContext _timer = purgeTimer.time(); - for (LocalWalkBuffer buf : localBuffers) { - buf.purge(); - } - localBuffers.clear(); - _timer.stop(); - } - }}); - t.start(); - } - - public void beginInterval(GraphChiContext ctx, VertexInterval interval) { - /* Count walks */ - long initializedWalks = walkManager.getTotalWalks(); - long activeWalks = walkManager.getNumOfActiveWalks(); - - System.out.println("====================================="); - System.out.println("Active walks: " + activeWalks + ", initialized=" + initializedWalks); - System.out.println("====================================="); - - walkManager.populateSchedulerForInterval(ctx.getScheduler(), interval); - walkManager.setBucketConsumer(this); - } - - public void endInterval(GraphChiContext ctx, VertexInterval interval) {} - - public static void main(String[] args) 
throws Exception { - - String baseFilename = args[0]; - - if (args.length > 1) { - int nShards = Integer.parseInt(args[1]); - int nSources = Integer.parseInt(args[2]); - int walksPerSource = Integer.parseInt(args[3]); - int maxHops = Integer.parseInt(args[4]); - int firstSource = Integer.parseInt(args[5]); - String companionAddress = args[6]; - boolean weightedGraph = (1 == Integer.parseInt(args[7])); - - - System.out.println("Walks will start from vertices " + firstSource + " -- " + (firstSource + nSources - 1) ); - System.out.println("Going to start " + walksPerSource + " walks per source."); - System.out.println("Max hops: " + maxHops); - System.out.println("Companion: " + companionAddress); - System.out.println("Weighted: " + weightedGraph); - - /* Delete vertex data */ - File vertexDataFile = new File(ChiFilenames.getFilenameOfVertexData(baseFilename, new IntConverter(), false)); - if (vertexDataFile.exists()) { - vertexDataFile.delete(); - } - - /* Initialize GraphChi engine */ - GraphChiEngine engine = new GraphChiEngine(baseFilename, nShards); - engine.setEdataConverter(weightedGraph ? new FloatConverter() : null); - engine.setModifiesInedges(false); - engine.setModifiesOutedges(false); - engine.setEnableScheduler(true); - engine.setOnlyAdjacency(!weightedGraph); - engine.setDisableInedges(true); - - int memoryBudget = 1200; - if (System.getProperty("membudget") != null) memoryBudget = Integer.parseInt(System.getProperty("membudget")); - - System.out.println("Memory budget: " + memoryBudget); - engine.setMemoryBudgetMb(memoryBudget); - engine.setEnableDeterministicExecution(false); - engine.setAutoLoadNext(false); - engine.setVertexDataConverter(null); - engine.setMaxWindow(10000000); // Handle maximum 10M vertices a time. 
- - long t1 = System.currentTimeMillis(); - - /* Initialize application object */ - DrunkardMobWithCompanion mob = new DrunkardMobWithCompanion(companionAddress, weightedGraph); - - /* Initialize Random walks */ - int nVertices = engine.numVertices(); - mob.walkManager = new WalkManager(nVertices, nSources); - - for(int i=0; i < nSources; i++) { - mob.walkManager.addWalkBatch(i + firstSource, walksPerSource); - } - - System.out.println("Initializing walks..."); - mob.walkManager.initializeWalks(); - - mob.initCompanion(); - - System.out.println("Configured " + mob.walkManager.getTotalWalks() + " walks in " + - (System.currentTimeMillis() - t1) + " ms"); - - - /* Run */ - engine.run(mob, maxHops + 1); - - // TODO: ensure that we have sent all walks! - mob.spinUntilFinish(); - - mob.companion.outputDistributions(new File(baseFilename).getName() + "_" + firstSource); - - } - - } -} diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/DrunkardCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/DrunkardCompanion.java index 3b64411d..308bc10d 100644 --- a/src/main/java/edu/cmu/graphchi/walks/distributions/DrunkardCompanion.java +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/DrunkardCompanion.java @@ -1,7 +1,7 @@ package edu.cmu.graphchi.walks.distributions; import edu.cmu.graphchi.ChiLogger; -import edu.cmu.graphchi.walks.WalkManager; +import edu.cmu.graphchi.walks.WalkArray; import edu.cmu.graphchi.util.IdCount; import edu.cmu.graphchi.util.IntegerBuffer; @@ -25,13 +25,13 @@ * Done partially during internship at Twitter, Fall 2012 * @author Aapo Kyrola, akyrola@cs.cmu.edu */ -public class DrunkardCompanion extends UnicastRemoteObject implements RemoteDrunkardCompanion { +public abstract class DrunkardCompanion extends UnicastRemoteObject implements RemoteDrunkardCompanion { protected static class WalkSubmission { - int[] walks; + WalkArray walks; int[] atVertices; - private WalkSubmission(int[] walks, int[] atVertices) { + private 
WalkSubmission(WalkArray walks, int[] atVertices) { this.walks = walks; this.atVertices = atVertices; } @@ -158,7 +158,7 @@ public void run() { WalkSubmission subm = pendingQueue.poll(2000, TimeUnit.MILLISECONDS); if (subm != null) { _processWalks(subm.walks, subm.atVertices); - unpurgedWalks += subm.walks.length; + unpurgedWalks += subm.walks.size(); } if (sourceVertexIds != null) { if (unpurgedWalks > sourceVertexIds.length * 10 || (subm == null && unpurgedWalks > 100000)) { @@ -270,31 +270,7 @@ public void run() { }, 5000, 60000); } - - - - - protected void _processWalks(int[] walks, int[] atVertices) { - long t1 = System.currentTimeMillis(); - for(int i=0; i < walks.length; i++) { - int w = walks[i]; - int atVertex = atVertices[i]; - int sourceIdx = WalkManager.sourceIdx(w); - - if (atVertex == sourceVertexIds[sourceIdx]) { - continue; - } - - synchronized (buffers[sourceIdx]) { - buffers[sourceIdx].add(atVertex); - } - } - - long tt = (System.currentTimeMillis() - t1); - if (tt > 1000) { - logger.info("Processing " + walks.length + " took " + tt + " ms."); - } - } + protected abstract void _processWalks(WalkArray walkArray, int[] atVertices); @Override public IdCount[] getTop(int vertexId, int nTop) throws RemoteException { @@ -320,7 +296,7 @@ protected void drainBuffer(int sourceIdx) { } @Override - public void processWalks(final int[] walks, final int[] atVertices) throws RemoteException { + public void processWalks(final WalkArray walks, final int[] atVertices) throws RemoteException { try { pendingQueue.put(new WalkSubmission(walks, atVertices)); int pending = pendingQueue.size(); @@ -392,7 +368,9 @@ public static void main(String[] args) throws Exception { } catch (Exception err) { logger.info("Registry already created?"); } - Naming.rebind(bindAddress, new DrunkardCompanion(4, (long) (Runtime.getRuntime().maxMemory() * 0.75))); + // TODO? Not sure what the main class is used for; just for testing? This may need to be + // put into the subclass. 
+ Naming.rebind(bindAddress, new IntDrunkardCompanion(4, (long) (Runtime.getRuntime().maxMemory() * 0.75))); logger.info("Prune fraction: " + pruneFraction); } diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/IntDrunkardCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/IntDrunkardCompanion.java new file mode 100644 index 00000000..1673e0f8 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/IntDrunkardCompanion.java @@ -0,0 +1,42 @@ +package edu.cmu.graphchi.walks.distributions; + +import edu.cmu.graphchi.walks.IntWalkManager; +import edu.cmu.graphchi.walks.WalkArray; +import edu.cmu.graphchi.walks.IntWalkArray; + +import java.rmi.RemoteException; + +public class IntDrunkardCompanion extends DrunkardCompanion { + private IntWalkManager manager; + + public IntDrunkardCompanion( final int numThreads, final long maxMemoryBytes) + throws RemoteException { + super(numThreads, maxMemoryBytes); + // TODO: may be better to pass this in... + manager = new IntWalkManager(0, 0); + } + + @Override + protected void _processWalks(WalkArray walkArray, int[] atVertices) { + int[] walks = ((IntWalkArray)walkArray).getArray(); + long t1 = System.currentTimeMillis(); + for(int i=0; i < walks.length; i++) { + int w = walks[i]; + int atVertex = atVertices[i]; + int sourceIdx = manager.sourceIdx(w); + + if (atVertex == sourceVertexIds[sourceIdx]) { + continue; + } + + synchronized (buffers[sourceIdx]) { + buffers[sourceIdx].add(atVertex); + } + } + + long tt = (System.currentTimeMillis() - t1); + if (tt > 1000) { + logger.info("Processing " + walks.length + " took " + tt + " ms."); + } + } +} diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/LongDrunkardCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/LongDrunkardCompanion.java new file mode 100644 index 00000000..a20cee01 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/LongDrunkardCompanion.java @@ -0,0 +1,42 @@ +package 
edu.cmu.graphchi.walks.distributions; + +import edu.cmu.graphchi.walks.LongWalkManager; +import edu.cmu.graphchi.walks.WalkArray; +import edu.cmu.graphchi.walks.LongWalkArray; + +import java.rmi.RemoteException; + +public class LongDrunkardCompanion extends DrunkardCompanion { + private LongWalkManager manager; + + public LongDrunkardCompanion( final int numThreads, final long maxMemoryBytes) + throws RemoteException { + super(numThreads, maxMemoryBytes); + // TODO: may be better to pass this in... + manager = new LongWalkManager(0, 0); + } + + @Override + protected void _processWalks(WalkArray walkArray, int[] atVertices) { + long[] walks = ((LongWalkArray)walkArray).getArray(); + long t1 = System.currentTimeMillis(); + for(int i=0; i < walks.length; i++) { + long w = walks[i]; + int atVertex = atVertices[i]; + int sourceIdx = manager.sourceIdx(w); + + if (atVertex == sourceVertexIds[sourceIdx]) { + continue; + } + + synchronized (buffers[sourceIdx]) { + buffers[sourceIdx].add(atVertex); + } + } + + long tt = (System.currentTimeMillis() - t1); + if (tt > 1000) { + logger.info("Processing " + walks.length + " took " + tt + " ms."); + } + } +} diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/RemoteDrunkardCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/RemoteDrunkardCompanion.java index 6365523b..c5c0afd9 100644 --- a/src/main/java/edu/cmu/graphchi/walks/distributions/RemoteDrunkardCompanion.java +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/RemoteDrunkardCompanion.java @@ -1,6 +1,7 @@ package edu.cmu.graphchi.walks.distributions; import edu.cmu.graphchi.util.IdCount; +import edu.cmu.graphchi.walks.WalkArray; import java.rmi.Remote; import java.rmi.RemoteException; @@ -20,7 +21,7 @@ public interface RemoteDrunkardCompanion extends Remote { void setSources(int[] sources) throws RemoteException; - void processWalks(int[] walks, int[] atVertices) throws RemoteException; + void processWalks(WalkArray walks, int[] atVertices) 
throws RemoteException; void outputDistributions(String outputFile) throws RemoteException; diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java new file mode 100644 index 00000000..b84ea876 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java @@ -0,0 +1,415 @@ +package edu.cmu.graphchi.walks.distributions; + +import edu.cmu.graphchi.ChiLogger; +import edu.cmu.graphchi.walks.WalkArray; +import edu.cmu.graphchi.walks.LongWalkArray; +import edu.cmu.graphchi.walks.distributions.DiscreteDistribution; +import edu.cmu.graphchi.walks.distributions.RemoteDrunkardCompanion; +import edu.cmu.graphchi.util.IdCount; +import edu.cmu.graphchi.util.IntegerBuffer; + +import java.io.*; +import java.rmi.Naming; +import java.rmi.RemoteException; +import java.rmi.registry.LocateRegistry; +import java.rmi.server.UnicastRemoteObject; +import java.text.NumberFormat; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Logger; + + +/** + * A DrunkardCompanion object that has two keys to get to a DiscreteDistribution, instead of one. + * Where DrunkardCompanion represents a matrix of values (one key to get to a DiscreteDistribution + * vector), this represents a rank 3 tensor (two keys to get to a DiscreteDistribution). This is + * suitable for collecting more complicated statistics than DrunkardCompanion, though the current + * implementation is perhaps a little slower than it could be, using nested hash maps instead of a + * more efficient data structure. 
+ */ +public abstract class TwoKeyCompanion extends UnicastRemoteObject + implements RemoteDrunkardCompanion { + + protected static class WalkSubmission { + WalkArray walks; + int[] atVertices; + + private WalkSubmission(WalkArray walks, int[] atVertices) { + this.walks = walks; + this.atVertices = atVertices; + } + } + + protected static final int BUFFER_CAPACITY = 128; + protected static final int BUFFER_MAX = 128; + + boolean isLowInMemory = false; + + // Using hash maps of hash maps isn't the most efficient thing to do here, but it'll do for + // now. + protected ConcurrentHashMap> distributions; + protected ConcurrentHashMap> buffers; + protected ConcurrentHashMap> distrLocks; + protected AtomicInteger outstanding = new AtomicInteger(0); + + protected ExecutorService parallelExecutor; + protected long maxMemoryBytes; + + protected LinkedBlockingQueue pendingQueue = new LinkedBlockingQueue(); + + protected static Logger logger = ChiLogger.getLogger("pathcompanion"); + protected Timer timer = new Timer(true); + + /** + * Prints estimate of memory usage + */ + private long memoryAuditReport() { + long companionOverHeads = 0; + + long bufferMem = 0; + long maxMem = 0; + int bufferCount = 0; + for (ConcurrentHashMap map : buffers.values()) { + companionOverHeads += 4; + for(IntegerBuffer buf : map.values()) { + bufferCount += 1; + companionOverHeads += 4; + long est = buf.memorySizeEst(); + bufferMem += est; + maxMem = Math.max(maxMem, est); + } + } + + long distributionMem = 0; + long maxDistMem = 0; + long avoidMem = 0; + int distCount = 0; + for (ConcurrentHashMap map : distributions.values()) { + companionOverHeads += 4; + for(DiscreteDistribution dist : map.values()) { + distCount += 1; + companionOverHeads += 4; + long est = dist.memorySizeEst(); + distributionMem += est; + maxDistMem = Math.max(est, maxDistMem); + avoidMem += dist.avoidCount() * 6; + } + } + + NumberFormat nf = NumberFormat.getInstance(Locale.US); + + logger.info("======= MEMORY REPORT 
======"); + logger.info("Companion internal: " + nf.format(companionOverHeads / 1024. / 1024.) + " mb"); + + logger.info("Buffer mem: " + nf.format(bufferMem / 1024. / 1024.) + " mb"); + logger.info("Avg bytes per buffer: " + + nf.format(bufferMem * 1.0 / bufferCount / 1024.) + " kb"); + logger.info("Max buffer was: " + nf.format(maxMem / 1024.) + "kb"); + + logger.info("Distribution mem: " + nf.format(distributionMem / 1024. / 1024.) + " mb"); + logger.info("- of which avoids: " + nf.format(avoidMem / 1024. / 1024.) + " mb"); + + logger.info("Avg bytes per distribution: " + + nf.format((distributionMem * 1.0 / distCount / 1024.)) + " kb"); + logger.info("Max distribution: " + nf.format(maxDistMem / 1024.) + " kb"); + + long totalMem = companionOverHeads + bufferMem + distributionMem; + logger.info("** Total: " + nf.format(totalMem / 1024. / 1024. / 1024.) + + " GB (low-mem limit " + + Runtime.getRuntime().maxMemory() * 0.75 / 1024. / 1024. / 1024. + "GB)" ); + isLowInMemory = totalMem > maxMemoryBytes; + + if (isLowInMemory) { + compactMemoryUsage(); + } + + return totalMem; + } + + /** + * Removes tails from distributions to save memory + */ + private void compactMemoryUsage() { + long before=0; + long after=0; + + for (Integer firstKey : distributions.keySet()) { + ConcurrentHashMap map = distributions.get(firstKey); + for (Integer secondKey : map.keySet()) { + DiscreteDistribution prevDist, newDist; + synchronized (distrLocks.get(firstKey).get(secondKey)) { + prevDist = map.get(secondKey); + newDist = prevDist.filteredAndShift(2); + map.put(secondKey, newDist); + } + before += prevDist.memorySizeEst(); + after += newDist.memorySizeEst(); + } + } + + logger.info("** Compacted: " + (before / 1024. / 1024. / 1024.) + " GB --> " + + (after / 1024. / 1024. / 1024.) 
+ " GB"); + } + + + /** + * Creates the TwoKeyCompanion object + * @param numThreads number of worker threads (4 is common) + * @param maxMemoryBytes maximum amount of memory to use for storing the distributions + */ + public TwoKeyCompanion(int numThreads, long maxMemoryBytes) throws RemoteException { + this.maxMemoryBytes = maxMemoryBytes; + parallelExecutor = + Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + + buffers = new ConcurrentHashMap>(); + distrLocks = new ConcurrentHashMap>(); + distributions = new ConcurrentHashMap>(); + + + for(int threadId=0; threadId < numThreads; threadId++) { + Thread processingThread = new Thread(new ProcessingThread(threadId, numThreads)); + processingThread.setDaemon(true); + processingThread.start(); + } + } + + private class ProcessingThread implements Runnable { + private int id; + private int numThreads; + + public ProcessingThread(int id, int numThreads) { + this.id = id; + this.numThreads = numThreads; + } + @Override + public void run() { + try { + long unpurgedWalks = 0; + while(true) { + WalkSubmission subm = pendingQueue.poll(2000, TimeUnit.MILLISECONDS); + if (subm != null) { + _processWalks(subm.walks, subm.atVertices); + unpurgedWalks += subm.walks.size(); + } + if (distributions != null) { + if (unpurgedWalks > distributions.size() * 10 || + (subm == null && unpurgedWalks > 100000)) { + logger.fine("Purge:" + unpurgedWalks); + unpurgedWalks = 0; + + // Loop to see what to drain. Every thread looks for + // different buffers. 
+ for (Integer firstKey : buffers.keySet()) { + ConcurrentHashMap map = + buffers.get(firstKey); + for (Integer secondKey : map.keySet()) { + if ((firstKey + secondKey) % numThreads != id) { + continue; + } + // Drain asynchronously + outstanding.incrementAndGet(); + final IntegerBuffer toDrain = map.get(secondKey); + final int first = firstKey; + final int second = secondKey; + + synchronized (toDrain) { + map.put(secondKey, new IntegerBuffer(BUFFER_CAPACITY)); + } + parallelExecutor.submit(new Runnable() { public void run() { + try { + int[] d = toDrain.toIntArray(); + Arrays.sort(d); + DiscreteDistribution dist = new DiscreteDistribution(d); + mergeWith(first, second, dist); + } catch (Exception err ) { + err.printStackTrace(); + } finally { + outstanding.decrementAndGet(); + } + }}); + } + } + } + } + } + } catch (Exception err) { + err.printStackTrace(); + } + } + } + + protected void ensureExists(int firstKey, int secondKey) { + ConcurrentHashMap map = distrLocks.get(firstKey); + if (map == null) { + ConcurrentHashMap new_map = new ConcurrentHashMap(); + map = distrLocks.putIfAbsent(firstKey, new_map); + if (map == null) { + map = new_map; + } + } + Object lock = map.get(secondKey); + if (lock == null) { + Object new_lock = new Object(); + lock = map.putIfAbsent(secondKey, new_lock); + if (lock == null) { + synchronized(new_lock) { + ConcurrentHashMap dmap = + distributions.get(firstKey); + if (dmap == null) { + dmap = new ConcurrentHashMap(); + distributions.put(firstKey, dmap); + } + dmap.put(secondKey, new DiscreteDistribution()); + ConcurrentHashMap bmap = buffers.get(firstKey); + if (bmap == null) { + bmap = new ConcurrentHashMap(); + buffers.put(firstKey, bmap); + } + bmap.put(secondKey, new IntegerBuffer(BUFFER_CAPACITY)); + } + } + } + } + + private void mergeWith(int firstKey, int secondKey, DiscreteDistribution distr) { + ensureExists(firstKey, secondKey); + synchronized (distrLocks.get(firstKey).get(secondKey)) { + DiscreteDistribution mergeInto = 
distributions.get(firstKey).get(secondKey); + DiscreteDistribution merged = DiscreteDistribution.merge(mergeInto, distr); + distributions.get(firstKey).put(secondKey, merged); + } + } + + @Override + public void setAvoidList(int sourceIdx, int[] avoidList) throws RemoteException { + // We don't need this, so this is a no-op + } + + @Override + public IdCount[] getTop(int vertexId, int nTop) throws RemoteException { + // Not really useful for us + return null; + } + + @Override + public void setSources(int[] sources) throws RemoteException { + // We don't use an array of source indices, so we just take the opportunity to initialize + // our objects. + + // Restart timer + timer.cancel(); + timer = new Timer(true); + + timer.schedule(new TimerTask() { + @Override + public void run() { + memoryAuditReport(); + } + }, 5000, 60000); + } + + protected void _processWalks(WalkArray walkArray, int[] atVertices) { + long[] walks = ((LongWalkArray)walkArray).getArray(); + long t1 = System.currentTimeMillis(); + for(int i=0; i < walks.length; i++) { + long w = walks[i]; + if (ignoreWalk(w)) { + continue; + } + int atVertex = atVertices[i]; + int firstKey = getFirstKey(w, atVertex); + int secondKey = getSecondKey(w, atVertex); + int value = getValue(w, atVertex); + + ensureExists(firstKey, secondKey); + IntegerBuffer buffer = buffers.get(firstKey).get(secondKey); + synchronized (buffer) { + buffer.add(value); + } + } + + long tt = (System.currentTimeMillis() - t1); + if (tt > 1000) { + logger.info("Processing " + walks.length + " took " + tt + " ms."); + } + } + + protected boolean ignoreWalk(long walk) { + if (walk == 0) { + return true; + } + return false; + } + + protected abstract int getFirstKey(long walk, int atVertex); + + protected abstract int getSecondKey(long walk, int atVertex); + + protected abstract int getValue(long walk, int atVertex); + + protected void drainBuffer(int firstKey, int secondKey) { + IntegerBuffer buffer = buffers.get(firstKey).get(secondKey); + 
int[] arr; + synchronized (buffer) { + arr = buffer.toIntArray(); + buffers.get(firstKey).put(secondKey, new IntegerBuffer(BUFFER_CAPACITY)); + } + Arrays.sort(arr); + DiscreteDistribution dist = new DiscreteDistribution(arr); + mergeWith(firstKey, secondKey, dist); + } + + @Override + public void processWalks(final WalkArray walks, final int[] atVertices) throws RemoteException { + try { + pendingQueue.put(new WalkSubmission(walks, atVertices)); + int pending = pendingQueue.size(); + if (pending > 50 && pending % 20 == 0) { + logger.info("Warning, pending queue size: " + pending); + } + } catch (Exception err) { + err.printStackTrace(); + } + } + + protected void waitForFinish() { + logger.info("Waiting for processing to finish"); + while (pendingQueue.size() > 0) { + logger.info("..."); + try { + Thread.sleep(500); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + while(outstanding.get() > 0) { + logger.info("..."); + try { + Thread.sleep(500); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + + @Override + public abstract void outputDistributions(String outputFile) throws RemoteException; + + @Override + public void outputDistributions(String outputFile, int nTop) throws RemoteException { + outputDistributions(outputFile); + } + + public void close() { + parallelExecutor.shutdown(); + timer.cancel(); + } +} diff --git a/test/edu/cmu/graphchi/walks/TestWalkManager.java b/test/edu/cmu/graphchi/walks/TestWalkManager.java index b79974de..1849059a 100644 --- a/test/edu/cmu/graphchi/walks/TestWalkManager.java +++ b/test/edu/cmu/graphchi/walks/TestWalkManager.java @@ -13,60 +13,111 @@ public class TestWalkManager { + // There's a lot of duplicated code in here to separately test the int and long versions of + // WalkManager; that could probably be fixed, to just test the parts that are necessary for + // each one... 
TODO + @Test - public void testWalkEncodings() { - WalkManager wmgr = new WalkManager(1000, 10000); + public void testIntWalkEncodings() { + IntWalkManager wmgr = new IntWalkManager(1000, 10000); int x = wmgr.encode(3, true, 114); System.out.println("X = " + x); - boolean hop = wmgr.hop(x); + boolean trackBit = wmgr.trackBit(x); int src = wmgr.sourceIdx(x); int off = wmgr.off(x); assertEquals(3, src); - assertEquals(true, hop); + assertEquals(true, trackBit); assertEquals(114, off); x = wmgr.encode(16777200, false, 126); - hop = wmgr.hop(x); + trackBit = wmgr.trackBit(x); src = wmgr.sourceIdx(x); off = wmgr.off(x); assertEquals(16777200, src); - assertEquals(false, hop); + assertEquals(false, trackBit); assertEquals(126, off); for(int v=0; v<15000000; v+=29) { for (int o=0; o<128; o++) { - x = WalkManager.encode(v, true, o); - int y = WalkManager.encode(v, false, o); - assertEquals(v, WalkManager.sourceIdx(x)); - assertEquals(v, WalkManager.sourceIdx(y)); + x = wmgr.encode(v, true, o); + int y = wmgr.encode(v, false, o); + assertEquals(v, wmgr.sourceIdx(x)); + assertEquals(v, wmgr.sourceIdx(y)); - assertEquals(o, WalkManager.off(x)); - assertEquals(o, WalkManager.off(y)); + assertEquals(o, wmgr.off(x)); + assertEquals(o, wmgr.off(y)); - assertEquals(true, WalkManager.hop(x)); - assertEquals(false, WalkManager.hop(y)); + assertEquals(true, wmgr.trackBit(x)); + assertEquals(false, wmgr.trackBit(y)); } } x = wmgr.encode(16367, true, 0); - hop = wmgr.hop(x); + trackBit = wmgr.trackBit(x); src = wmgr.sourceIdx(x); off = wmgr.off(x); assertEquals(16367, src); - assertEquals(true, hop); + assertEquals(true, trackBit); assertEquals(0, off); } + @Test + public void testLongWalkEncodings() { + LongWalkManager wmgr = new LongWalkManager(1000, 10000); + long x = wmgr.encode(3, true, 114); + + System.out.println("X = " + x); + + boolean trackBit = wmgr.trackBit(x); + int src = wmgr.sourceIdx(x); + int off = wmgr.off(x); + assertEquals(3, src); + assertEquals(true, trackBit); + 
assertEquals(114, off); + + x = wmgr.encode(16777200, false, 126); + trackBit = wmgr.trackBit(x); + src = wmgr.sourceIdx(x); + off = wmgr.off(x); + assertEquals(16777200, src); + assertEquals(false, trackBit); + assertEquals(126, off); + + + for(int v=0; v<15000000; v+=29) { + for (int o=0; o<128; o++) { + x = wmgr.encode(v, true, o); + long y = wmgr.encode(v, false, o); + assertEquals(v, wmgr.sourceIdx(x)); + assertEquals(v, wmgr.sourceIdx(y)); + + assertEquals(o, wmgr.off(x)); + assertEquals(o, wmgr.off(y)); + + assertEquals(true, wmgr.trackBit(x)); + assertEquals(false, wmgr.trackBit(y)); + } + } + + + x = wmgr.encode(16367, true, 0); + trackBit = wmgr.trackBit(x); + src = wmgr.sourceIdx(x); + off = wmgr.off(x); + assertEquals(16367, src); + assertEquals(true, trackBit); + assertEquals(0, off); + } @Test - public void testWalkManager() throws IOException { + public void testIntWalkManager() throws IOException { int nvertices = 33333; - WalkManager wmgr = new WalkManager(nvertices, 40000); + IntWalkManager wmgr = new IntWalkManager(nvertices, 40000); int tot = 0; for(int j=877; j < 3898; j++) { wmgr.addWalkBatch(j, (j % 100) + 10); @@ -80,12 +131,12 @@ public void testWalkManager() throws IOException { // Now get two snapshots WalkSnapshot snapshot1 = wmgr.grabSnapshot(890, 1300); for(int j=890; j <= 1300; j++) { - int[] vertexwalks = snapshot1.getWalksAtVertex(j, true); - assertEquals((j % 100) + 10, WalkManager.getWalkLength(vertexwalks)); + WalkArray vertexwalks = snapshot1.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); - for(int w : vertexwalks) { + for(int w : ((IntWalkArray)vertexwalks).getArray()) { if (w != -1) - assertEquals(false, wmgr.hop(w)); + assertEquals(false, wmgr.trackBit(w)); } } assertEquals(890, snapshot1.getFirstVertex()); @@ -94,26 +145,26 @@ public void testWalkManager() throws IOException { // Next snapshot should be empty WalkSnapshot snapshot2 = wmgr.grabSnapshot(890, 1300); for(int j=890; j 
<= 1300; j++) { - int[] vertexwalks = snapshot2.getWalksAtVertex(j, true); + WalkArray vertexwalks = snapshot2.getWalksAtVertex(j, true); assertNull(vertexwalks); } WalkSnapshot snapshot3 = wmgr.grabSnapshot(877, 889); for(int j=877; j <= 889; j++) { - int[] vertexwalks = snapshot3.getWalksAtVertex(j, true); - assertEquals((j % 100) + 10, WalkManager.getWalkLength(vertexwalks)); + WalkArray vertexwalks = snapshot3.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); } WalkSnapshot snapshot4 = wmgr.grabSnapshot(877, 889); for(int j=877; j <= 889; j++) { - int[] vertexwalks = snapshot4.getWalksAtVertex(j, true); + WalkArray vertexwalks = snapshot4.getWalksAtVertex(j, true); assertNull(vertexwalks); } WalkSnapshot snapshot5 = wmgr.grabSnapshot(1301, 3898); for(int j=1301; j < 3898; j++) { - int[] vertexwalks = snapshot5.getWalksAtVertex(j, true); - assertEquals((j % 100) + 10, WalkManager.getWalkLength(vertexwalks)); + WalkArray vertexwalks = snapshot5.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); } @@ -122,26 +173,108 @@ public void testWalkManager() throws IOException { WalkSnapshot snapshot6 = wmgr.grabSnapshot(1301, 3898); for(int j=1301; j < 3898; j++) { - int[] vertexwalks = snapshot6.getWalksAtVertex(j, true); + WalkArray vertexwalks = snapshot6.getWalksAtVertex(j, true); assertNull(vertexwalks); } /* Then update some walks */ - wmgr.updateWalk(88, 22098, true); - wmgr.updateWalk(41, 76, false); + int w = wmgr.encode(41, false, 0); + wmgr.moveWalk(w, 76, false); + w = wmgr.encode(88, false, 0); + wmgr.moveWalk(w, 22098, true); WalkSnapshot snapshot7 = wmgr.grabSnapshot(76, 22098); - int[] w1 = snapshot7.getWalksAtVertex(76, true); - assertEquals(1, WalkManager.getWalkLength(w1)); - int w = w1[0]; + WalkArray w1 = snapshot7.getWalksAtVertex(76, true); + assertEquals(1, wmgr.getWalkLength(w1)); + w = ((IntWalkArray)w1).getArray()[0]; assertEquals(41, wmgr.sourceIdx(w)); - 
assertEquals(false, wmgr.hop(w)); + assertEquals(false, wmgr.trackBit(w)); - int[] w2 = snapshot7.getWalksAtVertex(22098, true); - w = w2[0]; + WalkArray w2 = snapshot7.getWalksAtVertex(22098, true); + w = ((IntWalkArray)w2).getArray()[0]; assertEquals(88, wmgr.sourceIdx(w)); - assertEquals(true, wmgr.hop(w)); - + assertEquals(true, wmgr.trackBit(w)); } + @Test + public void testLongWalkManager() throws IOException { + int nvertices = 33333; + LongWalkManager wmgr = new LongWalkManager(nvertices, 40000); + int tot = 0; + for(int j=877; j < 3898; j++) { + wmgr.addWalkBatch(j, (j % 100) + 10); + tot += (j % 100) + 10; + } + + wmgr.initializeWalks(); + + assertEquals(tot, wmgr.getTotalWalks()); + + // Now get two snapshots + WalkSnapshot snapshot1 = wmgr.grabSnapshot(890, 1300); + for(int j=890; j <= 1300; j++) { + WalkArray vertexwalks = snapshot1.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); + + for(long w : ((LongWalkArray)vertexwalks).getArray()) { + if (w != -1) + assertEquals(false, wmgr.trackBit(w)); + } + } + assertEquals(890, snapshot1.getFirstVertex()); + assertEquals(1300, snapshot1.getLastVertex()); + + // Next snapshot should be empty + WalkSnapshot snapshot2 = wmgr.grabSnapshot(890, 1300); + for(int j=890; j <= 1300; j++) { + WalkArray vertexwalks = snapshot2.getWalksAtVertex(j, true); + assertNull(vertexwalks); + } + + WalkSnapshot snapshot3 = wmgr.grabSnapshot(877, 889); + for(int j=877; j <= 889; j++) { + WalkArray vertexwalks = snapshot3.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, wmgr.getWalkLength(vertexwalks)); + } + + WalkSnapshot snapshot4 = wmgr.grabSnapshot(877, 889); + for(int j=877; j <= 889; j++) { + WalkArray vertexwalks = snapshot4.getWalksAtVertex(j, true); + assertNull(vertexwalks); + } + + WalkSnapshot snapshot5 = wmgr.grabSnapshot(1301, 3898); + for(int j=1301; j < 3898; j++) { + WalkArray vertexwalks = snapshot5.getWalksAtVertex(j, true); + assertEquals((j % 100) + 10, 
wmgr.getWalkLength(vertexwalks)); + } + + + // wmgr.dumpToFile(snapshot5, "tmp/snapshot5"); + + + WalkSnapshot snapshot6 = wmgr.grabSnapshot(1301, 3898); + for(int j=1301; j < 3898; j++) { + WalkArray vertexwalks = snapshot6.getWalksAtVertex(j, true); + assertNull(vertexwalks); + } + + /* Then update some walks */ + long w = wmgr.encode(41, false, 0); + wmgr.moveWalk(w, 76, false); + w = wmgr.encode(88, false, 0); + wmgr.moveWalk(w, 22098, true); + + WalkSnapshot snapshot7 = wmgr.grabSnapshot(76, 22098); + WalkArray w1 = snapshot7.getWalksAtVertex(76, true); + assertEquals(1, wmgr.getWalkLength(w1)); + w = ((LongWalkArray)w1).getArray()[0]; + assertEquals(41, wmgr.sourceIdx(w)); + assertEquals(false, wmgr.trackBit(w)); + + WalkArray w2 = snapshot7.getWalksAtVertex(22098, true); + w = ((LongWalkArray)w2).getArray()[0]; + assertEquals(88, wmgr.sourceIdx(w)); + assertEquals(true, wmgr.trackBit(w)); + } } diff --git a/test/edu/cmu/graphchi/walks/TestWalkManagerWithPaths.java b/test/edu/cmu/graphchi/walks/TestWalkManagerWithPaths.java deleted file mode 100644 index 8aafd6f7..00000000 --- a/test/edu/cmu/graphchi/walks/TestWalkManagerWithPaths.java +++ /dev/null @@ -1,121 +0,0 @@ -package edu.cmu.graphchi.walks; - -import org.junit.Test; - -import java.io.IOException; - -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertNull; - -/** - * @author Aapo Kyrola, akyrola@cs.cmu.edu, akyrola@twitter.com - */ -public class TestWalkManagerWithPaths { - - @Test - public void testWalkEncodings() { - WalkManagerForPaths wmgr = new WalkManagerForPaths(1000); - long x = wmgr.encode(3, 2, 284); - int hop = wmgr.hop(x); - int id = wmgr.walkId(x); - int off = wmgr.off(x); - assertEquals(3, id); - assertEquals(2, hop); - assertEquals(284, off); - - x = wmgr.encode(878, 0, 999); - hop = wmgr.hop(x); - id = wmgr.walkId(x); - off = wmgr.off(x); - assertEquals(878, id); - assertEquals(0, hop); - assertEquals(999, off); - - x = 
wmgr.encode(1987000001, 8, 0); - hop = wmgr.hop(x); - id = wmgr.walkId(x); - off = wmgr.off(x); - assertEquals(1987000001, id); - assertEquals(8, hop); - assertEquals(0, off); - } - - - - @Test - public void testWalkManagerWithPaths() throws IOException { - int nvertices = 33333; - WalkManagerForPaths wmgr = new WalkManagerForPaths(nvertices); - int tot = 0; - for(int j=877; j < 3898; j++) { - wmgr.addWalkBatch(j, (j % 100) + 10); - tot += (j % 100) + 10; - } - - wmgr.initializeWalks(); - - assertEquals(tot, wmgr.getTotalWalks()); - - // Now get two snapshots - WalkSnapshotForPaths snapshot1 = wmgr.grabSnapshot(890, 1300); - for(int j=890; j <= 1300; j++) { - long[] vertexwalks = snapshot1.getWalksAtVertex(j); - assertEquals((j % 100) + 10, vertexwalks.length); - - for(long w : vertexwalks) { - if (w != -1) - assertEquals(0, wmgr.hop(w)); - } - } - assertEquals(890, snapshot1.getFirstVertex()); - assertEquals(1300, snapshot1.getLastVertex()); - - // Next snapshot should be empty - WalkSnapshotForPaths snapshot2 = wmgr.grabSnapshot(890, 1300); - for(int j=890; j <= 1300; j++) { - long[] vertexwalks = snapshot2.getWalksAtVertex(j); - assertNull(vertexwalks); - } - - WalkSnapshotForPaths snapshot3 = wmgr.grabSnapshot(877, 889); - for(int j=877; j <= 889; j++) { - long[] vertexwalks = snapshot3.getWalksAtVertex(j); - assertEquals((j % 100) + 10, vertexwalks.length); - } - - WalkSnapshotForPaths snapshot4 = wmgr.grabSnapshot(877, 889); - for(int j=877; j <= 889; j++) { - long[] vertexwalks = snapshot4.getWalksAtVertex(j); - assertNull(vertexwalks); - } - - WalkSnapshotForPaths snapshot5 = wmgr.grabSnapshot(1301, 3898); - for(int j=1301; j < 3898; j++) { - long[] vertexwalks = snapshot5.getWalksAtVertex(j); - assertEquals((j % 100) + 10, vertexwalks.length); - } - // wmgr.dumpToFile(snapshot5, "snapshot5"); - - - WalkSnapshotForPaths snapshot6 = wmgr.grabSnapshot(1301, 3898); - for(int j=1301; j < 3898; j++) { - long[] vertexwalks = snapshot6.getWalksAtVertex(j); - 
assertNull(vertexwalks); - } - - /* Then update some walks */ - wmgr.updateWalk(88, 22098, 5); - wmgr.updateWalk(41, 76, 3); - - WalkSnapshotForPaths snapshot7 = wmgr.grabSnapshot(76, 22098); - long[] w1 = snapshot7.getWalksAtVertex(76); - assertEquals(1, w1.length); - long w = w1[0]; - assertEquals(3, wmgr.hop(w)); - - long[] w2 = snapshot7.getWalksAtVertex(22098); - w = w2[0]; - assertEquals(5, wmgr.hop(w)); - - } -} From 00a36dae85a466fa08e78a1484342ba478da8ddf Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Tue, 26 Aug 2014 14:55:40 -0700 Subject: [PATCH 17/29] Updated the README, and added deployment stuff to pom.xml --- README.md | 21 ++++ pom.xml | 286 +++++++++++++++++++++++++++++++----------------------- 2 files changed, 185 insertions(+), 122 deletions(-) diff --git a/README.md b/README.md index 823fdb37..9b740b66 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,27 @@ # GraphChi-java Version 0.2 +## Fork Info + +I made this fork around April 2013, working with Aapo when he was at CMU. The +point was to allow for storing more information in each walk than an int would +allow for. If we used a long instead, we could store more information within +each random walk (such as a random walk type or a walk id). But using +generics and autoboxing would take too much of a performance hit. So I +refactored the code and made it so you could use ints and longs directly, only +making the higher-level parts abstract. This worked well, and I could +successfully use GraphChi for doing random walks with either ints or longs +representing the walk, depending on how much information needed to be in the +walk for the particular application. + +The code changes I made never made it into GraphChi proper, though I used them +in my PRA code. Now I'm trying to make my PRA code available via maven, and +it's convenient to have this modified version of GraphChi available as well. 
+So, in August 2014, I forked the current graphchi-java code on github and +ported the changes that I made over to it, then added a mvn-repo branch so +that it's accessible via maven. I followed the instructions for doing that +here: +http://stackoverflow.com/questions/14013644/hosting-a-maven-repository-on-github. ## News diff --git a/pom.xml b/pom.xml index 6e07afc6..f80da6b9 100644 --- a/pom.xml +++ b/pom.xml @@ -1,140 +1,182 @@ - 4.0.0 + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + 4.0.0 - groupId - graphchi-java - 0.2 + groupId + graphchi-java + 0.2 - + + + github + + - SonatypeNexusSnapshots - Sonatype Nexus Snapshots - https://oss.sonatype.org/content/repositories/snapshots/ - true + SonatypeNexusSnapshots + Sonatype Nexus Snapshots + https://oss.sonatype.org/content/repositories/snapshots/ + true - - scala-tools.org - Scala-tools Maven2 Repository - http://scala-tools.org/repo-releases + + scala-tools.org + Scala-tools Maven2 Repository + http://scala-tools.org/repo-releases - - - com.yammer.metrics - metrics-core - 2.2.0 + + + internal.repo + Temporary Staging Repository + file:://${project.build.directory}/mvn-repo + + + + + + com.yammer.metrics + metrics-core + 2.2.0 - - - org.scala-lang - scala-library - 2.9.0-1 - - - mysql - mysql-connector-java - 5.1.6 - - - junit - junit - 4.10 - jar - test - true - - - org.apache.pig - pig - compile - 0.10.0 - - - org.apache.hadoop - hadoop-core - 0.20.2 - - - org.apache.commons - commons-math - 2.0 - - - org.apache.commons - commons-math - 2.1 - - - commons-cli - commons-cli - 1.2 - - - - - - - org.scala-tools - maven-scala-plugin - 2.15.2 - + + + org.scala-lang + scala-library + 2.9.0-1 + + + mysql + mysql-connector-java + 5.1.6 + + + junit + junit + 4.10 + jar + test + true + + + org.apache.pig + pig + compile + 0.10.0 + + + org.apache.hadoop + hadoop-core + 0.20.2 + + + org.apache.commons + 
commons-math + 2.0 + + + org.apache.commons + commons-math + 2.1 + + + commons-cli + commons-cli + 1.2 + + - - compile - - compile - - compile - - - test-compile - - testCompile - - test-compile - - - process-resources - - compile - - - - + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + - - maven-compiler-plugin - 2.3.2 + + compile + + compile + + compile + + + test-compile + + testCompile + + test-compile + + + process-resources + + compile + + + + - - 1.6 - 1.6 - - - - - org.apache.maven.plugins - maven-assembly-plugin - 2.2.2 - - - - - src/main/assembly/assembly.xml - - - + + maven-compiler-plugin + 2.3.2 - - target/test-classes - test - + + 1.6 + 1.6 + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.2.2 + + + + + src/main/assembly/assembly.xml + + + + + maven-deploy-plugin + 2.8.1 + + internal.repo::default::file://${project.build.directory}/mvn-repo + + + + com.github.github + site-maven-plugin + 0.9 + + Maven artifacts for ${project.version} + true + ${project.build.directory}/mvn-repo + refs/heads/mvn-repo + **/* + graphchi-java + matt-gardner + + + + + + site + + deploy + + + + + target/test-classes + test + From 6d552b4ba68e8b8a57bd4673d58185c7eb8cce7b Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Tue, 23 Sep 2014 08:48:17 -0700 Subject: [PATCH 18/29] Fixed a long-standing synchronization bug in TwoKeyCompanion --- .gitignore | 1 + .../cmu/graphchi/walks/distributions/TwoKeyCompanion.java | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index d0a07d50..015359ec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.class +*.swp # Package Files # *.jar diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java index b84ea876..f4250afa 100644 --- a/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java @@ 
-275,6 +275,12 @@ protected void ensureExists(int firstKey, int secondKey) { } bmap.put(secondKey, new IntegerBuffer(BUFFER_CAPACITY)); } + } else { + synchronized(lock) { + // We're just waiting for the other thread to release the lock, so that we can + // get the buffer without crashing later. Another thread actually added it, + // but we have to wait for them. + } } } } From 8efd9acab9db9ef20dea4ea0a1f14a3ca1118e80 Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Tue, 23 Sep 2014 08:52:38 -0700 Subject: [PATCH 19/29] Updated the github site-maven-plugin, which was broken at 0.9 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f80da6b9..177f3ac3 100644 --- a/pom.xml +++ b/pom.xml @@ -153,7 +153,7 @@ com.github.github site-maven-plugin - 0.9 + 0.10 Maven artifacts for ${project.version} true From 6d6827b99043a57b711f0073f00f77a1b0b388ad Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Tue, 23 Sep 2014 14:07:47 -0700 Subject: [PATCH 20/29] Cleared memory in the close() method of TwoKeyCompanion Because it looks like somehow the objects are not getting released (probably due to threads living on with references to the objects), we need to clear the memory when we're done, or we end up growing enormously huge by the end of a long series of runs. 
--- .../cmu/graphchi/walks/distributions/TwoKeyCompanion.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java index f4250afa..e6463743 100644 --- a/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java @@ -417,5 +417,12 @@ public void outputDistributions(String outputFile, int nTop) throws RemoteExcept public void close() { parallelExecutor.shutdown(); timer.cancel(); + clearMemory(); + } + + protected void clearMemory() { + distributions.clear(); + buffers.clear(); + distrLocks.clear(); } } From 1123e6b81d3748f2862166e15358f0e42d031e76 Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Tue, 30 Sep 2014 12:52:36 -0700 Subject: [PATCH 21/29] Moved the dumper thread out of the inner class, to resolve some unchecked exceptions in client code --- .gitignore | 3 + .../cmu/graphchi/walks/DrunkardDriver.java | 50 +++++---------- .../edu/cmu/graphchi/walks/DumperThread.java | 43 +++++++++++++ .../cmu/graphchi/walks/IntDrunkardDriver.java | 57 ++--------------- .../cmu/graphchi/walks/IntDumperThread.java | 63 ++++++++++++++++++ .../graphchi/walks/LongDrunkardDriver.java | 57 ++--------------- .../cmu/graphchi/walks/LongDumperThread.java | 64 +++++++++++++++++++ 7 files changed, 200 insertions(+), 137 deletions(-) create mode 100644 src/main/java/edu/cmu/graphchi/walks/DumperThread.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/IntDumperThread.java create mode 100644 src/main/java/edu/cmu/graphchi/walks/LongDumperThread.java diff --git a/.gitignore b/.gitignore index 015359ec..cdeb645d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ *.class *.swp +.classpath +.project +.settings # Package Files # *.jar diff --git a/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java 
b/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java index cded89ed..8865674b 100644 --- a/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java +++ b/src/main/java/edu/cmu/graphchi/walks/DrunkardDriver.java @@ -1,19 +1,23 @@ package edu.cmu.graphchi.walks; -import com.yammer.metrics.Metrics; -import com.yammer.metrics.core.Timer; -import com.yammer.metrics.core.TimerContext; -import edu.cmu.graphchi.*; -import edu.cmu.graphchi.engine.VertexInterval; -import edu.cmu.graphchi.preprocessing.VertexIdTranslate; - import java.rmi.RemoteException; -import java.util.*; +import java.util.ArrayList; +import java.util.Random; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.logging.Logger; +import com.yammer.metrics.Metrics; +import com.yammer.metrics.core.Timer; +import com.yammer.metrics.core.TimerContext; + +import edu.cmu.graphchi.ChiLogger; +import edu.cmu.graphchi.ChiVertex; +import edu.cmu.graphchi.GraphChiContext; +import edu.cmu.graphchi.engine.VertexInterval; + /** * Class to encapsulate the graphchi program running the show. * Due to several optimizations, it is quite complicated! 
@@ -24,9 +28,9 @@ public abstract class DrunkardDriver implements Gr protected static Logger logger = ChiLogger.getLogger("drunkard-driver"); protected LinkedBlockingQueue bucketQueue = new LinkedBlockingQueue(); - private boolean finished = false; + protected AtomicBoolean finished = new AtomicBoolean(false); + protected AtomicLong pendingWalksToSubmit = new AtomicLong(0); private Thread dumperThread; - private AtomicLong pendingWalksToSubmit = new AtomicLong(0); WalkUpdateFunction callback; private final Timer purgeTimer = @@ -45,30 +49,6 @@ public abstract class DrunkardDriver implements Gr protected abstract DumperThread createDumperThread(); - protected abstract class DumperThread implements Runnable { - - public void run() { - while(!finished || bucketQueue.size() > 0) { - BucketsToSend bucket = null; - try { - bucket = bucketQueue.poll(1000, TimeUnit.MILLISECONDS); - } catch (InterruptedException e) { - } - if (bucket != null) { - pendingWalksToSubmit.addAndGet(-bucket.length); - for(int i=0; i 0) { try { System.out.println("Waiting ..." 
+ bucketQueue.size()); diff --git a/src/main/java/edu/cmu/graphchi/walks/DumperThread.java b/src/main/java/edu/cmu/graphchi/walks/DumperThread.java new file mode 100644 index 00000000..ade4b471 --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/DumperThread.java @@ -0,0 +1,43 @@ +package edu.cmu.graphchi.walks; + +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +public abstract class DumperThread implements Runnable { + + private final LinkedBlockingQueue bucketQueue; + private final AtomicLong pendingWalksToSubmit; + private final AtomicBoolean finished; + + public DumperThread(LinkedBlockingQueue bucketQueue, + AtomicLong pendingWalksToSubmit, + AtomicBoolean finished) { + this.bucketQueue = bucketQueue; + this.pendingWalksToSubmit = pendingWalksToSubmit; + this.finished = finished; + } + + public void run() { + while(!finished.get() || bucketQueue.size() > 0) { + BucketsToSend bucket = null; + try { + bucket = bucketQueue.poll(1000, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + } + if (bucket != null) { + pendingWalksToSubmit.addAndGet(-bucket.length); + for(int i=0; i @Override protected IntDumperThread createDumperThread() { - return new IntDumperThread(); - } - - protected class IntDumperThread extends DrunkardDriver.DumperThread { - private int[] walks = new int[256 * 1024]; - private int[] vertices = new int[256 * 1024]; - private int idx = 0; - - @Override - protected void processWalks(BucketsToSend bucket, int i) { - IntWalkManager manager = (IntWalkManager) job.getWalkManager(); - IntWalkArray bucketWalks = (IntWalkArray) bucket.walks; - int w = bucketWalks.getArray()[i]; - int v = manager.off(w) + bucket.firstVertex; - - - // Skip walks with the track-bit (hop-bit) not set - boolean trackBit = manager.trackBit(w); - - if (!trackBit) { - return; - } - - walks[idx] = w; - vertices[idx] = v; - 
idx++; - - if (idx >= walks.length) { - try { - job.getCompanion().processWalks(new IntWalkArray(walks), vertices); - } catch (Exception err) { - err.printStackTrace(); - } - idx = 0; - } - } - - @Override - protected void sendRest() { - // Send rest - try { - int[] tmpWalks = new int[idx]; - int[] tmpVertices = new int[idx]; - System.arraycopy(walks, 0, tmpWalks, 0, idx); - System.arraycopy(vertices, 0, tmpVertices, 0, idx); - job.getCompanion().processWalks(new IntWalkArray(tmpWalks), tmpVertices); - } catch (Exception err) { - err.printStackTrace(); - } - } + return new IntDumperThread(bucketQueue, pendingWalksToSubmit, finished, job); } @Override diff --git a/src/main/java/edu/cmu/graphchi/walks/IntDumperThread.java b/src/main/java/edu/cmu/graphchi/walks/IntDumperThread.java new file mode 100644 index 00000000..7065194a --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/IntDumperThread.java @@ -0,0 +1,63 @@ +package edu.cmu.graphchi.walks; + +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +public class IntDumperThread extends DumperThread { + private final DrunkardJob job; + private int[] walks = new int[256 * 1024]; + private int[] vertices = new int[256 * 1024]; + private int idx = 0; + + public IntDumperThread(LinkedBlockingQueue bucketQueue, + AtomicLong pendingWalksToSubmit, + AtomicBoolean finished, + DrunkardJob job) { + super(bucketQueue, pendingWalksToSubmit, finished); + this.job = job; + } + + @Override + protected void processWalks(BucketsToSend bucket, int i) { + IntWalkManager manager = (IntWalkManager) job.getWalkManager(); + IntWalkArray bucketWalks = (IntWalkArray) bucket.walks; + int w = bucketWalks.getArray()[i]; + int v = manager.off(w) + bucket.firstVertex; + + + // Skip walks with the track-bit (hop-bit) not set + boolean trackBit = manager.trackBit(w); + + if (!trackBit) { + return; + } + + walks[idx] = w; + vertices[idx] = v; + 
idx++; + + if (idx >= walks.length) { + try { + job.getCompanion().processWalks(new IntWalkArray(walks), vertices); + } catch (Exception err) { + err.printStackTrace(); + } + idx = 0; + } + } + + @Override + protected void sendRest() { + // Send rest + try { + int[] tmpWalks = new int[idx]; + int[] tmpVertices = new int[idx]; + System.arraycopy(walks, 0, tmpWalks, 0, idx); + System.arraycopy(vertices, 0, tmpVertices, 0, idx); + job.getCompanion().processWalks(new IntWalkArray(tmpWalks), tmpVertices); + } catch (Exception err) { + err.printStackTrace(); + } + } +} diff --git a/src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java index c9d1c70c..4be6bc17 100644 --- a/src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java +++ b/src/main/java/edu/cmu/graphchi/walks/LongDrunkardDriver.java @@ -1,6 +1,10 @@ package edu.cmu.graphchi.walks; -import edu.cmu.graphchi.*; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +import edu.cmu.graphchi.GraphChiContext; import edu.cmu.graphchi.preprocessing.VertexIdTranslate; /** @@ -17,56 +21,7 @@ public LongDrunkardDriver(final DrunkardJob job, @Override protected LongDumperThread createDumperThread() { - return new LongDumperThread(); - } - - protected class LongDumperThread extends DrunkardDriver.DumperThread { - protected long[] walks = new long[256 * 1024]; - protected int[] vertices = new int[256 * 1024]; - protected int idx = 0; - - @Override - protected void processWalks(BucketsToSend bucket, int i) { - LongWalkArray bucketWalks = (LongWalkArray) bucket.walks; - long w = bucketWalks.getArray()[i]; - LongWalkManager manager = (LongWalkManager) job.getWalkManager(); - int v = manager.off(w) + bucket.firstVertex; - - - // Skip walks with the track-bit (hop-bit) not set - boolean trackBit = manager.trackBit(w); - - if (!trackBit) { - return; - } - - 
walks[idx] = w; - vertices[idx] = v; - idx++; - - if (idx >= walks.length) { - try { - job.getCompanion().processWalks(new LongWalkArray(walks), vertices); - } catch (Exception err) { - err.printStackTrace(); - } - idx = 0; - } - } - - @Override - protected void sendRest() { - // Send rest - try { - long[] tmpWalks = new long[idx]; - int[] tmpVertices = new int[idx]; - System.arraycopy(walks, 0, tmpWalks, 0, idx); - System.arraycopy(vertices, 0, tmpVertices, 0, idx); - job.getCompanion().processWalks(new LongWalkArray(tmpWalks), tmpVertices); - } catch (Exception err) { - err.printStackTrace(); - } - } + return new LongDumperThread(bucketQueue, pendingWalksToSubmit, finished, job); } @Override diff --git a/src/main/java/edu/cmu/graphchi/walks/LongDumperThread.java b/src/main/java/edu/cmu/graphchi/walks/LongDumperThread.java new file mode 100644 index 00000000..ec37e7cc --- /dev/null +++ b/src/main/java/edu/cmu/graphchi/walks/LongDumperThread.java @@ -0,0 +1,64 @@ +package edu.cmu.graphchi.walks; + +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +public class LongDumperThread extends DumperThread { + protected final DrunkardJob job; + protected long[] walks = new long[256 * 1024]; + protected int[] vertices = new int[256 * 1024]; + protected int idx = 0; + + public LongDumperThread(LinkedBlockingQueue bucketQueue, + AtomicLong pendingWalksToSubmit, + AtomicBoolean finished, + DrunkardJob job) { + super(bucketQueue, pendingWalksToSubmit, finished); + this.job = job; + } + + @Override + protected void processWalks(BucketsToSend bucket, int i) { + LongWalkArray bucketWalks = (LongWalkArray) bucket.walks; + long w = bucketWalks.getArray()[i]; + LongWalkManager manager = (LongWalkManager) job.getWalkManager(); + int v = manager.off(w) + bucket.firstVertex; + + + // Skip walks with the track-bit (hop-bit) not set + boolean trackBit = manager.trackBit(w); + + if 
(!trackBit) { + return; + } + + walks[idx] = w; + vertices[idx] = v; + idx++; + + if (idx >= walks.length) { + try { + job.getCompanion().processWalks(new LongWalkArray(walks), vertices); + } catch (Exception err) { + err.printStackTrace(); + } + idx = 0; + } + } + + @Override + protected void sendRest() { + // Send rest + try { + long[] tmpWalks = new long[idx]; + int[] tmpVertices = new int[idx]; + System.arraycopy(walks, 0, tmpWalks, 0, idx); + System.arraycopy(vertices, 0, tmpVertices, 0, idx); + job.getCompanion().processWalks(new LongWalkArray(tmpWalks), tmpVertices); + } catch (Exception err) { + err.printStackTrace(); + } + } +} + From 31e8c40f6bbbffa1b358ef7df4a7f4a380a31a49 Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Wed, 1 Oct 2014 13:26:36 -0400 Subject: [PATCH 22/29] Added build.sbt, removed fork info from README --- README.md | 24 +----------------------- build.sbt | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 23 deletions(-) create mode 100644 build.sbt diff --git a/README.md b/README.md index 9b740b66..9ad35b94 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,5 @@ # GraphChi-java -Version 0.2 - -## Fork Info - -I made this fork around April 2013, working with Aapo when he was at CMU. The -point was to allow for storing more information in each walk than an int would -allow for. If we used a long instead, we could store more information within -each random walk (such as a random walk type or a walk id). But using -generics and autoboxing would take too much of a performance hit. So I -refactored the code and made it so you could use ints and longs directly, only -making the higher-level parts abstract. This worked well, and I could -successfully use GraphChi for doing random walks with either ints or longs -representing the walk, depending on how much information needed to be in the -walk for the particular application. 
- -The code changes I made never made it into GraphChi proper, though I used them -in my PRA code. Now I'm trying to make my PRA code available via maven, and -it's convenient to have this modified version of GraphChi available as well. -So, in August 2014, I forked the current graphchi-java code on github and -ported the changes that I made over to it, then added a mvn-repo branch so -that it's accessible via maven. I followed the instructions for doing that -here: -http://stackoverflow.com/questions/14013644/hosting-a-maven-repository-on-github. +Version 0.2.1 ## News diff --git a/build.sbt b/build.sbt new file mode 100644 index 00000000..2d933653 --- /dev/null +++ b/build.sbt @@ -0,0 +1,54 @@ +organization := "org.graphchi" + +name := "graphchi-java" + +version := "0.2.1" + +scalaVersion := "2.11.2" + +crossScalaVersions := Seq("2.11.2", "2.10.3") + +javaSource in Test := baseDirectory.value / "test" + +libraryDependencies ++= Seq( + "com.yammer.metrics" % "metrics-core" % "2.2.0", + "mysql" % "mysql-connector-java" % "5.1.6", + "org.apache.pig" % "pig" % "0.10.0", + "org.apache.hadoop" % "hadoop-core" % "0.20.2", + "org.apache.commons" % "commons-math" % "2.1", + "commons-cli" % "commons-cli" % "1.2", + "com.novocode" % "junit-interface" % "0.11" % "test", + "org.scalacheck" %% "scalacheck" % "1.11.4" % "test", + "org.scalatest" %% "scalatest" % "2.2.1" % "test" +) + +publishMavenStyle := true + +pomIncludeRepository := { _ => false } + +publishTo := { + val nexus = "https://oss.sonatype.org/" + if (isSnapshot.value) + Some("snapshots" at nexus + "content/repositories/snapshots") + else + Some("releases" at nexus + "service/local/staging/deploy/maven2") +} + +publishArtifact in Test := false + +licenses := Seq("Apache-2.0" -> url("http://www.opensource.org/licenses/Apache-2.0")) + +homepage := Some(url("http://github.com/GraphChi/graphchi-java")) + +pomExtra := ( + + git@github.com:GraphChi/graphchi-java.git + scm:git:git@github.com:GraphChi/graphchi-java.git + 
+ <developers> + <developer> + <id>matt-gardner</id> + <name>Matt Gardner</name> + <url>http://cs.cmu.edu/~mg1</url> + </developer> + </developers>) From a82a1756bb7e4cdf6e2bad424395022bf1bca4ed Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Wed, 1 Oct 2014 13:27:20 -0400 Subject: [PATCH 23/29] Updated version in pom.xml --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 177f3ac3..ed4450d1 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ <groupId>groupId</groupId> <artifactId>graphchi-java</artifactId> - <version>0.2</version> + <version>0.2.1</version> From 9c7b1510b99ebe98465b1d5a11e2e01f8a6ba1b8 Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Wed, 1 Oct 2014 13:32:16 -0700 Subject: [PATCH 24/29] Update groupId in pom.xml --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ed4450d1..f8b92643 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> - <groupId>groupId</groupId> + <groupId>org.graphchi</groupId> <artifactId>graphchi-java</artifactId> <version>0.2.1</version> From 9e861c931f456ccc5a24babd6cc4f5545083602b Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Wed, 1 Oct 2014 15:19:55 -0700 Subject: [PATCH 25/29] Add build and dependency information to the README --- README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ad35b94..c503c99d 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,21 @@ Project for developing the Java version of GraphChi ( http://www.graphchi.org ), ### How to use -Read the README.txt for information on how to build and run the example applications. You are going to need [Maven](http://maven.apache.org/download.cgi) for building. +Read the README.txt for information on how to build and run the example applications. You are going to need [Maven](http://maven.apache.org/download.cgi) or [sbt](http://www.scala-sbt.org/) for building. + +graphchi-java is hosted in the maven central repository, so you can include it as a managed dependency in your maven or sbt builds.
For sbt, include the following line in your `build.sbt`: + +`libraryDependencies += "org.graphchi" %% "graphchi-java" % "0.2.1"` + +For maven, include the following in `<dependencies>`: + +``` +<dependency> + <groupId>org.graphchi</groupId> + <artifactId>graphchi-java_2.11</artifactId> + <version>0.2.1</version> +</dependency> +``` It is a very good idea to study the example applications carefully. There are currently three example applications in the package **edu.cmu.graphchi.apps**: * [PageRank](https://github.com/GraphChi/graphchi-java/tree/master/src/main/java/edu/cmu/graphchi/apps/Pagerank.java) for computing the famous [PageRank](http://en.wikipedia.org/wiki/PageRank) ranking From 695b420c8484af31cdff76edc72ce1fb7e4361ce Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Wed, 7 Jan 2015 16:47:33 -0500 Subject: [PATCH 26/29] Removing stack trace on exit from TwoKeyCompanion --- build.sbt | 2 +- .../edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/build.sbt b/build.sbt index 2d933653..b9e0e6fa 100644 --- a/build.sbt +++ b/build.sbt @@ -2,7 +2,7 @@ organization := "org.graphchi" name := "graphchi-java" -version := "0.2.1" +version := "0.2.2" scalaVersion := "2.11.2" diff --git a/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java b/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java index e6463743..6c3b70e7 100644 --- a/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java +++ b/src/main/java/edu/cmu/graphchi/walks/distributions/TwoKeyCompanion.java @@ -241,7 +241,9 @@ public void run() { } } } catch (Exception err) { - err.printStackTrace(); + if (!(err instanceof InterruptedException)) { + err.printStackTrace(); + } } } } From 254149e8ff1cbe7686e115268820e13de71e6976 Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Wed, 7 Jan 2015 16:53:04 -0500 Subject: [PATCH 27/29] Bump version number in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c503c99d..95815e64
100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # GraphChi-java -Version 0.2.1 +Version 0.2.2 ## News From 07358a44edcf58aa4faafc3390f55457a5d1f9be Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Wed, 7 Jan 2015 17:05:11 -0500 Subject: [PATCH 28/29] Updating some version numbers I missed --- README.md | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 95815e64..95aaed29 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Read the README.txt for information on how to build and run the example applicat graphchi-java is hosted in the maven central repository, so you can include it as a managed dependency in your maven or sbt builds. For sbt, include the following line in your `build.sbt`: -`libraryDependencies += "org.graphchi" %% "graphchi-java" % "0.2.1"` +`libraryDependencies += "org.graphchi" %% "graphchi-java" % "0.2.2"` For maven, include the following in `<dependencies>`: @@ -28,7 +28,7 @@ For maven, include the following in `<dependencies>`: <dependency> <groupId>org.graphchi</groupId> <artifactId>graphchi-java_2.11</artifactId> - <version>0.2.1</version> + <version>0.2.2</version> </dependency> ``` diff --git a/pom.xml b/pom.xml index f8b92643..52d0b868 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ <groupId>org.graphchi</groupId> <artifactId>graphchi-java</artifactId> - <version>0.2.1</version> + <version>0.2.2</version> From 0f1392c5c592e3329a7ac665b0f238ef327cc6df Mon Sep 17 00:00:00 2001 From: Matt Gardner Date: Mon, 25 Oct 2021 09:22:21 -0700 Subject: [PATCH 29/29] add apache 2 license --- LICENSE | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..9b259bdf --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document.
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.