From 779573d666c207694134f6c904ca3be67c1e5e80 Mon Sep 17 00:00:00 2001 From: Travis Harrison Date: Fri, 26 Jun 2015 12:42:33 -0500 Subject: [PATCH 01/17] db tool re-org --- src/MGRAST/Schema/m5nr_v4.cql | 2 +- src/MGRAST/{Schema => tools}/dump_analysis_for_cass.pl | 0 src/MGRAST/{Schema => tools}/dump_annotation_for_cass.pl | 0 src/MGRAST/{Schema => tools}/test_cass.py | 0 4 files changed, 1 insertion(+), 1 deletion(-) rename src/MGRAST/{Schema => tools}/dump_analysis_for_cass.pl (100%) rename src/MGRAST/{Schema => tools}/dump_annotation_for_cass.pl (100%) rename src/MGRAST/{Schema => tools}/test_cass.py (100%) diff --git a/src/MGRAST/Schema/m5nr_v4.cql b/src/MGRAST/Schema/m5nr_v4.cql index 02d87d36..1066a5b2 100644 --- a/src/MGRAST/Schema/m5nr_v4.cql +++ b/src/MGRAST/Schema/m5nr_v4.cql @@ -23,7 +23,7 @@ CREATE TABLE IF NOT EXISTS md5_annotation ( source text, is_protein boolean, single text, - lca text, + lca list, accession list, function list, organism list, diff --git a/src/MGRAST/Schema/dump_analysis_for_cass.pl b/src/MGRAST/tools/dump_analysis_for_cass.pl similarity index 100% rename from src/MGRAST/Schema/dump_analysis_for_cass.pl rename to src/MGRAST/tools/dump_analysis_for_cass.pl diff --git a/src/MGRAST/Schema/dump_annotation_for_cass.pl b/src/MGRAST/tools/dump_annotation_for_cass.pl similarity index 100% rename from src/MGRAST/Schema/dump_annotation_for_cass.pl rename to src/MGRAST/tools/dump_annotation_for_cass.pl diff --git a/src/MGRAST/Schema/test_cass.py b/src/MGRAST/tools/test_cass.py similarity index 100% rename from src/MGRAST/Schema/test_cass.py rename to src/MGRAST/tools/test_cass.py From 000079e0f73dad3bacb30b2d833c520cb0ba83d8 Mon Sep 17 00:00:00 2001 From: Travis Harrison Date: Mon, 29 Jun 2015 10:53:47 -0500 Subject: [PATCH 02/17] cassandra bulk upload --- src/MGRAST/tools/BulkLoader/BulkLoader.java | 186 ++++++++++++++++++++ src/MGRAST/tools/BulkLoader/BulkLoader.sh | 62 +++++++ 2 files changed, 248 insertions(+) create mode 100644 src/MGRAST/tools/BulkLoader/BulkLoader.java create mode 100755 src/MGRAST/tools/BulkLoader/BulkLoader.sh diff --git a/src/MGRAST/tools/BulkLoader/BulkLoader.java b/src/MGRAST/tools/BulkLoader/BulkLoader.java new file mode 100644 index 00000000..e0bb90cc --- /dev/null +++ b/src/MGRAST/tools/BulkLoader/BulkLoader.java @@ -0,0 +1,186 @@ +import java.io.*; +import java.math.BigDecimal; +import java.net.HttpURLConnection; +import java.net.URL; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.List; +import java.util.Arrays; +import java.util.ArrayList; + +import org.supercsv.io.CsvListReader; +import org.supercsv.prefs.CsvPreference; + +import org.apache.cassandra.config.Config; +import org.apache.cassandra.dht.Murmur3Partitioner; +import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.io.sstable.CQLSSTableWriter; + +public class BulkLoader { + + static String filename; + static String outdir; + static String schema; + static String insert; + + public static void main(String[] args) throws IOException { + if (args.length < 4) { + String concat = Arrays.toString(args); + System.out.println("Expecting 4 arguments - , , , "); + System.out.println("Got: " + concat.substring(1, concat.length() -1)); + System.exit(1); + } + + int lineNumber = 0; + long start = System.currentTimeMillis(); + String keyspace = args[0]; + String table = args[1]; + filename = args[2]; + outdir = args[3]; + + System.out.println("keyspace: "+keyspace); + System.out.println("table: "+table); + System.out.println("filename: "+filename); + System.out.println("outdir: "+outdir); + + // Schema and Insert for bulk load + if (table.equals("md5_id_annotation")) { + schema = String.format("CREATE TABLE %s.%s (" + + "id int, " + + "source text, " + + "md5 text, " + + "is_protein boolean, " + + "single text, " + + "lca list, " + + "accession list, " + + "function list, " + + "organism list, " + + "PRIMARY KEY (id, source) " + + ")", keyspace, table); + insert = String.format("INSERT INTO %s.%s (" + + "id, source, md5, is_protein, single, lca, accession, function, organism" + + ") VALUES (" + + "?, ?, ?, ?, ?, ?, ?, ?, ?" + + ")", keyspace, table); + } else if (table.equals("md5_annotation")) { + schema = String.format("CREATE TABLE %s.%s (" + + "md5 text, " + + "source text, " + + "is_protein boolean, " + + "single text, " + + "lca list, " + + "accession list, " + + "function list, " + + "organism list, " + + "PRIMARY KEY (md5, source) " + + ")", keyspace, table); + insert = String.format("INSERT INTO %s.%s (" + + "md5, source, is_protein, single, lca, accession, function, organism" + + ") VALUES (" + + "?, ?, ?, ?, ?, ?, ?, ?" + + ")", keyspace, table); + } else { + System.out.println("Unsupported table type: " + table); + System.exit(1); + } + + // magic! + Config.setClientMode(true); + + // Create output directory that has keyspace and table name in the path + File outputDir = new File(outdir + File.separator + keyspace + File.separator + table); + if (!outputDir.exists() && !outputDir.mkdirs()) { + throw new RuntimeException("Cannot create output directory: " + outputDir); + } + + // Prepare SSTable writer + CQLSSTableWriter.Builder builder = CQLSSTableWriter.builder(); + // set output directory + builder.inDirectory(outputDir) + // set target schema + .forTable(schema) + // set CQL statement to put data + .using(insert) + // set partitioner if needed - default is Murmur3Partitioner so set if you use different one. + .withPartitioner(new Murmur3Partitioner()); + CQLSSTableWriter writer = builder.build(); + + // set cvs reader / parser + try ( + BufferedReader reader = new BufferedReader(new FileReader(filename)); + CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE); + ) { + csvReader.getHeader(true); // skip the header + + // Write to SSTable while reading data + List line; + while ((line = csvReader.read()) != null) { + // We use Java types here based on + // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 + if (table.equals("md5_id_annotation")) { + writer.addRow(Integer.parseInt(line.get(0)), + line.get(1), + line.get(2), + Boolean.valueOf(line.get(3)), + line.get(4), + parseList(line.get(5)), + parseList(line.get(6)), + parseList(line.get(7)), + parseList(line.get(8))); + } else if (table.equals("md5_annotation")) { + writer.addRow(line.get(0), + line.get(1), + Boolean.valueOf(line.get(2)), + line.get(3), + parseList(line.get(4)), + parseList(line.get(5)), + parseList(line.get(6)), + parseList(line.get(7))); + } + // Print nK + lineNumber += 1; + if (lineNumber % 10000 == 0) { + System.out.println((lineNumber / 1000) + "K"); + } + } + } catch (InvalidRequestException | IOException e) { + e.printStackTrace(); + } + + try { + writer.close(); + } catch (IOException ignore) {} + + // done + long end = System.currentTimeMillis(); + System.out.println("Successfully parsed " + lineNumber + " lines."); + System.out.println("Execution time was " + ((end-start) / 1000) + " seconds."); + System.exit(0); + } + + public static List parseList (String listStr) { + List aList = new ArrayList(); + listStr = listStr.trim(); + if (listStr.isEmpty()) { + return aList; + } + // remove leading and trailing brackets + listStr = listStr.substring(1, listStr.length() - 1).trim(); + if (listStr.isEmpty()) { + return aList; + } + // split by comma + String[] parts = listStr.split(","); + for (int i=0; i 1) { + aList.add( item.substring(1, item.length() - 1).trim() ); + } else { + aList.add( new String() ); + } + } + return aList; + } + +} diff --git a/src/MGRAST/tools/BulkLoader/BulkLoader.sh b/src/MGRAST/tools/BulkLoader/BulkLoader.sh new file mode 100755 index 00000000..24a8c885 --- /dev/null +++ b/src/MGRAST/tools/BulkLoader/BulkLoader.sh @@ -0,0 +1,62 @@ +#!/bin/sh + +HELP=0 +KEYSPACE='' +TABLE='' +INFILE='' +OUTDIR='' +JAVA=`which java` +JARS="/root/cassandra/lib /root/super-csv" + +while getopts hk:t:i:o: option; do + case "${option}" + in + h) HELP=1;; + k) KEYSPACE=${OPTARG};; + t) TABLE=${OPTARG};; + i) INFILE=${OPTARG};; + o) OUTDIR=${OPTARG};; + esac +done + +USAGE="Usage: BulkLoader.sh [-h] -k -t
-i -o " + +# check options +if [ $HELP -eq 1 ]; then + echo $USAGE + exit +fi +if [ -z "$KEYSPACE" ] || [ -z "$TABLE" ] || [ -z "$INFILE" ]; then + echo "[error] missing parameter" + echo $USAGE + exit +fi +if [ ! -f "$INFILE" ]; then + echo "[error] file $INFILE does not exist" + echo $USAGE + exit +fi +if [ -z "$OUTDIR" ]; then + OUTDIR=/mnt/sstable +fi + +# check env +if [ -z "$CASSANDRA_CONFIG" ]; then + CASSANDRA_CONFIG=/root/cassandra/conf/cassandra.yaml +fi + +# set classpath +CLASSPATH=".:$CASSANDRA_CONFIG" +for path in $JARS; do + CLASSPATH="$CLASSPATH:$path/*" +done + +# Compile +echo "compile: javac -cp $CLASSPATH BulkLoader.java" +javac -cp $CLASSPATH BulkLoader.java + +# Import +echo +echo "run: $JAVA -ea -cp $CLASSPATH -Xms20G -Xmx20G -Dlog4j.configuration=log4j-tools.properties BulkLoader $KEYSPACE $TABLE $INFILE $OUTDIR" +$JAVA -ea -cp $CLASSPATH -Xms20G -Xmx20G -Dlog4j.configuration=log4j-tools.properties BulkLoader $KEYSPACE $TABLE $INFILE $OUTDIR + From cb02e46d9e9d663fc20c9425360cfe033fba9575 Mon Sep 17 00:00:00 2001 From: Travis Harrison Date: Mon, 29 Jun 2015 11:41:16 -0500 Subject: [PATCH 03/17] moved httpd conf to other repo --- conf/httpd.conf | 507 ------------------------------------------------ 1 file changed, 507 deletions(-) delete mode 100644 conf/httpd.conf diff --git a/conf/httpd.conf b/conf/httpd.conf deleted file mode 100644 index 623d3f1e..00000000 --- a/conf/httpd.conf +++ /dev/null @@ -1,507 +0,0 @@ -# -# This is the main Apache HTTP server configuration file. It contains the -# configuration directives that give the server its instructions. -# See for detailed information. -# In particular, see -# -# for a discussion of each configuration directive. -# -# Do NOT simply read the instructions in here without understanding -# what they do. They're here only as hints or reminders. If you are unsure -# consult the online docs. You have been warned. -# -# Configuration and logfile names: If the filenames you specify for many -# of the server's control files begin with "/" (or "drive:/" for Win32), the -# server will use that explicit path. If the filenames do *not* begin -# with "/", the value of ServerRoot is prepended -- so "logs/access_log" -# with ServerRoot set to "/usr/local/apache2" will be interpreted by the -# server as "/usr/local/apache2/logs/access_log", whereas "/logs/access_log" -# will be interpreted as '/logs/access_log'. - -# -# ServerRoot: The top of the directory tree under which the server's -# configuration, error, and log files are kept. -# -# Do not add a slash at the end of the directory path. If you point -# ServerRoot at a non-local disk, be sure to specify a local disk on the -# Mutex directive, if file-based mutexes are used. If you wish to share the -# same ServerRoot for multiple httpd daemons, you will need to change at -# least PidFile. -# -ServerRoot "/usr/local/apache2" - -Include /api-server-conf/api.metagenomics.conf - -# -# Mutex: Allows you to set the mutex mechanism and mutex file directory -# for individual mutexes, or change the global defaults -# -# Uncomment and change the directory if mutexes are file-based and the default -# mutex file directory is not on a local disk or is not appropriate for some -# other reason. -# -# Mutex default:logs - -# -# Listen: Allows you to bind Apache to specific IP addresses and/or -# ports, instead of the default. See also the -# directive. -# -# Change this to Listen on specific IP addresses as shown below to -# prevent Apache from glomming onto all bound IP addresses. -# -#Listen 12.34.56.78:80 -Listen 80 - -# -# Dynamic Shared Object (DSO) Support -# -# To be able to use the functionality of a module which was built as a DSO you -# have to place corresponding `LoadModule' lines at this location so the -# directives contained in it are actually available _before_ they are used. -# Statically compiled modules (those listed by `httpd -l') do not need -# to be loaded here. -# -# Example: -# LoadModule foo_module modules/mod_foo.so -# -LoadModule authn_file_module modules/mod_authn_file.so -#LoadModule authn_dbm_module modules/mod_authn_dbm.so -#LoadModule authn_anon_module modules/mod_authn_anon.so -#LoadModule authn_dbd_module modules/mod_authn_dbd.so -#LoadModule authn_socache_module modules/mod_authn_socache.so -LoadModule authn_core_module modules/mod_authn_core.so -LoadModule authz_host_module modules/mod_authz_host.so -LoadModule authz_groupfile_module modules/mod_authz_groupfile.so -LoadModule authz_user_module modules/mod_authz_user.so -#LoadModule authz_dbm_module modules/mod_authz_dbm.so -#LoadModule authz_owner_module modules/mod_authz_owner.so -#LoadModule authz_dbd_module modules/mod_authz_dbd.so -LoadModule authz_core_module modules/mod_authz_core.so -#LoadModule authnz_ldap_module modules/mod_authnz_ldap.so -LoadModule access_compat_module modules/mod_access_compat.so -LoadModule auth_basic_module modules/mod_auth_basic.so -#LoadModule auth_form_module modules/mod_auth_form.so -#LoadModule auth_digest_module modules/mod_auth_digest.so -#LoadModule allowmethods_module modules/mod_allowmethods.so -#LoadModule file_cache_module modules/mod_file_cache.so -#LoadModule cache_module modules/mod_cache.so -#LoadModule cache_disk_module modules/mod_cache_disk.so -#LoadModule cache_socache_module modules/mod_cache_socache.so -#LoadModule socache_shmcb_module modules/mod_socache_shmcb.so -#LoadModule socache_dbm_module modules/mod_socache_dbm.so -#LoadModule socache_memcache_module modules/mod_socache_memcache.so -#LoadModule macro_module modules/mod_macro.so -#LoadModule dbd_module modules/mod_dbd.so -#LoadModule dumpio_module modules/mod_dumpio.so -#LoadModule buffer_module modules/mod_buffer.so -#LoadModule ratelimit_module modules/mod_ratelimit.so -LoadModule reqtimeout_module modules/mod_reqtimeout.so -#LoadModule ext_filter_module modules/mod_ext_filter.so -#LoadModule request_module modules/mod_request.so -#LoadModule include_module modules/mod_include.so -LoadModule filter_module modules/mod_filter.so -#LoadModule substitute_module modules/mod_substitute.so -#LoadModule sed_module modules/mod_sed.so -#LoadModule deflate_module modules/mod_deflate.so -LoadModule mime_module modules/mod_mime.so -#LoadModule ldap_module modules/mod_ldap.so -LoadModule log_config_module modules/mod_log_config.so -#LoadModule log_debug_module modules/mod_log_debug.so -#LoadModule logio_module modules/mod_logio.so -LoadModule env_module modules/mod_env.so -#LoadModule expires_module modules/mod_expires.so -LoadModule headers_module modules/mod_headers.so -#LoadModule unique_id_module modules/mod_unique_id.so -LoadModule setenvif_module modules/mod_setenvif.so -LoadModule version_module modules/mod_version.so -#LoadModule remoteip_module modules/mod_remoteip.so -#LoadModule proxy_module modules/mod_proxy.so -#LoadModule proxy_connect_module modules/mod_proxy_connect.so -#LoadModule proxy_ftp_module modules/mod_proxy_ftp.so -#LoadModule proxy_http_module modules/mod_proxy_http.so -#LoadModule proxy_fcgi_module modules/mod_proxy_fcgi.so -#LoadModule proxy_scgi_module modules/mod_proxy_scgi.so -#LoadModule proxy_wstunnel_module modules/mod_proxy_wstunnel.so -#LoadModule proxy_ajp_module modules/mod_proxy_ajp.so -#LoadModule proxy_balancer_module modules/mod_proxy_balancer.so -#LoadModule proxy_express_module modules/mod_proxy_express.so -#LoadModule session_module modules/mod_session.so -#LoadModule session_cookie_module modules/mod_session_cookie.so -#LoadModule session_crypto_module modules/mod_session_crypto.so -#LoadModule session_dbd_module modules/mod_session_dbd.so -#LoadModule slotmem_shm_module modules/mod_slotmem_shm.so -#LoadModule ssl_module modules/mod_ssl.so -#LoadModule lbmethod_byrequests_module modules/mod_lbmethod_byrequests.so -#LoadModule lbmethod_bytraffic_module modules/mod_lbmethod_bytraffic.so -#LoadModule lbmethod_bybusyness_module modules/mod_lbmethod_bybusyness.so -#LoadModule lbmethod_heartbeat_module modules/mod_lbmethod_heartbeat.so -LoadModule unixd_module modules/mod_unixd.so -#LoadModule dav_module modules/mod_dav.so -LoadModule status_module modules/mod_status.so -LoadModule autoindex_module modules/mod_autoindex.so -#LoadModule info_module modules/mod_info.so -LoadModule cgid_module modules/mod_cgid.so -#LoadModule dav_fs_module modules/mod_dav_fs.so -#LoadModule vhost_alias_module modules/mod_vhost_alias.so -#LoadModule negotiation_module modules/mod_negotiation.so -LoadModule dir_module modules/mod_dir.so -#LoadModule actions_module modules/mod_actions.so -#LoadModule speling_module modules/mod_speling.so -#LoadModule userdir_module modules/mod_userdir.so -LoadModule alias_module modules/mod_alias.so -LoadModule rewrite_module modules/mod_rewrite.so - - -# -# If you wish httpd to run as a different user or group, you must run -# httpd as root initially and it will switch. -# -# User/Group: The name (or #number) of the user/group to run httpd as. -# It is usually good practice to create a dedicated user and group for -# running httpd, as with most system services. -# -User daemon -Group daemon - - - -# 'Main' server configuration -# -# The directives in this section set up the values used by the 'main' -# server, which responds to any requests that aren't handled by a -# definition. These values also provide defaults for -# any containers you may define later in the file. -# -# All of these directives may appear inside containers, -# in which case these default settings will be overridden for the -# virtual host being defined. -# - -# -# ServerAdmin: Your address, where problems with the server should be -# e-mailed. This address appears on some server-generated pages, such -# as error documents. e.g. admin@your-domain.com -# -ServerAdmin you@example.com - -# -# ServerName gives the name and port that the server uses to identify itself. -# This can often be determined automatically, but we recommend you specify -# it explicitly to prevent problems during startup. -# -# If your host doesn't have a registered DNS name, enter its IP address here. -# -#ServerName www.example.com:80 - -# -# Deny access to the entirety of your server's filesystem. You must -# explicitly permit access to web content directories in other -# blocks below. -# - - AllowOverride none - Require all denied - - -# -# Note that from this point forward you must specifically allow -# particular features to be enabled - so if something's not working as -# you might expect, make sure that you have specifically enabled it -# below. -# - -# -# DocumentRoot: The directory out of which you will serve your -# documents. By default, all requests are taken from this directory, but -# symbolic links and aliases may be used to point to other locations. -# -DocumentRoot "/usr/local/apache2/htdocs" - - # - # Possible values for the Options directive are "None", "All", - # or any combination of: - # Indexes Includes FollowSymLinks SymLinksifOwnerMatch ExecCGI MultiViews - # - # Note that "MultiViews" must be named *explicitly* --- "Options All" - # doesn't give it to you. - # - # The Options directive is both complicated and important. Please see - # http://httpd.apache.org/docs/2.4/mod/core.html#options - # for more information. - # - Options Indexes FollowSymLinks - - # - # AllowOverride controls what directives may be placed in .htaccess files. - # It can be "All", "None", or any combination of the keywords: - # AllowOverride FileInfo AuthConfig Limit - # - AllowOverride None - - # - # Controls who can get stuff from this server. - # - Require all granted - - -# -# DirectoryIndex: sets the file that Apache will serve if a directory -# is requested. -# - - DirectoryIndex index.html - - -# -# The following lines prevent .htaccess and .htpasswd files from being -# viewed by Web clients. -# - - Require all denied - - -# -# ErrorLog: The location of the error log file. -# If you do not specify an ErrorLog directive within a -# container, error messages relating to that virtual host will be -# logged here. If you *do* define an error logfile for a -# container, that host's errors will be logged there and not here. -# -ErrorLog /proc/self/fd/2 - -# -# LogLevel: Control the number of messages logged to the error_log. -# Possible values include: debug, info, notice, warn, error, crit, -# alert, emerg. -# -LogLevel warn - - - # - # The following directives define some format nicknames for use with - # a CustomLog directive (see below). - # - LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined - LogFormat "%h %l %u %t \"%r\" %>s %b" common - - - # You need to enable mod_logio.c to use %I and %O - LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio - - - # - # The location and format of the access logfile (Common Logfile Format). - # If you do not define any access logfiles within a - # container, they will be logged here. Contrariwise, if you *do* - # define per- access logfiles, transactions will be - # logged therein and *not* in this file. - # - CustomLog /proc/self/fd/1 common - - # - # If you prefer a logfile with access, agent, and referer information - # (Combined Logfile Format) you can use the following directive. - # - #CustomLog "logs/access_log" combined - - - - # - # Redirect: Allows you to tell clients about documents that used to - # exist in your server's namespace, but do not anymore. The client - # will make a new request for the document at its new location. - # Example: - # Redirect permanent /foo http://www.example.com/bar - - # - # Alias: Maps web paths into filesystem paths and is used to - # access content that does not live under the DocumentRoot. - # Example: - # Alias /webpath /full/filesystem/path - # - # If you include a trailing / on /webpath then the server will - # require it to be present in the URL. You will also likely - # need to provide a section to allow access to - # the filesystem path. - - # - # ScriptAlias: This controls which directories contain server scripts. - # ScriptAliases are essentially the same as Aliases, except that - # documents in the target directory are treated as applications and - # run by the server when requested rather than as documents sent to the - # client. The same rules about trailing "/" apply to ScriptAlias - # directives as to Alias. - # - ScriptAlias /cgi-bin/ "/usr/local/apache2/cgi-bin/" - - - - - # - # ScriptSock: On threaded servers, designate the path to the UNIX - # socket used to communicate with the CGI daemon of mod_cgid. - # - #Scriptsock cgisock - - -# -# "/usr/local/apache2/cgi-bin" should be changed to whatever your ScriptAliased -# CGI directory exists, if you have that configured. -# - - AllowOverride None - Options None - Require all granted - - - - # - # TypesConfig points to the file containing the list of mappings from - # filename extension to MIME-type. - # - TypesConfig conf/mime.types - - # - # AddType allows you to add to or override the MIME configuration - # file specified in TypesConfig for specific file types. - # - #AddType application/x-gzip .tgz - # - # AddEncoding allows you to have certain browsers uncompress - # information on the fly. Note: Not all browsers support this. - # - #AddEncoding x-compress .Z - #AddEncoding x-gzip .gz .tgz - # - # If the AddEncoding directives above are commented-out, then you - # probably should define those extensions to indicate media types: - # - AddType application/x-compress .Z - AddType application/x-gzip .gz .tgz - - # - # AddHandler allows you to map certain file extensions to "handlers": - # actions unrelated to filetype. These can be either built into the server - # or added with the Action directive (see below) - # - # To use CGI scripts outside of ScriptAliased directories: - # (You will also need to add "ExecCGI" to the "Options" directive.) - # - #AddHandler cgi-script .cgi - - # For type maps (negotiated resources): - #AddHandler type-map var - - # - # Filters allow you to process content before it is sent to the client. - # - # To parse .shtml files for server-side includes (SSI): - # (You will also need to add "Includes" to the "Options" directive.) - # - #AddType text/html .shtml - #AddOutputFilter INCLUDES .shtml - - -# -# The mod_mime_magic module allows the server to use various hints from the -# contents of the file itself to determine its type. The MIMEMagicFile -# directive tells the module where the hint definitions are located. -# -#MIMEMagicFile conf/magic - -# -# Customizable error responses come in three flavors: -# 1) plain text 2) local redirects 3) external redirects -# -# Some examples: -#ErrorDocument 500 "The server made a boo boo." -#ErrorDocument 404 /missing.html -#ErrorDocument 404 "/cgi-bin/missing_handler.pl" -#ErrorDocument 402 http://www.example.com/subscription_info.html -# - -# -# MaxRanges: Maximum number of Ranges in a request before -# returning the entire resource, or one of the special -# values 'default', 'none' or 'unlimited'. -# Default setting is to accept 200 Ranges. -#MaxRanges unlimited - -# -# EnableMMAP and EnableSendfile: On systems that support it, -# memory-mapping or the sendfile syscall may be used to deliver -# files. This usually improves server performance, but must -# be turned off when serving from networked-mounted -# filesystems or if support for these functions is otherwise -# broken on your system. -# Defaults: EnableMMAP On, EnableSendfile Off -# -#EnableMMAP off -#EnableSendfile on - -# Supplemental configuration -# -# The configuration files in the conf/extra/ directory can be -# included to add extra features or to modify the default configuration of -# the server, or you may simply copy their contents here and change as -# necessary. - -# Server-pool management (MPM specific) -#Include conf/extra/httpd-mpm.conf - -# Multi-language error messages -#Include conf/extra/httpd-multilang-errordoc.conf - -# Fancy directory listings -#Include conf/extra/httpd-autoindex.conf - -# Language settings -#Include conf/extra/httpd-languages.conf - -# User home directories -#Include conf/extra/httpd-userdir.conf - -# Real-time info on requests and configuration -#Include conf/extra/httpd-info.conf - -# Virtual hosts -#Include conf/extra/httpd-vhosts.conf - -# Local access to the Apache HTTP Server Manual -#Include conf/extra/httpd-manual.conf - -# Distributed authoring and versioning (WebDAV) -#Include conf/extra/httpd-dav.conf - -# Various default settings -#Include conf/extra/httpd-default.conf - -# Configure mod_proxy_html to understand HTML4/XHTML1 - -Include conf/extra/proxy-html.conf - - -# Secure (SSL/TLS) connections -#Include conf/extra/httpd-ssl.conf -# -# Note: The following must must be present to support -# starting without SSL on platforms with no /dev/random equivalent -# but a statically compiled-in mod_ssl. -# - -SSLRandomSeed startup builtin -SSLRandomSeed connect builtin - -# -# uncomment out the below to deal with user agents that deliberately -# violate open standards by misusing DNT (DNT *must* be a specific -# end-user choice) -# -# -#BrowserMatch "MSIE 10.0;" bad_DNT -# -# -#RequestHeader unset DNT env=bad_DNT -# - From 3904e925a41dfa14ff9f151e9560b270bf8d9433 Mon Sep 17 00:00:00 2001 From: Travis Harrison Date: Mon, 29 Jun 2015 13:04:29 -0500 Subject: [PATCH 04/17] added more dependencies --- dockerfiles/api/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dockerfiles/api/Dockerfile b/dockerfiles/api/Dockerfile index 0426db37..273876ff 100644 --- a/dockerfiles/api/Dockerfile +++ b/dockerfiles/api/Dockerfile @@ -1,3 +1,5 @@ +# MG-RAST API + FROM httpd:2.4.12 # MG-RAST dependencies @@ -6,6 +8,7 @@ RUN apt-get update && apt-get install -y \ libpq-dev \ make \ curl \ + r-base \ perl-modules \ liburi-perl \ libwww-perl \ @@ -40,10 +43,8 @@ RUN apt-get install -y \ libtemplate-perl RUN pip install gspread xlrd openpyxl lepl - RUN cd / && git clone https://github.com/MG-RAST/pipeline.git - RUN mkdir -p /sites/1/ && \ cd /sites/1/ && \ ln -s /MG-RAST/ From c9e34eb0c0dae9f959ee59f82a47616c715e2c3e Mon Sep 17 00:00:00 2001 From: Travis Harrison Date: Mon, 29 Jun 2015 13:07:10 -0500 Subject: [PATCH 05/17] added more dependencies --- dockerfiles/api/Dockerfile | 2 +- dockerfiles/web/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dockerfiles/api/Dockerfile b/dockerfiles/api/Dockerfile index 273876ff..e4542906 100644 --- a/dockerfiles/api/Dockerfile +++ b/dockerfiles/api/Dockerfile @@ -59,4 +59,4 @@ RUN mkdir -p /m5nr && \ ln -s /api-server-data/20131215 /m5nr/20131215 # Execute: -# /usr/local/apache2/bin/httpd -DFOREGROUND -f /MG-RAST/conf/httpd.conf +# /usr/local/apache2/bin/httpd -DFOREGROUND -f /api-server-conf/httpd.conf diff --git a/dockerfiles/web/Dockerfile b/dockerfiles/web/Dockerfile index 211c2887..79e0a1b2 100644 --- a/dockerfiles/web/Dockerfile +++ b/dockerfiles/web/Dockerfile @@ -44,7 +44,7 @@ RUN cd /sites/ && git clone https://github.com/MG-RAST/metazen.git && \ make # certificates need to be in daemon home directory -RUN ln -s /config/.postgresql/ /usr/sbin/.postgresql +RUN ln -s /config/postgresql/ /usr/sbin/.postgresql # docker run -t -i --name web -v /home/core/mgrast-config/services/web_v3/:/config -p 80:80 httpd:2.4.12 bash From be48ba33e0a85a5d407abe5b4735d973770c306b Mon Sep 17 00:00:00 2001 From: Travis Harrison Date: Mon, 29 Jun 2015 16:07:14 -0500 Subject: [PATCH 06/17] fix csv parser --- src/MGRAST/tools/BulkLoader/BulkLoader.java | 63 +++++++++++++-------- src/MGRAST/tools/BulkLoader/BulkLoader.sh | 2 +- 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/MGRAST/tools/BulkLoader/BulkLoader.java b/src/MGRAST/tools/BulkLoader/BulkLoader.java index e0bb90cc..dfce18bc 100644 --- a/src/MGRAST/tools/BulkLoader/BulkLoader.java +++ b/src/MGRAST/tools/BulkLoader/BulkLoader.java @@ -8,8 +8,7 @@ import java.util.Arrays; import java.util.ArrayList; -import org.supercsv.io.CsvListReader; -import org.supercsv.prefs.CsvPreference; +import com.opencsv.CSVReader; import org.apache.cassandra.config.Config; import org.apache.cassandra.dht.Murmur3Partitioner; @@ -79,6 +78,23 @@ public static void main(String[] args) throws IOException { ") VALUES (" + "?, ?, ?, ?, ?, ?, ?, ?" + ")", keyspace, table); + } else if (table.equals("job_md5s")) { + schema = String.format("CREATE TABLE %s.%s (" + + "version int, " + + "job int, " + + "source text, " + + "md5 text, " + + "data frozen, " + + "accession list, " + + "function list, " + + "organism list, " + + "PRIMARY KEY ((version, job), source, md5) " + + ")", keyspace, table); + insert = String.format("INSERT INTO %s.%s (" + + "version, job, source, md5, data, accession, function, organism" + + ") VALUES (" + + "?, ?, ?, ?, ?, ?, ?, ?" + + ")", keyspace, table); } else { System.out.println("Unsupported table type: " + table); System.exit(1); @@ -107,35 +123,32 @@ public static void main(String[] args) throws IOException { // set cvs reader / parser try ( - BufferedReader reader = new BufferedReader(new FileReader(filename)); - CsvListReader csvReader = new CsvListReader(reader, CsvPreference.STANDARD_PREFERENCE); + CSVReader csvReader = new CSVReader(new FileReader(filename)); ) { - csvReader.getHeader(true); // skip the header - // Write to SSTable while reading data - List line; - while ((line = csvReader.read()) != null) { + String[] line; + while ((line = csvReader.readNext()) != null) { // We use Java types here based on // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 if (table.equals("md5_id_annotation")) { - writer.addRow(Integer.parseInt(line.get(0)), - line.get(1), - line.get(2), - Boolean.valueOf(line.get(3)), - line.get(4), - parseList(line.get(5)), - parseList(line.get(6)), - parseList(line.get(7)), - parseList(line.get(8))); + writer.addRow(Integer.parseInt(line[0]), + line[1], + line[2], + Boolean.valueOf(line[3]), + line[4], + parseList(line[5]), + parseList(line[6]), + parseList(line[7]), + parseList(line[8])); } else if (table.equals("md5_annotation")) { - writer.addRow(line.get(0), - line.get(1), - Boolean.valueOf(line.get(2)), - line.get(3), - parseList(line.get(4)), - parseList(line.get(5)), - parseList(line.get(6)), - parseList(line.get(7))); + writer.addRow(line[0], + line[1], + Boolean.valueOf(line[2]), + line[3], + parseList(line[4]), + parseList(line[5]), + parseList(line[6]), + parseList(line[7])); } // Print nK lineNumber += 1; diff --git a/src/MGRAST/tools/BulkLoader/BulkLoader.sh b/src/MGRAST/tools/BulkLoader/BulkLoader.sh index 24a8c885..fc5f362d 100755 --- a/src/MGRAST/tools/BulkLoader/BulkLoader.sh +++ b/src/MGRAST/tools/BulkLoader/BulkLoader.sh @@ -6,7 +6,7 @@ TABLE='' INFILE='' OUTDIR='' JAVA=`which java` -JARS="/root/cassandra/lib /root/super-csv" +JARS="/root/cassandra/lib /root/opencsv" while getopts hk:t:i:o: option; do case "${option}" From d46fbae0ca4a3778a5087d9e613e02c6152649f0 Mon Sep 17 00:00:00 2001 From: Travis Harrison Date: Mon, 29 Jun 2015 16:07:30 -0500 Subject: [PATCH 07/17] update analysisDB schema --- src/MGRAST/Schema/mgrast_analysis_v4.cql | 24 ++++++- src/MGRAST/tools/dump_analysis_for_cass.pl | 79 ++++++++++------------ 2 files changed, 57 insertions(+), 46 deletions(-) diff --git a/src/MGRAST/Schema/mgrast_analysis_v4.cql b/src/MGRAST/Schema/mgrast_analysis_v4.cql index 7d4bde93..9376df40 100644 --- a/src/MGRAST/Schema/mgrast_analysis_v4.cql +++ b/src/MGRAST/Schema/mgrast_analysis_v4.cql @@ -12,9 +12,15 @@ CREATE TYPE source_info ( CREATE TYPE md5_info ( abundance int, + exp_avg float, + ident_avg float, + len_avg float, + exp_stdv float, + ident_stdv float, + len_stdv float, seek bigint, length int, - annotation map> + is_protein boolean ); CREATE TYPE lca_info ( @@ -42,6 +48,7 @@ INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 20, 60, 'foo' INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 20, 70, 'foo', 'hello world'); INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 20, 70, 'bar', 'hello world'); +### version 1 ### CREATE TABLE IF NOT EXISTS job_md5s ( version int, job int, @@ -70,6 +77,7 @@ CREATE TABLE IF NOT EXISTS job_lcas ( # see: http://docs.datastax.com/en/cql/3.1/cql/cql_using/use-slice-partition.html +### version 2 ### CREATE TABLE IF NOT EXISTS job_md5s ( version int, job int, @@ -107,3 +115,17 @@ CREATE TABLE IF NOT EXISTS job_lcas ( PRIMARY KEY ((version, job), exp_avg, ident_avg, len_avg, lca) ); +### version 3 ### +CREATE TABLE IF NOT EXISTS job_md5s ( + version int, + job int, + source text, + md5 text, + data frozen, + accession list, + function list, + organism list, + PRIMARY KEY ((version, job), source, md5) +); + + diff --git a/src/MGRAST/tools/dump_analysis_for_cass.pl b/src/MGRAST/tools/dump_analysis_for_cass.pl index 9b9c1ff8..9853c4c0 100755 --- a/src/MGRAST/tools/dump_analysis_for_cass.pl +++ b/src/MGRAST/tools/dump_analysis_for_cass.pl @@ -129,53 +129,42 @@ sub process_batch { next unless ($data->{$mid}); next if (int($ea) > 0); $ea = $ea * -1; - my $md5 = $data->{$mid}[0]{md5}; - my $acc = {}; - my $fun = {}; - my $org = {}; + my $md5 = $data->{$mid}[0]{md5}; + my $srcs = {}; + my $md5_info = { + "abundance" => $abund, + "exp_avg" => $ea, + "ident_avg" => $ia, + "len_avg" => $la, + "exp_stdv" => $es, + "ident_stdv" => $is, + "len_stdv" => $ls, + "seek" => defined($seek) ? $seek : "", + "length" => defined($len) ? $len : "", + "is_protein" => $prot ? 'true' : 'false' + }; + foreach my $ann (@{$data->{$mid}}) { - push @{ $acc->{$ann->{source}} }, cescape($ann->{accession} || ""); - push @{ $fun->{$ann->{source}} }, cescape($ann->{function} || ""); - push @{ $org->{$ann->{source}} }, cescape($ann->{organism} || ""); + next unless ($ann->{source}); + # source => [[ accession, function, organism ]] + $ann->{accession} =~ s/\'/''/g; + $ann->{function} =~ s/\'/''/g; + $ann->{organism} =~ s/\'/''/g; + push @{$srcs->{$ann->{source}}}, [ $ann->{accession}, $ann->{function}, $ann->{organism} ]; } - my $out = [ $version, $job, - $ea, $ia, $la, $md5, - $es, $is, $ls, - $abund, $seek, $len, - ($prot ? 'true' : 'false'), - cstring($acc), - cstring($fun), - cstring($org) - ]; - push @output, $out; - } - - return @output; -} - -sub cescape { - my ($text) = @_; - $text =~ s/\'/''/g; - $text =~ s/\"/\\"/g; - return $text; -} - -sub cstring { - my ($obj) = @_; - my $str = "{"; - foreach my $key (keys %$obj) { - my $has_data = 0; - foreach my $v (@{$obj->{$key}}) { - if ($v) { - $has_data = 1; - last; - } - } - if ($has_data) { - $str .= "'".$key."':[".join(",", map {"'".$_."'"} @{$obj->{$key}})."],"; + foreach my $src (keys %$srcs) { + my @acc = map { $_->[0] } @{$srcs->{$src}}; + my @fun = map { $_->[1] } @{$srcs->{$src}}; + my @org = map { $_->[2] } @{$srcs->{$src}}; + my $acc = "[".join(",", map { "'".$_."'" } @acc)."]"; + my $fun = "[".join(",", map { "'".$_."'" } @fun)."]"; + my $org = "[".join(",", map { "'".$_."'" } @org)."]"; + $acc =~ s/\"/\\"/g; + $fun =~ s/\"/\\"/g; + $org =~ s/\"/\\"/g; + my $out = [ $version, $job, $src, $md5, $json->encode($md5_info), $acc, $fun, $org ]; + push @output, $out; } } - chop $str; - $str .= "}"; - return $str; + return @output; } From 22f67e8c2ac0c6d8f99f74d68e3822ab16b4da80 Mon Sep 17 00:00:00 2001 From: Tobias Paczian Date: Tue, 30 Jun 2015 06:19:16 -0500 Subject: [PATCH 08/17] patched in v4 upload page --- src/MGRAST/html/MGRAST-frontpage.tmpl | 2 +- src/MGRAST/html/MGRAST.tmpl | 2 +- src/MGRAST/html/js/MetagenomeSearch.js | 2 +- src/MGRAST/lib/WebPage/Home.pm | 38 +--------------------- src/MGRAST/lib/WebPage/MetagenomeSearch.pm | 1 + 5 files changed, 5 insertions(+), 40 deletions(-) diff --git a/src/MGRAST/html/MGRAST-frontpage.tmpl b/src/MGRAST/html/MGRAST-frontpage.tmpl index 2c08eee2..76077232 100644 --- a/src/MGRAST/html/MGRAST-frontpage.tmpl +++ b/src/MGRAST/html/MGRAST-frontpage.tmpl @@ -8,7 +8,7 @@
- + diff --git a/src/MGRAST/html/MGRAST.tmpl b/src/MGRAST/html/MGRAST.tmpl index c6914940..d449bc10 100644 --- a/src/MGRAST/html/MGRAST.tmpl +++ b/src/MGRAST/html/MGRAST.tmpl @@ -14,7 +14,7 @@
- + diff --git a/src/MGRAST/html/js/MetagenomeSearch.js b/src/MGRAST/html/js/MetagenomeSearch.js index 77e3d759..747ec77c 100644 --- a/src/MGRAST/html/js/MetagenomeSearch.js +++ b/src/MGRAST/html/js/MetagenomeSearch.js @@ -1,4 +1,4 @@ -var api_url = 'http://api.metagenomics.anl.gov/1/metagenome?verbosity=mixs&'; +var api_url = RetinaConfig ? RetinaConfig.mgrast_api + "/metagenome?verbosity=mixs&" : 'http://api.metagenomics.anl.gov/1/metagenome?verbosity=mixs&'; var datastore = {}; var result = 'result'; // div where results are to be displayed var saved_params = {}; diff --git a/src/MGRAST/lib/WebPage/Home.pm b/src/MGRAST/lib/WebPage/Home.pm index c133c4af..ab7b52bd 100644 --- a/src/MGRAST/lib/WebPage/Home.pm +++ b/src/MGRAST/lib/WebPage/Home.pm @@ -117,7 +117,7 @@ function forward_to_search (e) { $content .= "
".$register."
"; $content .= "
".$contact."
"; $content .= "
".$help."
"; - $content .= "
".$upload."
"; + $content .= "
".$upload."
"; $content .= "
".$news."
"; $content .= ""; $content .= "
"; @@ -176,13 +176,7 @@ END $content .= "

cite MG-RAST

cite MG-RAST API

"; - #$content .= "

The Metagenomics RAST server - A public resource for the automatic phylogenetic and functional analysis of metagenomes F. Meyer, D. Paarmann, M. D'Souza, R. Olson , E. M. Glass, M. Kubal, T. Paczian , A. Rodriguez , R. Stevens, A. Wilke, J. Wilkening, R. A. Edwards
BMC Bioinformatics 2008, 9:386 -#, [article]

"; - - #$content .= "

powered by

"; - my $logos = ""; - # news - logos return $content; } @@ -198,36 +192,6 @@ sub speedometer { my ($speed, $mileage, $trip, $togo) = split(/\t/, $line); $content .= "
pipeline status
$speedbp/sspeed
$tripMbplast 30 days
$togoMbpin queue
$mileageMbptotal
"; - - # my @trip1 = split(//, $trip); -# while (scalar(@trip1) < 7) { -# unshift(@trip1, "0"); -# } -# $trip = '';# style="margin-left: 36px;" -# foreach my $t (@trip1) { -# $trip .= ""; -# } -# $trip .= '
$t
'; -# my @togo1 = split(//, $togo); -# while (scalar(@togo1) < 7) { -# unshift(@togo1, "0"); -# } -# $togo = '';# style="margin-left: 36px;" -# foreach my $t (@togo1) { -# $togo .= ""; -# } -# $togo .= '
$t
'; -# my @mileage1 = split(//, $mileage); -# while (scalar(@mileage1) < 7) { -# unshift(@mileage1, "0"); -# } -# $mileage = ''; -# foreach my $t (@mileage1) { -# $mileage .= ""; -# } -# $mileage .= '
$t
'; - -# $content .= "
$speedbp/sspeed
$tripMbplast 30 days
$togoMbpin queue
$mileageMbptotal
"; } return $content; diff --git a/src/MGRAST/lib/WebPage/MetagenomeSearch.pm b/src/MGRAST/lib/WebPage/MetagenomeSearch.pm index 91cff8e3..68da4ee6 100644 --- a/src/MGRAST/lib/WebPage/MetagenomeSearch.pm +++ b/src/MGRAST/lib/WebPage/MetagenomeSearch.pm @@ -235,6 +235,7 @@ sub output { my $scripts = qq~ + ~; + ~; } $content .= "
"; diff --git a/src/MGRAST/lib/WebPage/MetagenomeOverview.pm b/src/MGRAST/lib/WebPage/MetagenomeOverview.pm index e2d284ba..c4d40bfd 100644 --- a/src/MGRAST/lib/WebPage/MetagenomeOverview.pm +++ b/src/MGRAST/lib/WebPage/MetagenomeOverview.pm @@ -146,7 +146,7 @@ sub output { my $mddb = $self->data('mddb'); my $user = $self->application->session->user; my $job_id = $job->job_id; - my $mg_link = "http://metagenomics.anl.gov/linkin.cgi?metagenome=$mgid"; + my $mg_link = $Conf::cgi_url."linkin.cgi?metagenome=$mgid"; # get project information my $project_link = ""; diff --git a/src/MGRAST/lib/WebPage/MetagenomeProject.pm b/src/MGRAST/lib/WebPage/MetagenomeProject.pm index fdb88cae..9f4c55b5 100644 --- a/src/MGRAST/lib/WebPage/MetagenomeProject.pm +++ b/src/MGRAST/lib/WebPage/MetagenomeProject.pm @@ -129,7 +129,7 @@ sub output { $self->{is_editor} = 1; } - my $proj_link = "http://metagenomics.anl.gov/linkin.cgi?project=".$self->{project_id}; + my $proj_link = $Conf::cgi_url."linkin.cgi?project=".$self->{project_id}; $html .= "

".$project->name.(($user and $user->has_right(undef, 'edit', 'user', '*')) ? " (ID ".$project->id.")": "")."

"; $html .= "

"; $html .= ""; diff --git a/src/MGRAST/lib/WebPage/PublishGenome.pm b/src/MGRAST/lib/WebPage/PublishGenome.pm index 170f1a3c..75233fdf 100644 --- a/src/MGRAST/lib/WebPage/PublishGenome.pm +++ b/src/MGRAST/lib/WebPage/PublishGenome.pm @@ -10,6 +10,7 @@ use JSON; use LWP::UserAgent; use HTTP::Request; use Data::Dumper; +use Conf; use WebConfig; use base qw( WebPage ); @@ -62,10 +63,10 @@ sub init { } # api info for making public - $self->data('api', "http://api.metagenomics.anl.gov"); + $self->data('api', $Conf::api_url || "http://api.metagenomics.anl.gov"); $self->data('job', $job); - $self->data('linkin', "http://metagenomics.anl.gov/linkin.cgi?metagenome=$id"); + $self->data('linkin', $Conf::cgi_url."linkin.cgi?metagenome=$id"); } diff --git a/src/MGRAST/lib/resources/validation.pm b/src/MGRAST/lib/resources/validation.pm index 97468ce5..6652b9a0 100644 --- a/src/MGRAST/lib/resources/validation.pm +++ b/src/MGRAST/lib/resources/validation.pm @@ -515,7 +515,7 @@ sub reformat_template { use JSON; my $ua = LWP::UserAgent->new; my $json = new JSON; - my $data = $json->decode($ua->get('http://api.metagenomics.anl.gov/1/metadata/template')->content); + my $data = $json->decode($ua->get(($Conf::api_url || 'http://api.metagenomics.anl.gov/1/').'metadata/template')->content); my $template = { "name" => "mgrast", "label" => "MG-RAST", From 69deabaff3f5eb93bf78288b2eb82d675f3f735f Mon Sep 17 00:00:00 2001 From: Travis Harrison Date: Wed, 1 Jul 2015 12:11:49 -0500 Subject: [PATCH 16/17] update and re-org cassandra schema / loader --- src/MGRAST/Schema/m5nr_copy.cql.tt | 17 +++++++++++++++++ .../Schema/{m5nr.cql.tt => m5nr_table.cql.tt} | 2 +- src/MGRAST/tools/BulkLoader/BulkLoader.java | 4 ++-- src/MGRAST/tools/BulkLoader/BulkLoader.sh | 17 ++++++++--------- 4 files changed, 28 insertions(+), 12 deletions(-) create mode 100644 src/MGRAST/Schema/m5nr_copy.cql.tt rename src/MGRAST/Schema/{m5nr.cql.tt => m5nr_table.cql.tt} (98%) diff --git a/src/MGRAST/Schema/m5nr_copy.cql.tt b/src/MGRAST/Schema/m5nr_copy.cql.tt new file mode 100644 index 00000000..04aa3ed9 --- /dev/null +++ b/src/MGRAST/Schema/m5nr_copy.cql.tt @@ -0,0 +1,17 @@ + +use m5nr_v[% version %]; + +COPY ontologies (source, name, level1, level2, level3, level4) FROM '[% data_dir %]/m5nr_v[% version %].ontology.all'; +COPY ont_level1 (source, level1, name) FROM '[% data_dir %]/m5nr_v[% version %].ontology.level1'; +COPY ont_level2 (source, level2, name) FROM '[% data_dir %]/m5nr_v[% version %].ontology.level2'; +COPY ont_level3 (source, level3, name) FROM '[% data_dir %]/m5nr_v[% version %].ontology.level3'; +COPY ont_level4 (source, level4, name) FROM '[% data_dir %]/m5nr_v[% version %].ontology.level4'; + +COPY organisms_ncbi (name, tax_domain, tax_phylum, tax_class, tax_order, tax_family, tax_genus, tax_species, ncbi_tax_id) FROM '[% data_dir %]/m5nr_v[% version %].taxonomy.all'; +COPY tax_domain (tax_domain, name) FROM '[% data_dir %]/m5nr_v[% version %].taxonomy.domain'; +COPY tax_phylum (tax_phylum, name) FROM '[% data_dir %]/m5nr_v[% version %].taxonomy.phylum'; +COPY tax_class (tax_class, name) FROM '[% data_dir %]/m5nr_v[% version %].taxonomy.class'; +COPY tax_order (tax_order, name) FROM '[% data_dir %]/m5nr_v[% version %].taxonomy.order'; +COPY tax_family (tax_family, name) FROM '[% data_dir %]/m5nr_v[% version %].taxonomy.family'; +COPY tax_genus (tax_genus, name) FROM '[% data_dir %]/m5nr_v[% version %].taxonomy.genus'; +COPY tax_species (tax_species, name) FROM '[% data_dir %]/m5nr_v[% version %].taxonomy.species'; diff --git a/src/MGRAST/Schema/m5nr.cql.tt b/src/MGRAST/Schema/m5nr_table.cql.tt similarity index 98% rename from src/MGRAST/Schema/m5nr.cql.tt rename to src/MGRAST/Schema/m5nr_table.cql.tt index bbf9b079..04f94a3c 100644 --- a/src/MGRAST/Schema/m5nr.cql.tt +++ b/src/MGRAST/Schema/m5nr_table.cql.tt @@ -4,7 +4,7 @@ CREATE KEYSPACE IF NOT EXISTS m5nr_v[% version %] use m5nr_v[% version %]; -CREATE TABLE IF NOT EXISTS md5_id_annotation ( +CREATE TABLE IF NOT EXISTS id_annotation ( id int, source text, md5 text, diff --git a/src/MGRAST/tools/BulkLoader/BulkLoader.java b/src/MGRAST/tools/BulkLoader/BulkLoader.java index dfce18bc..5973a7f0 100644 --- a/src/MGRAST/tools/BulkLoader/BulkLoader.java +++ b/src/MGRAST/tools/BulkLoader/BulkLoader.java @@ -43,7 +43,7 @@ public static void main(String[] args) throws IOException { System.out.println("outdir: "+outdir); // Schema and Insert for bulk load - if (table.equals("md5_id_annotation")) { + if (table.equals("id_annotation")) { schema = String.format("CREATE TABLE %s.%s (" + "id int, " + "source text, " + @@ -130,7 +130,7 @@ public static void main(String[] args) throws IOException { while ((line = csvReader.readNext()) != null) { // We use Java types here based on // http://www.datastax.com/drivers/java/2.0/com/datastax/driver/core/DataType.Name.html#asJavaClass%28%29 - if (table.equals("md5_id_annotation")) { + if (table.equals("id_annotation")) { writer.addRow(Integer.parseInt(line[0]), line[1], line[2], diff --git a/src/MGRAST/tools/BulkLoader/BulkLoader.sh b/src/MGRAST/tools/BulkLoader/BulkLoader.sh index 5c01c4e2..12a5a655 100755 --- a/src/MGRAST/tools/BulkLoader/BulkLoader.sh +++ b/src/MGRAST/tools/BulkLoader/BulkLoader.sh @@ -1,17 +1,18 @@ #!/bin/sh HELP=0 +CASS_DIR='' KEYSPACE='' TABLE='' INFILE='' OUTDIR='' JAVA=`which java` -JARS="/root/cassandra/lib/*" -while getopts hk:t:i:o: option; do +while getopts hc:k:t:i:o: option; do case "${option}" in h) HELP=1;; + c) CASS_DIR=${OPTARG};; k) KEYSPACE=${OPTARG};; t) TABLE=${OPTARG};; i) INFILE=${OPTARG};; @@ -19,7 +20,7 @@ while getopts hk:t:i:o: option; do esac done -USAGE="Usage: BulkLoader.sh [-h] -k -t
Visibility".($project->public ? 'Public' : 'Private')."
-i -o " +USAGE="Usage: BulkLoader.sh [-h] -c -k -t
-i -o " # check options if [ $HELP -eq 1 ]; then @@ -37,16 +38,14 @@ if [ ! -f "$INFILE" ]; then exit fi if [ -z "$OUTDIR" ]; then - OUTDIR=/mnt/sstable + OUTDIR=/data/sstable fi - -# check env -if [ -z "$CASSANDRA_CONFIG" ]; then - CASSANDRA_CONFIG=/root/cassandra/conf/cassandra.yaml +if [ -z "$CASS_DIR" ]; then + CASS_DIR=/opt/cassandra fi # set classpath -CLASSPATH=".:$CASSANDRA_CONFIG:$JARS" +CLASSPATH=".:$CASS_DIR/conf/cassandra.yaml:$CASS_DIR/lib/*" # Compile echo "compile: javac -cp $CLASSPATH BulkLoader.java" From 3a36f712e52fcb77de6296e697ec4fb56f93e313 Mon Sep 17 00:00:00 2001 From: Travis Harrison Date: Wed, 1 Jul 2015 13:44:20 -0500 Subject: [PATCH 17/17] fix perl warnings about undefined variable --- src/MGRAST/lib/WebPage/Analysis.pm | 4 ++-- src/PPO/DBObjectCache.pm | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/MGRAST/lib/WebPage/Analysis.pm b/src/MGRAST/lib/WebPage/Analysis.pm index 6e57ac68..1a558cae 100644 --- a/src/MGRAST/lib/WebPage/Analysis.pm +++ b/src/MGRAST/lib/WebPage/Analysis.pm @@ -6603,8 +6603,8 @@ sub selectable_metagenomes { push @{$collections->{$name}}, [ $pj->{metagenome_id}, $pj->{name} ]; } foreach my $coll ( sort keys %$collections ) { - if ( @{$collections->{$coll}} == 0 ) { next; } - push(@$colls, { label => $coll." [".scalar(@{$collections->{$coll}})."]", value => join('||', map { $_->[0]."##".$_->[1] } @{$collections->{$coll}}) }); + if ( (! $coll) || (! $collections->{$coll}) || (@{$collections->{$coll}} == 0) ) { next; } + push(@$colls, { label => $coll." [".scalar(@{$collections->{$coll}})."]", value => join('||', map { ($_->[0] || "")."##".($_->[1] || "") } @{$collections->{$coll}}) }); } } diff --git a/src/PPO/DBObjectCache.pm b/src/PPO/DBObjectCache.pm index 3493b89b..94f67102 100644 --- a/src/PPO/DBObjectCache.pm +++ b/src/PPO/DBObjectCache.pm @@ -137,7 +137,7 @@ sub object_to_cache { } my $id = $object->_id(); - unless (ref $self->{'_cache'}->{$backend}->{$db}->{$class}->{$id}) { + unless ($id && ref($self->{'_cache'}->{$backend}->{$db}->{$class}->{$id})) { $self->{'_cache'}->{$backend}->{$db}->{$class}->{$id} = $object; if (scalar(@{$self->{'recent'}}) > SIZE) { @@ -216,8 +216,7 @@ sub delete_object { my $class = $object->_class(); my $id = $object->_id(); - if (exists $self->{'_cache'}->{$backend}->{$db}->{$class} and - exists $self->{'_cache'}->{$backend}->{$db}->{$class}->{$id}) { - delete $self->{'_cache'}->{$backend}->{$db}->{$class}->{$id}; + if ($id && exists($self->{'_cache'}->{$backend}->{$db}->{$class}) && exists($self->{'_cache'}->{$backend}->{$db}->{$class}->{$id})) { + delete $self->{'_cache'}->{$backend}->{$db}->{$class}->{$id}; } }