Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions 12 src/MGRAST/Schema/dump_analysis_for_cass.pl
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,13 @@
my @batch_set = ();
my $batch_count = 0;
my $md5_count = 0;
my $batch_num = 0;
while (my @row = $sth->fetchrow_array()) {
push @batch_set, \@row;
$batch_count += 1;
$md5_count += 1;
if ($batch_count == 1000) {
$batch_num += 1;
my @output = process_batch(\@batch_set, $m5nr."/m5nr_".$version."/select");
foreach my $line (@output) {
print DUMP join(",", map { '"'.$_.'"' } @$line)."\n";
Expand All @@ -79,7 +81,8 @@
}
}
if (@batch_set > 0) {
my @output = process_batch(\@batch_set, $m5nr."/m5nr_".$version);
$batch_num += 1;
my @output = process_batch(\@batch_set, $m5nr."/m5nr_".$version."/select");
foreach my $line (@output) {
print DUMP join(",", map { '"'.$_.'"' } @$line)."\n";
}
Expand Down Expand Up @@ -109,7 +112,7 @@ sub process_batch {
if ($@) {
# try again !!!
if ($try >= 3) {
print STDERR "Failed 3 times at md5 # $md5_count\n".$@."\n".$result->content."\n";
print STDERR "Failed 3 times at md5 $md5_count (batch $batch_num)\n".$@."\n".$result->content."\n";
$dbh->disconnect;
exit 1;
} else {
Expand All @@ -124,6 +127,8 @@ sub process_batch {
foreach my $set (@batch_set) {
my ($mid, $abund, $ea, $ia, $la, $es, $is, $ls, $seek, $len, $prot) = @$set;
next unless ($data->{$mid});
next if (int($ea) > 0);
$ea = $ea * -1;
my $md5 = $data->{$mid}[0]{md5};
my $acc = {};
my $fun = {};
Expand All @@ -134,7 +139,8 @@ sub process_batch {
push @{ $org->{$ann->{source}} }, cescape($ann->{organism} || "");
}
my $out = [ $version, $job,
$ea, $ia, $la, $md5, $es, $is, $ls,
$ea, $ia, $la, $md5,
$es, $is, $ls,
$abund, $seek, $len,
($prot ? 'true' : 'false'),
cstring($acc),
Expand Down
3 changes: 3 additions & 0 deletions 3 src/MGRAST/Schema/dump_annotation_for_cass.pl
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
$lca =~ s/\"/\\"/g;
}
foreach my $d (@$data) {
next unless ($d->[1]);
# source => [[ accession, function, organism ]]
$d->[2] =~ s/\'/''/g;
$d->[3] =~ s/\'/''/g;
Expand Down Expand Up @@ -183,6 +184,7 @@
$lca =~ s/\"/\\"/g;
}
foreach my $d (@$data) {
next unless ($d->[1]);
# source => [[ accession, function, organism ]]
$d->[2] =~ s/\'/''/g;
$d->[3] =~ s/\'/''/g;
Expand Down Expand Up @@ -211,6 +213,7 @@
my $mid = $data->[0][0];
my $srcs = {};
foreach my $d (@$data) {
next unless ($d->[1]);
# source => [[ accession, function ]]
$d->[2] =~ s/\'/''/g;
$d->[3] =~ s/\'/''/g;
Expand Down
20 changes: 19 additions & 1 deletion 20 src/MGRAST/Schema/m5nr_v4.cql
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@

use MGRAST_m5nr;
-- Create the m5nr_v1 keyspace when it is missing, replicated with
-- SimpleStrategy at a replication factor of 2.
-- NOTE(review): SimpleStrategy is not datacenter-aware — confirm the target
-- cluster is single-DC.
CREATE KEYSPACE IF NOT EXISTS m5nr_v1
WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 2 };

-- Make m5nr_v1 the working keyspace for the statements that follow.
use m5nr_v1;

CREATE TABLE IF NOT EXISTS md5_id_annotation (
id int,
Expand All @@ -13,6 +16,7 @@ CREATE TABLE IF NOT EXISTS md5_id_annotation (
organism list<text>,
PRIMARY KEY (id, source)
);
COPY md5_id_annotation (id, source, md5, is_protein, single, lca, accession, function, organism) FROM '/mnt/src/m5nr_v1.annotation.id';

CREATE TABLE IF NOT EXISTS md5_annotation (
md5 text,
Expand All @@ -25,6 +29,7 @@ CREATE TABLE IF NOT EXISTS md5_annotation (
organism list<text>,
PRIMARY KEY (md5, source)
);
COPY md5_annotation (md5, source, is_protein, single, lca, accession, function, organism) FROM '/mnt/src/m5nr_v1.annotation.md5';

CREATE TABLE IF NOT EXISTS ontologies (
source text,
Expand All @@ -35,34 +40,39 @@ CREATE TABLE IF NOT EXISTS ontologies (
level4 text,
PRIMARY KEY (source, name)
);
COPY ontologies (source, name, level1, level2, level3, level4) FROM '/mnt/src/m5nr_v1.ontology.all';

-- ont_level1: one row per (source, level1, name).
-- Partition key: source. Clustering columns: level1, then name.
-- Presumably an index from a level-1 ontology category to the entries
-- beneath it — confirm against the readers of this table.
CREATE TABLE IF NOT EXISTS ont_level1 (
source text,
level1 text,
name text,
PRIMARY KEY (source, level1, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY ont_level1 (source, level1, name) FROM '/mnt/src/m5nr_v1.ontology.level1';

-- ont_level2: one row per (source, level2, name).
-- Partition key: source. Clustering columns: level2, then name.
-- Presumably an index from a level-2 ontology category to the entries
-- beneath it — confirm against the readers of this table.
CREATE TABLE IF NOT EXISTS ont_level2 (
source text,
level2 text,
name text,
PRIMARY KEY (source, level2, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY ont_level2 (source, level2, name) FROM '/mnt/src/m5nr_v1.ontology.level2';

-- ont_level3: one row per (source, level3, name).
-- Partition key: source. Clustering columns: level3, then name.
-- Presumably an index from a level-3 ontology category to the entries
-- beneath it — confirm against the readers of this table.
CREATE TABLE IF NOT EXISTS ont_level3 (
source text,
level3 text,
name text,
PRIMARY KEY (source, level3, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY ont_level3 (source, level3, name) FROM '/mnt/src/m5nr_v1.ontology.level3';

-- ont_level4: one row per (source, level4, name).
-- Partition key: source. Clustering columns: level4, then name.
-- Presumably an index from a level-4 ontology category to the entries
-- beneath it — confirm against the readers of this table.
CREATE TABLE IF NOT EXISTS ont_level4 (
source text,
level4 text,
name text,
PRIMARY KEY (source, level4, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY ont_level4 (source, level4, name) FROM '/mnt/src/m5nr_v1.ontology.level4';

CREATE TABLE IF NOT EXISTS organisms_ncbi (
name text,
Expand All @@ -76,45 +86,53 @@ CREATE TABLE IF NOT EXISTS organisms_ncbi (
ncbi_tax_id int,
PRIMARY KEY (name)
);
COPY organisms_ncbi (name, tax_domain, tax_phylum, tax_class, tax_order, tax_family, tax_genus, tax_species, ncbi_tax_id) FROM '/mnt/src/m5nr_v1.taxonomy.all';

-- tax_domain: one row per (tax_domain, name).
-- Partition key: tax_domain. Clustering column: name.
-- Presumably lists the organism names belonging to each domain — TODO confirm.
CREATE TABLE IF NOT EXISTS tax_domain (
tax_domain text,
name text,
PRIMARY KEY (tax_domain, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY tax_domain (tax_domain, name) FROM '/mnt/src/m5nr_v1.taxonomy.domain';

-- tax_phylum: one row per (tax_phylum, name).
-- Partition key: tax_phylum. Clustering column: name.
-- Presumably lists the organism names belonging to each phylum — TODO confirm.
CREATE TABLE IF NOT EXISTS tax_phylum (
tax_phylum text,
name text,
PRIMARY KEY (tax_phylum, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY tax_phylum (tax_phylum, name) FROM '/mnt/src/m5nr_v1.taxonomy.phylum';

-- tax_class: one row per (tax_class, name).
-- Partition key: tax_class. Clustering column: name.
-- Presumably lists the organism names belonging to each class — TODO confirm.
CREATE TABLE IF NOT EXISTS tax_class (
tax_class text,
name text,
PRIMARY KEY (tax_class, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY tax_class (tax_class, name) FROM '/mnt/src/m5nr_v1.taxonomy.class';

-- tax_order: one row per (tax_order, name).
-- Partition key: tax_order. Clustering column: name.
-- Presumably lists the organism names belonging to each order — TODO confirm.
CREATE TABLE IF NOT EXISTS tax_order (
tax_order text,
name text,
PRIMARY KEY (tax_order, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY tax_order (tax_order, name) FROM '/mnt/src/m5nr_v1.taxonomy.order';

-- tax_family: one row per (tax_family, name).
-- Partition key: tax_family. Clustering column: name.
-- Presumably lists the organism names belonging to each family — TODO confirm.
CREATE TABLE IF NOT EXISTS tax_family (
tax_family text,
name text,
PRIMARY KEY (tax_family, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY tax_family (tax_family, name) FROM '/mnt/src/m5nr_v1.taxonomy.family';

-- tax_genus: one row per (tax_genus, name).
-- Partition key: tax_genus. Clustering column: name.
-- Presumably lists the organism names belonging to each genus — TODO confirm.
CREATE TABLE IF NOT EXISTS tax_genus (
tax_genus text,
name text,
PRIMARY KEY (tax_genus, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY tax_genus (tax_genus, name) FROM '/mnt/src/m5nr_v1.taxonomy.genus';

-- tax_species: one row per (tax_species, name).
-- Partition key: tax_species. Clustering column: name.
-- Presumably lists the organism names belonging to each species — TODO confirm.
CREATE TABLE IF NOT EXISTS tax_species (
tax_species text,
name text,
PRIMARY KEY (tax_species, name)
);
-- cqlsh COPY: bulk-import rows from the given file, columns in the listed order.
COPY tax_species (tax_species, name) FROM '/mnt/src/m5nr_v1.taxonomy.species';
27 changes: 25 additions & 2 deletions 27 src/MGRAST/Schema/mgrast_analysis_v4.cql
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@

use MGRAST_analysis;
-- Create the mgrast_analysis keyspace when it is missing, replicated with
-- SimpleStrategy at a replication factor of 2.
-- NOTE(review): SimpleStrategy is not datacenter-aware — confirm the target
-- cluster is single-DC.
CREATE KEYSPACE IF NOT EXISTS mgrast_analysis
WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 2 };

-- Make mgrast_analysis the working keyspace for the statements that follow.
use mgrast_analysis;

CREATE TYPE source_info (
accession list<text>,
Expand All @@ -20,6 +23,25 @@ CREATE TYPE lca_info (
level int
);

-- job_test: small table seeded with hand-written rows.
-- Partition key: (version, job). Clustering columns: exp_avg, then md5.
-- NOTE(review): looks like a scratch fixture for trying queries against the
-- same partition/clustering layout as job_md5s — confirm whether it belongs
-- in the deployed schema.
-- IF NOT EXISTS keeps this script re-runnable, in line with the other
-- CREATE TABLE / CREATE KEYSPACE statements here; a bare CREATE TABLE fails
-- on the second run because the table already exists.
CREATE TABLE IF NOT EXISTS job_test (
    version int,
    job int,
    exp_avg float,
    md5 text,
    data text,
    PRIMARY KEY ((version, job), exp_avg, md5)
);
-- Seed rows: two partitions, (1, 10) and (1, 20), each with five
-- (exp_avg, md5) combinations. CQL INSERT overwrites a row with the same
-- primary key, so replaying these statements does not create duplicates.
INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 10, 50, 'foo', 'hello world');
INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 10, 50, 'bar', 'hello world');
INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 10, 60, 'foo', 'hello world');
INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 10, 70, 'foo', 'hello world');
INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 10, 70, 'bar', 'hello world');
INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 20, 50, 'foo', 'hello world');
INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 20, 50, 'bar', 'hello world');
INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 20, 60, 'foo', 'hello world');
INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 20, 70, 'foo', 'hello world');
INSERT INTO job_test (version, job, exp_avg, md5, data) VALUES (1, 20, 70, 'bar', 'hello world');

CREATE TABLE IF NOT EXISTS job_md5s (
version int,
job int,
Expand Down Expand Up @@ -66,7 +88,8 @@ CREATE TABLE IF NOT EXISTS job_md5s (
function map<text, frozen<list<text>>>,
organism map<text, frozen<list<text>>>,
PRIMARY KEY ((version, job), exp_avg, ident_avg, len_avg, md5)
);
) WITH CLUSTERING ORDER BY (exp_avg DESC, ident_avg DESC, len_avg DESC);
COPY job_md5s (version, job, exp_avg, ident_avg, len_avg, md5, exp_stdv, ident_stdv, len_stdv, abundance, seek, length, is_protein, accession, function, organism) FROM '/mnt/src/job_data.1595';

CREATE TABLE IF NOT EXISTS job_lcas (
version int,
Expand Down
2 changes: 1 addition & 1 deletion 2 src/MGRAST/Schema/test_cass.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def main(args):
found = set()
for i in range(opts.count):
ints = random_array(opts.batch)
query = "SELECT * FROM annot_by_id WHERE id IN ("+",".join(map(str, ints))+");"
query = "SELECT * FROM md5_id_annotation WHERE id IN ("+",".join(map(str, ints))+");"
if mode == "sql":
cursor = handle.cursor()
cursor.execute(query)
Expand Down
86 changes: 43 additions & 43 deletions 86 src/MGRAST/lib/WebPage/Analysis.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2346,21 +2346,21 @@ sub single_visual {
push(@$vals, $exp_hash->{$key}->[$ii] || 0);
}
my $row = $spec_hash->{$key};
foreach my $r (@$row) {
if ($r =~ /derived/) {
(undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
}
}
#foreach my $r (@$row) {
# if ($r =~ /derived/) {
# (undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
# }
#}
push(@$expanded_data, [ @$row, $vals ] );
}
} else {
foreach my $row (@$data) {
next if ($tree_domain_filter && $tree_domain_filter ne $row->[1]);
foreach my $r (@$row) {
if ($r =~ /derived/) {
(undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
}
}
#foreach my $r (@$row) {
# if ($r =~ /derived/) {
# (undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
# }
#}
push(@$expanded_data, [ @$row[1..9] ] );
}
}
Expand Down Expand Up @@ -3168,21 +3168,21 @@ sub phylogeny_visual {
push(@$vals, $exp_hash->{$key}->[$ii] || 0);
}
my $row = $spec_hash->{$key};
foreach my $r (@$row) {
if ($r =~ /derived/) {
(undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
}
}
#foreach my $r (@$row) {
# if ($r =~ /derived/) {
# (undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
# }
#}
push(@$expanded_data, [ @$row, $vals ] );
}
} else {
foreach my $row (@$data) {
next if ($tree_domain_filter && $tree_domain_filter ne $row->[2]);
foreach my $r (@$row) {
if ($r =~ /derived/) {
(undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
}
}
#foreach my $r (@$row) {
# if ($r =~ /derived/) {
# (undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
# }
#}
push(@$expanded_data, [ @$row[2..10] ] );
}
}
Expand Down Expand Up @@ -5231,36 +5231,36 @@ sub lca_visual {
my $mg2num = {};

for (my $hh=0; $hh<scalar(@comp_mgs); $hh++) {
$mg2num->{$comp_mgs[$hh]} = $hh;
$mg2num->{$comp_mgs[$hh]} = $hh;
}
foreach my $row (@$data) {
$spec_hash->{$row->[8]} = [ @$row[1..8] ];
unless (exists($exp_hash->{$row->[8]})) {
$exp_hash->{$row->[8]} = [];
}
$exp_hash->{$row->[8]}->[$mg2num->{$row->[0]}] = $row->[9];
$spec_hash->{$row->[8]} = [ @$row[1..8] ];
unless (exists($exp_hash->{$row->[8]})) {
$exp_hash->{$row->[8]} = [];
}
$exp_hash->{$row->[8]}->[$mg2num->{$row->[0]}] = $row->[9];
}
foreach my $key (sort(keys(%$exp_hash))) {
my $vals = [];
for (my $ii=0; $ii<scalar(@comp_mgs); $ii++) {
push(@$vals, $exp_hash->{$key}->[$ii] || 0);
}
my $row = $spec_hash->{$key};
foreach my $r (@$row) {
if ($r =~ /derived/) {
(undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
}
}
push(@$expanded_data, [ @$row, $vals ] );
my $vals = [];
for (my $ii=0; $ii<scalar(@comp_mgs); $ii++) {
push(@$vals, $exp_hash->{$key}->[$ii] || 0);
}
my $row = $spec_hash->{$key};
#foreach my $r (@$row) {
# if ($r =~ /derived/) {
# (undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
# }
#}
push(@$expanded_data, [ @$row, $vals ] );
}
} else {
foreach my $row (@$data) {
foreach my $r (@$row) {
if ($r =~ /derived/) {
(undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
}
}
push(@$expanded_data, [ @$row[1..9] ] );
#foreach my $r (@$row) {
# if ($r =~ /derived/) {
# (undef, $r) = $r =~ /^(unclassified \(derived from )(.+)(\))$/;
# }
#}
push(@$expanded_data, [ @$row[1..9] ] );
}
}
@$expanded_data = sort { $b->[8] <=> $a->[8] } @$expanded_data;
Expand Down
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.