diff --git a/Makefile b/Makefile index 3d8c450..a4b64f7 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ LOC_INCLUDE=include LOC_SRC=src OBJDIR=obj -CXXFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. -I$(LOC_INCLUDE) \ +CXXFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. -I$(LOC_INCLUDE) -I$(MYINCL) \ -Wno-unused-result -Wno-strict-aliasing -Wno-unused-function -Wno-sign-compare CFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. -I$(LOC_INCLUDE)\ @@ -59,6 +59,7 @@ $(OBJDIR)/hashutil.o: $(LOC_INCLUDE)/hashutil.h # dependencies between .o files and .cc (or .c) files $(OBJDIR)/gqf.o: $(LOC_SRC)/cqf/gqf.c $(LOC_INCLUDE)/cqf/gqf.h +$(OBJDIR)/query.o: $(LOC_SRC)/sockets.h # # generic build rules diff --git a/README.md b/README.md index 2cf0349..bb56e5b 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ OPTIONS Prefix of input files. ``` - The command takes the following options : + The command takes the following options: - `--input-prefix,-p`: the directory where the output of coloreddbg command is present. - `--output,-o`: the file where the query results should be written (default : `samples.output`). @@ -94,10 +94,51 @@ OPTIONS - query transcripts: input transcripts to be queried. Finally, rather than writing the results in the "simple" output format, they can be written in JSON if you - provide the `--json,-j` flag to the `query` comamnd. + provide the `--json,-j` flag to the `query` command. The output file in contains the list of experiments (i.e., hits) corresponding to each queried transcript. +Server +------- + +`mantis server` lets you run mantis as a server which loads a mantis index and waits for queries. + +```bash + $ make mantis + $ ./mantis server -p raw/ +``` + +The options and arguments are as follows: + +```bash +SYNOPSIS + mantis server [-j] -p <query_prefix> + +OPTIONS + -j, --json Write the output in JSON format + + <query_prefix> + Prefix of input files. 
+``` + + The command takes the following options: + - `--input-prefix,-p`: the directory where the output of coloreddbg command is present. + + Rather than writing the results in the "simple" output format, they can be written in JSON if you + provide the `--json,-j` flag to the `server` command. + + The server loads the mantis index into memory and then accepts queries (port 23901 is used by default) in the following format: + +``` +<query filepath> <output filepath> +``` + + The query has the following arguments, separated by exactly one space: + - query filepath: input transcripts to be queried. + - output filepath: the file where the query results should be written. The output file contains the list of experiments (i.e., hits) corresponding to each queried transcript. + + There is an example client written in Python, `src/client.py`, which demonstrates how to communicate with the server. + Contributing ------------ Contributions via GitHub pull requests are welcome. diff --git a/scripts/install_mantis_server.sh b/scripts/install_mantis_server.sh new file mode 100755 index 0000000..df2ea7f --- /dev/null +++ b/scripts/install_mantis_server.sh @@ -0,0 +1,14 @@ +#!/bin/bash +. 
load_deps_spack.sh # load mantis dependencies with spack and save environment variables + +TOOLS=~/tools +mkdir -p $TOOLS/src +git clone https://github.com/simongog/sdsl-lite.git $TOOLS/src/sdsl-lite +$TOOLS/src/sdsl-lite/install.sh $TOOLS + +git clone https://github.com/effect/mantis.git $TOOLS/mantis + +LIBRARY_PATH=$TOOLS/lib:$LIBRARY_PATH +./patch_makefile.sh $TOOLS/mantis/Makefile +cd $TOOLS/mantis +MYINCL=$TOOLS/include make NH=1 diff --git a/scripts/libpaths.sh b/scripts/libpaths.sh new file mode 100755 index 0000000..91fc548 --- /dev/null +++ b/scripts/libpaths.sh @@ -0,0 +1,6 @@ +#!/bin/bash +IFS=":" +for line in $LIBRARY_PATH; do + echo -L$line '\\' +done + diff --git a/scripts/load_deps_spack.sh b/scripts/load_deps_spack.sh new file mode 100755 index 0000000..e0e2ddf --- /dev/null +++ b/scripts/load_deps_spack.sh @@ -0,0 +1,7 @@ +#!/bin/bash +spack load gcc +spack load cmake +spack load boost +spack load zlib +spack load bzip2 +spack load binutils diff --git a/scripts/patch_makefile.sh b/scripts/patch_makefile.sh new file mode 100755 index 0000000..c7caed8 --- /dev/null +++ b/scripts/patch_makefile.sh @@ -0,0 +1,6 @@ +#!/bin/bash +MAKEFILE=$1 +LIBPATHS="$(./libpaths.sh)" +LIBPATHS="$(echo "$LIBPATHS" | sed '$ ! 
s/$/\\/')" # escape new lines +sed -i "/TARGETS=/aMYLIBS=$LIBPATHS" $MAKEFILE # add MYLIBS var with all libraries path +sed -i "s/LDFLAGS += /LDFLAGS += \$\(MYLIBS\) /g" $MAKEFILE # add MYLIBS to LDFLAGS diff --git a/src/client.py b/src/client.py new file mode 100644 index 0000000..80f4fb8 --- /dev/null +++ b/src/client.py @@ -0,0 +1,17 @@ +import socket +import sys + +HOST, PORT = "localhost", 23901 + +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + +s.connect((HOST, PORT)) + +query_filepath = "/home/paf2023/run_mantis/queries/query{0}.fa" +output_filepath = "/home/paf2023/run_mantis/out/out{0}.res" + +for x in range(0, 5): + print("Send request {0}".format(query_filepath.format(x))) + s.send(query_filepath.format(x) + " " + output_filepath.format(x)) + result = str(s.recv(4096)) + print("Got result {0}".format(result)) diff --git a/src/mantis.cc b/src/mantis.cc index b80ca6d..0a3023b 100644 --- a/src/mantis.cc +++ b/src/mantis.cc @@ -46,6 +46,7 @@ void explore_options_verbose(T& res) { } int query_main (QueryOpts& opt); +int server_main (QueryOpts& opt); int build_main (BuildOpts& opt); int validate_main (ValidateOpts& opt); @@ -57,16 +58,18 @@ int validate_main (ValidateOpts& opt); */ int main ( int argc, char *argv[] ) { using namespace clipp; - enum class mode {build, query, validate, help}; + enum class mode {build, query, server, validate, help}; mode selected = mode::help; auto console = spdlog::stdout_color_mt("mantis_console"); BuildOpts bopt; - QueryOpts qopt; + QueryOpts qopt, sopt; ValidateOpts vopt; + bopt.console = console; qopt.console = console; + sopt.console = console; vopt.console = console; start_time = time(NULL); @@ -145,6 +148,11 @@ int main ( int argc, char *argv[] ) { option("-o", "--output") & value("output_file", qopt.output) % "Where to write query output.", value(ensure_file_exists, "query", qopt.query_file) % "Prefix of input files." 
); + auto server_mode = ( + command("server").set(selected, mode::server), + option("-j", "--json").set(sopt.use_json) % "Write the output in JSON format", + required("-p", "--input-prefix") & value(ensure_dir_exists, "query_prefix", sopt.prefix) % "Prefix of input files." // , + ); auto validate_mode = ( command("validate").set(selected, mode::validate), @@ -155,11 +163,12 @@ int main ( int argc, char *argv[] ) { ); auto cli = ( - (build_mode | query_mode | validate_mode | command("help").set(selected,mode::help) ), + (build_mode | query_mode | server_mode | validate_mode | command("help").set(selected,mode::help) ), option("-v", "--version").call([]{std::cout << "version 1.0\n\n";}).doc("show version") ); assert(build_mode.flags_are_prefix_free()); assert(query_mode.flags_are_prefix_free()); + assert(server_mode.flags_are_prefix_free()); assert(validate_mode.flags_are_prefix_free()); decltype(parse(argc, argv, cli)) res; @@ -178,6 +187,7 @@ int main ( int argc, char *argv[] ) { switch(selected) { case mode::build: build_main(bopt); break; case mode::query: query_main(qopt); break; + case mode::server: server_main(sopt); break; case mode::validate: validate_main(vopt); break; case mode::help: std::cout << make_man_page(cli, "mantis"); break; } diff --git a/src/query.cc b/src/query.cc index 0cc39b8..623cef3 100644 --- a/src/query.cc +++ b/src/query.cc @@ -53,13 +53,15 @@ #include "CLI/CLI.hpp" #include "CLI/Timer.hpp" +#include "sockets.h" + #define MAX_NUM_SAMPLES 2600 #define OUTPUT_FILE "samples.output" -void output_results(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, +void output_results(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, std::ofstream& opfile) { mantis::QueryResults qres; - uint32_t cnt= 0; + uint32_t cnt= 0; { CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; //size_t qctr{0}; @@ -77,10 +79,10 @@ void output_results(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, +void 
output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, std::ofstream& opfile) { mantis::QueryResults qres; - uint32_t cnt= 0; + uint32_t cnt= 0; { CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; opfile << "[\n"; @@ -106,99 +108,127 @@ void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json) { + uint32_t seed = 2038074743; + uint64_t total_kmers = 0; + + uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; + console->info("Use colored dbg with {} k-mers and {} color classes", + cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + console->info("K-mer size: {}", kmer_size); + console->info("Query file: {}", query_file); + console->info("Output file: {}", output_file); + + mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), + seed, + cdbg.range(), + kmer_size, + total_kmers); + console->info("Total k-mers to query: {}", total_kmers); + + std::ofstream opfile(output_file); + console->info("Querying the colored dbg."); + + if (use_json) { + output_results_json(multi_kmers, cdbg, opfile); + } else { + output_results(multi_kmers, cdbg, opfile); + } + + opfile.close(); + console->info("Writing done."); +} + /* * === FUNCTION ============================================================= - * Name: main + * Name: query main * Description: * ============================================================================ */ -int query_main (QueryOpts& opt) +int query_main(QueryOpts& opt) { - //CLI::App app("Mantis query"); - std::string prefix = opt.prefix; std::string query_file = opt.query_file; std::string output_file = opt.output;//{"samples.output"}; bool use_json = opt.use_json; - /* - app.add_option("-i,--input-prefix", prefix, "Prefix of input files.")->required(); - app.add_option("-o,--outout", output_file, "Where to write query output."); - app.add_option("query", query_file, "Prefix of input files.")->required(); - app.add_flag("-j,--json", 
use_json, "Write the output in JSON format"); - CLI11_PARSE(app, argc, argv); - */ - // Make sure the prefix is a full folder if (prefix.back() != '/') { prefix.push_back('/'); } spdlog::logger* console = opt.console.get(); - console->info("Reading colored dbg from disk."); - - std::string cqf_file(prefix + CQF_FILE); - std::string sample_file(prefix + SAMPLEID_FILE); - std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), - std::string(EQCLASS_FILE).c_str()); - - ColoredDbg*>, KeyObject> cdbg(cqf_file, - eqclass_files, - sample_file); - uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; - console->info("Read colored dbg with {} k-mers and {} color classes", - cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + console->info("Reading colored dbg from disk."); - //cdbg.get_cqf()->dump_metadata(); - //CQF cqf(query_file, false); - //CQF::Iterator it = cqf.begin(1); - //mantis::QuerySet input_kmers; - //do { - //KeyObject k = *it; - //input_kmers.insert(k.key); - //++it; - //} while (!it.done()); - - //mantis::QuerySets multi_kmers; - //multi_kmers.push_back(input_kmers); - - console->info("Reading query kmers from disk."); - uint32_t seed = 2038074743; - uint64_t total_kmers = 0; - mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), - seed, - cdbg.range(), - kmer_size, - total_kmers); - console->info("Total k-mers to query: {}", total_kmers); - - // Attempt to optimize bulk query - /* - bool doBulk = multi_kmers.size() >= 100; - BulkQuery bq; - if (doBulk) { - uint32_t exp_id{0}; - for (auto& kmers : multi_kmers) { - for (auto& k : kmers) { - bq.qs.insert(k); - bq.map[k].push_back(exp_id); - } - ++exp_id; + std::string cqf_file(prefix + CQF_FILE); + std::string sample_file(prefix + SAMPLEID_FILE); + std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), + std::string(EQCLASS_FILE).c_str()); + + ColoredDbg*>, KeyObject> cdbg(cqf_file, + eqclass_files, + sample_file); + run_query(query_file, output_file, cdbg, console, 
use_json); + + return EXIT_SUCCESS; +} /* ---------- end of function main ---------- */ + + +/* + * === FUNCTION ============================================================= + * Name: server main + * Description: + * ============================================================================ + */ +int server_main(QueryOpts& opt) +{ + try { + std::string prefix = opt.prefix; + bool use_json = opt.use_json; + + // Make sure the prefix is a full folder + if (prefix.back() != '/') { + prefix.push_back('/'); } - } - */ - std::ofstream opfile(output_file); - console->info("Querying the colored dbg."); + spdlog::logger* console = opt.console.get(); + console->info("Reading colored dbg from disk: " + prefix); - if (use_json) { - output_results_json(multi_kmers, cdbg, opfile); - } else { - output_results(multi_kmers, cdbg, opfile); + std::string cqf_file(prefix + CQF_FILE); + std::string sample_file(prefix + SAMPLEID_FILE); + std::vector eqclass_files = mantis::fs::GetFilesExt(prefix.c_str(), + std::string(EQCLASS_FILE).c_str()); + ColoredDbg*>, KeyObject> cdbg(cqf_file, + eqclass_files, + sample_file); + + boost::asio::io_service io_service; + server s(io_service, 23901, cdbg, console, use_json); + + console->info("Reading query kmers from disk."); + console->info("Run server accepting queries."); + io_service.run(); + + // read from stdin instead of socket + // while (1) { + // std::string query_file = opt.query_file; + // std::string output_file = opt.output; //{"samples.output"}; + + // cout << "Enter query file: " << endl; + // cin >> query_file; + // cout << "Enter output file: " << endl; + // cin >> output_file; + + // run_query(query_file, output_file, cdbg, console, use_json); + // } } - //std::cout << "Writing samples and abundances out." 
<< std::endl; - opfile.close(); - console->info("Writing done."); + catch (std::exception& e) { + std::cerr << "Exception: " << e.what() << "\n"; + } + + return EXIT_SUCCESS; +} /* ---------- end of function main ---------- */ - return EXIT_SUCCESS; -} /* ---------- end of function main ---------- */ diff --git a/src/sockets.h b/src/sockets.h new file mode 100644 index 0000000..7cf685b --- /dev/null +++ b/src/sockets.h @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include + +using boost::asio::ip::tcp; + +extern void run_query(std::string query_file, std::string output_file, + ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json); + +class session + : public std::enable_shared_from_this +{ +public: + session(tcp::socket socket, + ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json) + : socket_(std::move(socket)), + cdbg(cdbg), console(console), use_json(use_json) + { + } + + void start() + { + do_read(); + } + +private: + void do_read() + { + // read query file path and output file path + auto self(shared_from_this()); + socket_.async_read_some(boost::asio::buffer(data_, max_length), + [this, self](boost::system::error_code ec, std::size_t length) + { + char * delimeter_ptr = strstr(data_, " "); + if (!delimeter_ptr) + { + // TODO incorrect query format error. Call do_read? 
+ return; + } + std::string query_filepath(data_, delimeter_ptr - data_); + std::string output_filepath(delimeter_ptr + 1); // assume exactly one space between filepaths + + run_query(query_filepath, output_filepath, cdbg, console, use_json); // TODO check result + + if (!ec) + { + do_write(length, output_filepath); + } + }); + } + + void do_write(std::size_t length, std::string outfile) + { + // respond with output filepath + auto self(shared_from_this()); + memset(data_, 0, sizeof(data_)); + strcpy(data_, outfile.c_str()); + boost::asio::async_write(socket_, boost::asio::buffer(data_, length), + [this, self](boost::system::error_code ec, std::size_t /*length*/) + { + if (!ec) + { + do_read(); + } + }); + } + + tcp::socket socket_; + enum { max_length = 4096 }; + char data_[max_length]; + + ColoredDbg*>, KeyObject>& cdbg; + spdlog::logger * console; + bool use_json; +}; + +class server +{ +public: + server(boost::asio::io_service& io_service, short port, + ColoredDbg*>, KeyObject>& cdbg, + spdlog::logger * console, + bool use_json) + : acceptor_(io_service, tcp::endpoint(tcp::v4(), port)), + socket_(io_service), + cdbg(cdbg), console(console), use_json(use_json) + { + do_accept(); + } + +private: + void do_accept() + { + acceptor_.async_accept(socket_, + [this](boost::system::error_code ec) + { + if (!ec) + { + std::make_shared(std::move(socket_), cdbg, console, use_json)->start(); + } + + do_accept(); + }); + } + + tcp::acceptor acceptor_; + tcp::socket socket_; + + ColoredDbg*>, KeyObject>& cdbg; + spdlog::logger * console; + bool use_json; +}; \ No newline at end of file