diff --git a/src/seqTools.cpp b/src/seqTools.cpp
new file mode 100644
index 00000000..5f64c491
--- /dev/null
+++ b/src/seqTools.cpp
@@ -0,0 +1,112 @@
+//
+//  seqTools.cpp
+//  StochHMM
+//
+//  Created by Paul Lott on 5/18/12.
+//  Copyright (c) 2012 University of California, Davis. All rights reserved.
+//
+
+#include "seqTools.h"
+
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace StochHMM{
+
+    namespace {
+        // Uniform draw in [0,1) built on std::rand(). Dividing by RAND_MAX+1
+        // keeps the result strictly below 1.
+        double uniformDraw(){
+            return static_cast<double>(std::rand()) / (static_cast<double>(RAND_MAX) + 1.0);
+        }
+    }
+
+    // Returns a new sequence whose symbols are a random permutation of the
+    // characters of seq's undigitized string, bound to seq's track.
+    //
+    // seq : sequence to permute. A NULL pointer is reported on std::cerr and
+    //       an empty sequence is returned.
+    //
+    // NOTE(review): the loop body was lost from the patch text this file was
+    // recovered from; the Fisher-Yates shuffle below is a reconstruction --
+    // confirm against the upstream source.
+    sequence shuffle(sequence *seq){
+        sequence shuffled_seq(ALPHA_NUM);
+
+        if (seq==NULL){
+            std::cerr << "Sequence is not defined" << std::endl;
+            return shuffled_seq;
+        }
+
+        std::string strSeq=seq->getUndigitized();
+        // Walk from the back, swapping each position with a uniformly chosen
+        // position at or before it.
+        for(size_t i=strSeq.size();i>1;--i){
+            size_t pick=static_cast<size_t>(uniformDraw()*static_cast<double>(i));
+            if (pick>=i){ pick=i-1; }  // guard against floating-point round-up
+            std::swap(strSeq[i-1],strSeq[pick]);
+        }
+        shuffled_seq.setSeq(strSeq, seq->getTrack());
+
+        return shuffled_seq;
+    }
+
+    // Builds a random sequence of `length` symbols on track `tr`.
+    //
+    // freq   : one frequency per alphabet symbol of tr, in alphabet order.
+    //          Presumably non-negative and summing to 1 -- not checked here.
+    // length : number of symbols to draw.
+    // tr     : track that supplies the alphabet and is attached to the result.
+    //
+    // Returns an empty sequence, after a message on std::cerr, when tr is NULL
+    // or freq.size() differs from the alphabet size.
+    //
+    // NOTE(review): the element type of freq and the random draw were lost
+    // from the recovered patch text; double and std::rand() are assumed.
+    sequence random_sequence(const std::vector<double>& freq, size_t length, track* tr){
+        sequence random_seq(ALPHA_NUM);
+
+        if (tr==NULL){
+            std::cerr << "Track is not defined" << std::endl;
+            return random_seq;
+        }
+
+        size_t alphaSize=tr->getAlphaSize();
+        size_t freqSize=freq.size();
+        if (alphaSize!=freqSize){
+            std::cerr << "Frequency distribution size and Alphabet size must be the same." << std::endl;
+            return random_seq;
+        }
+
+        //Create CDF of frequency distribution
+        std::vector<double> cdf;
+        cdf.reserve(freqSize);
+        double sum = 0.0;
+        for(size_t i=0;i<freqSize;++i){
+            sum+=freq[i];
+            cdf.push_back(sum);
+        }
+
+        //Generate random sequence
+        std::string random_string;
+        for(size_t j=0;j<length && freqSize>0;++j){
+            double val=uniformDraw();
+            // Start from the last symbol so that a CDF that tops out just
+            // under 1 (rounding) still yields a symbol for every position.
+            size_t chosen=freqSize-1;
+            for(size_t m=0;m<freqSize;++m){
+                if (cdf[m]>=val){
+                    chosen=m;
+                    break;
+                }
+            }
+            random_string+=tr->getAlpha(chosen);
+        }
+
+        random_seq.setSeq(random_string, tr);
+        return random_seq;
+    }
+}
diff --git a/src/seqTools.h b/src/seqTools.h
new file mode 100644
index 00000000..6e1c060d
--- /dev/null
+++ b/src/seqTools.h
@@ -0,0 +1,39 @@
+//
+//  seqTools.h
+//  StochHMM
+//
+//  Created by Paul Lott on 5/18/12.
+//  Copyright (c) 2012 University of California, Davis. All rights reserved.
+//
+
+#ifndef StochHMM_seqTools_h
+#define StochHMM_seqTools_h
+#include <iostream>
+#include <string>
+#include <vector>
+#include "sequence.h"
+#include "hmm.h"
+namespace StochHMM{
+    // Returns a shuffled copy of seq as a new sequence on seq's track.
+    sequence shuffle(sequence* seq);
+
+    // Random sequence of the given length drawn from per-symbol frequencies.
+    sequence random_sequence(std::vector<double> const& freq, size_t , track*);
+
+    // Everything below has no definition in seqTools.cpp in this patch.
+    sequence random_sequence(emm*);
+    sequence reverseComplement();
+    sequence translate();
+
+    void motifScoring();
+
+    void markov_length_distribution(model*);
+
+    void markov_generate_sequence(model*);
+
+    // TODO: add PWM scoring, with either a fixed threshold or one derived by
+    // shuffling and computing the FDR (caller supplies the FDR threshold).
+}
+
+
+#endif