This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository psortb.
commit 65617d058a1a71778f2329bd817d42f00b3e7879 Author: Andreas Tille <[email protected]> Date: Wed Apr 12 14:49:03 2017 +0200 New upstream version 3.0.4+dfsg --- algorithm-hmm/src/include/config.h | 52 ---- algorithm-hmm/src/include/funcs.h | 350 ---------------------- algorithm-hmm/src/include/globals.h | 28 -- algorithm-hmm/src/include/gsi.h | 85 ------ algorithm-hmm/src/include/gsi64.h | 101 ------- algorithm-hmm/src/include/postprob.h | 55 ---- algorithm-hmm/src/include/structs.h | 565 ----------------------------------- 7 files changed, 1236 deletions(-) diff --git a/algorithm-hmm/src/include/config.h b/algorithm-hmm/src/include/config.h deleted file mode 100644 index fb89df2..0000000 --- a/algorithm-hmm/src/include/config.h +++ /dev/null @@ -1,52 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* config.h - * - * Configurable compile-time parameters in HMMER. - */ - -#ifndef CONFIGH_INCLUDED -#define CONFIGH_INCLUDED - -/* RAMLIMIT determines the point at which we switch from fast, - * full dynamic programming to slow, linear-memory divide and conquer - * dynamic programming algorithms. It is the minimum amount of available - * RAM on the systems the package will run on. It can be overridden - * from the Makefile. - * By default, we assume we have 32 Mb RAM available (per thread). - */ -#ifndef RAMLIMIT -#define RAMLIMIT 32 -#endif - -/* HMMER_NCPU determines the number of threads/processors that - * a threads version will parallelize across. This can be overridden - * by -DHMMER_NCPU=x in the Makefile, and by a setenv HMMER_NCPU x - * in the environment, and usually by a command line option. - * Usually we detect the number of processors dynamically, but - * on some systems (FreeBSD and Linux, notably), we can't. On - * these systems we assume 2 processors by default. That assumption - * can be overridden here if HMMER_NCPU is uncommented. - */ -/* #define HMMER_NCPU 4 */ - -#define INTSCALE 1000.0 /* scaling constant for floats to integer scores */ -#define MAXABET 20 /* maximum size of alphabet (4 or 20) */ -#define MAXCODE 23 /* maximum degenerate alphabet size (17 or 23) */ -#define MAXDCHLET 200 /* maximum # Dirichlet components in mixture prior */ -#define NINPUTS 4 /* number of inputs into structural prior */ -#define INFTY 987654321 /* infinity for purposes of integer DP cells */ -#define NXRAY 4 /* number of structural inputs */ -#define LOGSUM_TBL 20000 /* controls precision of ILogsum() */ -#define ALILENGTH 50 /* length of displayed alignment lines */ - -#endif /*CONFIGH_INCLUDED*/ - diff --git a/algorithm-hmm/src/include/funcs.h b/algorithm-hmm/src/include/funcs.h deleted file mode 100644 index a813d9f..0000000 --- a/algorithm-hmm/src/include/funcs.h +++ /dev/null @@ -1,350 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* funcs.h - * RCS $Id: funcs.h,v 1.1.1.1 2003/07/20 20:30:07 cspencer Exp $ - * - * Declarations of external functions in HMMER. - */ - -#ifndef FUNCSH_INCLUDED -#define FUNCSH_INCLUDED - -#include "config.h" -#include "structs.h" -#include "squid.h" -#include "msa.h" - -/* alphabet.c - * Configuration of global alphabet information - */ -extern void DetermineAlphabet(char **rseqs, int nseq); -extern void SetAlphabet(int type); -extern int SymbolIndex(char sym); -extern char *DigitizeSequence(char *seq, int L); -extern char *DedigitizeSequence(char *dsq, int L); -extern void DigitizeAlignment(MSA *msa, char ***ret_dsqs); -extern void P7CountSymbol(float *counters, char sym, float wt); -extern void DefaultGeneticCode(int *aacode); -extern void DefaultCodonBias(float *codebias); - -/* from core_algorithms.c - * Clean research/demonstration versions of basic algorithms. - */ -extern struct dpmatrix_s *AllocPlan7Matrix(int rows, int M, int ***xmx, - int ***mmx, int ***imx, int ***dmx); -extern struct dpshadow_s *AllocShadowMatrix(int rows, int M, char ***xtb, - char ***mtb, char ***itb, char ***dtb); -extern void FreePlan7Matrix(struct dpmatrix_s *mx); -extern void FreeShadowMatrix(struct dpshadow_s *tb); -extern int P7ViterbiSize(int L, int M); -extern int P7SmallViterbiSize(int L, int M); -extern int P7WeeViterbiSize(int L, int M); -extern float P7Forward(char *dsq, int L, struct plan7_s *hmm, - struct dpmatrix_s **ret_mx); -extern float P7Viterbi(char *dsq, int L, struct plan7_s *hmm, - struct p7trace_s **ret_tr); -extern void P7ViterbiTrace(struct plan7_s *hmm, char *dsq, int L, - struct dpmatrix_s *mx, struct p7trace_s **ret_tr); -extern float P7SmallViterbi(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr); -extern float P7ParsingViterbi(char *dsq, int L, struct plan7_s *hmm, - struct p7trace_s **ret_tr); -extern float P7WeeViterbi(char *dsq, int L, struct plan7_s *hmm, - struct p7trace_s **ret_tr); -extern float Plan7ESTViterbi(char *dsq, int L, struct plan7_s *hmm, - struct dpmatrix_s **ret_mx); -extern struct p7trace_s *P7ViterbiAlignAlignment(MSA *msa, struct plan7_s *hmm); -extern struct p7trace_s *ShadowTrace(struct dpshadow_s *tb, struct plan7_s *hmm, int L); -extern void PostprocessSignificantHit(struct tophit_s *ghit, struct tophit_s *dhit, struct p7trace_s *tr, struct plan7_s *hmm, char *dsq, int L, char *seqname, char *seqacc, char *seqdesc, int do_forward, float sc_override, int do_null2, struct threshold_s *thresh, int hmmpfam_mode); - - -/* from debug.c - * Debugging output of various sorts. - */ -extern char *Statetype(char st); -extern char *AlphabetType2String(int type); -extern void P7PrintTrace(FILE *fp, struct p7trace_s *tr, - struct plan7_s *hmm, char *dsq); -extern void P7PrintPrior(FILE *fp, struct p7prior_s *pri); -extern int TraceCompare(struct p7trace_s *t1, struct p7trace_s *t2); -extern int TraceVerify(struct p7trace_s *tr, int M, int N); - -/* - * from display.c - * Ian Holmes' functions for displaying HMMER2 data structures, especially - * for posterior probabilities in alignments. - */ -extern void DisplayPlan7Matrix(char *dsq, int L, struct plan7_s *hmm, - struct dpmatrix_s *mx); -extern void DisplayPlan7Posteriors(int L, struct plan7_s *hmm, - struct dpmatrix_s *forward, struct dpmatrix_s *backward, - struct p7trace_s *viterbi, struct p7trace_s *optacc); -extern void DisplayPlan7PostAlign(int L, struct plan7_s *hmm, - struct dpmatrix_s *forward, struct dpmatrix_s *backward, - struct p7trace_s **alignment, int A); - - -/* from emit.c - * Generation of sequences/traces from an HMM - */ -extern void EmitSequence(struct plan7_s *hmm, char **ret_dsq, int *ret_L, struct p7trace_s **ret_tr); -extern void EmitConsensusSequence(struct plan7_s *hmm, char **ret_seq, char **ret_dsq, int *ret_L, struct p7trace_s **ret_tr); -extern void StateOccupancy(struct plan7_s *hmm, float **ret_mp, float **ret_ip, float **ret_dp); - - -/* from emulation.c - * Interfaces between HMMER and other software packages - */ -extern void WriteProfile(FILE *fp, struct plan7_s *hmm, int do_xsw); - - -/* from histogram.c - * accumulation of scores - */ -extern struct histogram_s *AllocHistogram(int min, int max, int lumpsize); -extern void FreeHistogram(struct histogram_s *h); -extern void UnfitHistogram(struct histogram_s *h); -extern void AddToHistogram(struct histogram_s *h, float sc); -extern void PrintASCIIHistogram(FILE *fp, struct histogram_s *h); -extern void PrintXMGRHistogram(FILE *fp, struct histogram_s *h); -extern void PrintXMGRDistribution(FILE *fp, struct histogram_s *h); -extern void PrintXMGRRegressionLine(FILE *fp, struct histogram_s *h); -extern void EVDBasicFit(struct histogram_s *h); -extern int ExtremeValueFitHistogram(struct histogram_s *h, int censor, - float high_hint); -extern void ExtremeValueSetHistogram(struct histogram_s *h, float mu, float lambda, - float low, float high, int ndegrees); -extern int GaussianFitHistogram(struct histogram_s *h, float high_hint); -extern void GaussianSetHistogram(struct histogram_s *h, float mean, float sd); -extern double EVDDensity(float x, float mu, float lambda); -extern double EVDDistribution(float x, float mu, float lambda); -extern double ExtremeValueP (float x, float mu, float lambda); -extern double ExtremeValueP2(float x, float mu, float lambda, int N); -extern double ExtremeValueE (float x, float mu, float lambda, int N); -extern float EVDrandom(float mu, float lambda); -extern int EVDMaxLikelyFit(float *x, int *y, int n, - float *ret_mu, float *ret_lambda); -extern int EVDCensoredFit(float *x, int *y, int n, int z, float c, - float *ret_mu, float *ret_lambda); -extern void Lawless416(float *x, int *y, int n, float lambda, - float *ret_f, float *ret_df); -extern void Lawless422(float *x, int *y, int n, int z, float c, - float lambda, float *ret_f, float *ret_df); - -/* from hmmio.c - * Input/output (saving/reading) of models - */ -extern HMMFILE *HMMFileOpen(char *hmmfile, char *env); -extern int HMMFileRead(HMMFILE *hmmfp, struct plan7_s **ret_hmm); -extern void HMMFileClose(HMMFILE *hmmfp); -extern int HMMFileFormat(HMMFILE *hmmfp); -extern void HMMFileRewind(HMMFILE *hmmfp); -extern int HMMFilePositionByName(HMMFILE *hmmfp, char *name); -extern int HMMFilePositionByIndex(HMMFILE *hmmfp, int idx); -extern void WriteAscHMM(FILE *fp, struct plan7_s *hmm); -extern void WriteBinHMM(FILE *fp, struct plan7_s *hmm); - -/* masks.c - * Repetitive sequence masking. - */ -extern int XNU(char *dsq, int len); -extern float TraceScoreCorrection(struct plan7_s *hmm, struct p7trace_s *tr, char *dsq); - -/* mathsupport.c - * Much of this code deals with Dirichlet prior mathematics. - */ -extern int Prob2Score(float p, float null); -extern float Score2Prob(int sc, float null); -extern float Scorify(int sc); -extern double PValue(struct plan7_s *hmm, float sc); -extern float LogSum(float p1, float p2); -extern int ILogsum(int p1, int p2); -extern void LogNorm(float *vec, int n); -extern float Logp_cvec(float *cvec, int n, float *alpha); -extern void SampleDirichlet(float *alpha, int n, float *p); -extern float SampleGamma(float alpha); -extern void SampleCountvector(float *p, int n, int c, float *cvec); -extern float P_PvecGivenDirichlet(float *p, int n, float *alpha); - -/* from misc.c - * Miscellaneous functions with no home - */ -extern char *Getword(FILE *fp, int type); -extern char *Getline(char *s, int n, FILE *fp); -extern int SetAutocuts(struct threshold_s *thresh, struct plan7_s *hmm); - -/* from modelmakers.c - * Model construction algorithms - */ -extern void P7Handmodelmaker(MSA *msa, char **dsq, struct plan7_s **ret_hmm, - struct p7trace_s ***ret_tr); -extern void P7Fastmodelmaker(MSA *msa, char **dsq, - float maxgap, struct plan7_s **ret_hmm, - struct p7trace_s ***ret_tr); -extern void P7Maxmodelmaker(MSA *msa, char **dsq, - float maxgap, struct p7prior_s *prior, - float *null, float null_p1, float mpri, - struct plan7_s **ret_hmm, - struct p7trace_s ***ret_tr); - -/* from plan7.c - * Plan7 HMM structure support - */ -extern struct plan7_s *AllocPlan7(int M); -extern struct plan7_s *AllocPlan7Shell(void); -extern void AllocPlan7Body(struct plan7_s *hmm, int M); -extern void FreePlan7(struct plan7_s *hmm); -extern void ZeroPlan7(struct plan7_s *hmm); -extern void Plan7SetName(struct plan7_s *hmm, char *name); -extern void Plan7SetAccession(struct plan7_s *hmm, char *acc); -extern void Plan7SetDescription(struct plan7_s *hmm, char *desc); -extern void Plan7ComlogAppend(struct plan7_s *hmm, int argc, char **argv); -extern void Plan7SetCtime(struct plan7_s *hmm); -extern void Plan7SetNullModel(struct plan7_s *hmm, float null[MAXABET], float p1); -extern void P7Logoddsify(struct plan7_s *hmm, int viterbi_mode); -extern void Plan7Renormalize(struct plan7_s *hmm); -extern void Plan7RenormalizeExits(struct plan7_s *hmm); -extern void Plan7NakedConfig(struct plan7_s *hmm); -extern void Plan7GlobalConfig(struct plan7_s *hmm); -extern void Plan7LSConfig(struct plan7_s *hmm); -extern void Plan7SWConfig(struct plan7_s *hmm, float pentry, float pexit); -extern void Plan7FSConfig(struct plan7_s *hmm, float pentry, float pexit); -extern void PrintPlan7Stats(FILE *fp, struct plan7_s *hmm, char **dsq, - int nseq, struct p7trace_s **tr); -extern int DegenerateSymbolScore(float *p, float *null, int ambig); -extern void Plan9toPlan7(struct plan9_s *hmm, struct plan7_s **ret_plan7); - -/* - * from plan9.c - * Backwards compatibility for the Plan 9 data structures of HMMER 1.x - */ -extern struct plan9_s *P9AllocHMM(int M); -extern void P9ZeroHMM(struct plan9_s *hmm); -extern int P9FreeHMM(struct plan9_s *hmm); -extern void P9Renormalize(struct plan9_s *hmm); -extern void P9DefaultNullModel(float *null); - -/* - * from postprob.c - * Functions for working with posterior probabilities within alignments - */ -extern float P7OptimalAccuracy(char *dsq, int L, struct plan7_s *hmm, struct p7trace_s **ret_tr); -extern float P7Backward(char *dsq, int L, struct plan7_s *hmm, struct dpmatrix_s **ret_mx); -extern void P7EmitterPosterior(int L, struct plan7_s *hmm, struct dpmatrix_s *forward, - struct dpmatrix_s *backward, struct dpmatrix_s *mx); -extern float P7FillOptimalAccuracy(int L, int M, struct dpmatrix_s *posterior, - struct dpmatrix_s *mx, struct p7trace_s **ret_tr); -extern void P7OptimalAccuracyTrace(int L, int M, struct dpmatrix_s *posterior, - struct dpmatrix_s *mx, struct p7trace_s **ret_tr); -extern char *PostalCode(int L, struct dpmatrix_s *mx, struct p7trace_s *tr); - -/* from prior.c - * Dirichlet priors - */ -extern struct p7prior_s *P7AllocPrior(void); -extern struct p7prior_s *P7LaplacePrior(void); -extern struct p7prior_s *P7DefaultPrior(void); -extern struct p7prior_s *P7ReadPrior(char *prifile); -extern void P7FreePrior(struct p7prior_s *pri); -extern void PAMPrior(char *pamfile, struct p7prior_s *pri, float pamwgt); -extern void P7DefaultNullModel(float *null, float *ret_p1); -extern void P7ReadNullModel(char *rndfile, float *null, float *ret_p1); -extern void P7PriorifyHMM(struct plan7_s *hmm, struct p7prior_s *pri); -extern void P7PriorifyTransitionVector(float *t, struct p7prior_s *prior, - float tq[MAXDCHLET]); -extern void P7PriorifyEmissionVector(float *vec, struct p7prior_s *pri, - int num, float eq[MAXDCHLET], - float e[MAXDCHLET][MAXABET], - float *ret_mix); - - -#ifdef HMMER_PVM -/* from pvm.c - * PVM Parallel Virtual Machine implementation - */ -extern void PVMSpawnSlaves(char *slave, int **ret_tid, int *ret_nslaves); -extern void PVMConfirmSlaves(int *slave_tid, int nslaves); -extern void PVMCheckSlaves(int *slave_tid, int nslaves); -extern void PVMKillSlaves(int *slave_tid, int nslaves); -extern int PVMPackString(char *s); -extern char * PVMUnpackString(void); -extern int PVMPackTrace(struct p7trace_s *tr); -extern struct p7trace_s *PVMUnpackTrace(void); -extern int PVMPackHMM(struct plan7_s *hmm); -extern struct plan7_s * PVMUnpackHMM(void); -#endif /*HMMER_PVM*/ - -#ifdef HMMER_THREADS -/* from threads.c - * POSIX threads implementation - */ -extern int ThreadNumber(void); -#endif /*HMMER_THREADS*/ - - -/* from tophits.c - * Support for keeping/sorting top scoring hit/alignment lists - */ -extern struct tophit_s *AllocTophits(int lumpsize); -extern void GrowTophits(struct tophit_s *h); -extern void FreeTophits(struct tophit_s *h); -extern struct fancyali_s *AllocFancyAli(void); -extern void FreeFancyAli(struct fancyali_s *ali); -extern void RegisterHit(struct tophit_s *h, double sortkey, - double pvalue, float score, - double motherp, float mothersc, - char *name, char *acc, char *desc, - int sqfrom, int sqto, int sqlen, - int hmmfrom, int hmmto, int hmmlen, - int domidx, int ndom, - struct fancyali_s *ali); -extern void GetRankedHit(struct tophit_s *h, int rank, - double *r_pvalue, float *r_score, - double *r_motherp, float *r_mothersc, - char **r_name, char **r_acc, char **r_desc, - int *r_sqfrom, int *r_sqto, int *r_sqlen, - int *r_hmmfrom, int *r_hmmto, int *r_hmmlen, - int *r_domidx, int *r_ndom, - struct fancyali_s **r_ali); -extern int TophitsMaxName(struct tophit_s *h); -extern void FullSortTophits(struct tophit_s *h); -extern void TophitsReport(struct tophit_s *h, double E, int nseq); - -/* from trace.c - * Support for traceback (state path) structure - */ -extern void P7AllocTrace(int tlen, struct p7trace_s **ret_tr); -extern void P7ReallocTrace(struct p7trace_s *tr, int tlen); -extern void P7FreeTrace(struct p7trace_s *tr); -extern void TraceSet(struct p7trace_s *tr, int tpos, char type, int idx, int pos); -extern struct p7trace_s **MergeTraceArrays(struct p7trace_s **t1, int n1, struct p7trace_s **t2, int n2); -extern void P7ReverseTrace(struct p7trace_s *tr); -extern void P7TraceCount(struct plan7_s *hmm, char *dsq, float wt, - struct p7trace_s *tr); -extern float P7TraceScore(struct plan7_s *hmm, char *dsq, struct p7trace_s *tr); -extern MSA *P7Traces2Alignment(char **dsq, SQINFO *sqinfo, float *wgt, - int nseq, int M, - struct p7trace_s **tr, int matchonly); -extern int TransitionScoreLookup(struct plan7_s *hmm, char st1, - int k1, char st2, int k2); -extern struct fancyali_s *CreateFancyAli(struct p7trace_s *tr, struct plan7_s *hmm, - char *dsq, char *name); -extern void PrintFancyAli(FILE *fp, struct fancyali_s *ali); -extern void TraceDecompose(struct p7trace_s *otr, struct p7trace_s ***ret_tr, - int *ret_ntr); -extern int TraceDomainNumber(struct p7trace_s *tr); -extern void TraceSimpleBounds(struct p7trace_s *tr, int *ret_i1, int *ret_i2, - int *ret_k1, int *ret_k2); -extern struct p7trace_s *MasterTraceFromMap(int *map, int M, int alen); -extern void ImposeMasterTrace(char **aseq, int nseq, struct p7trace_s *mtr, - struct p7trace_s ***ret_tr); - - -#endif /*FUNCSH_INCLUDED*/ diff --git a/algorithm-hmm/src/include/globals.h b/algorithm-hmm/src/include/globals.h deleted file mode 100644 index c9b028e..0000000 --- a/algorithm-hmm/src/include/globals.h +++ /dev/null @@ -1,28 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* globals.h - * Mon Nov 18 13:05:03 1996 - * - * Global variable definitions. - * This file may only be included in a main() .c file. - */ - -#ifndef __GLOBALS_H__ -#define __GLOBALS_H__ - -char Alphabet[MAXCODE]; /* ACGT, for instance */ -int Alphabet_type; /* hmmNUCLEIC or hmmAMINO */ -int Alphabet_size; /* uniq alphabet size: 4 or 20 */ -int Alphabet_iupac; /* total size of alphabet + IUPAC degen. */ -char Degenerate[MAXCODE][MAXABET]; -int DegenCount[MAXCODE]; - -#endif diff --git a/algorithm-hmm/src/include/gsi.h b/algorithm-hmm/src/include/gsi.h deleted file mode 100644 index 2f2a91c..0000000 --- a/algorithm-hmm/src/include/gsi.h +++ /dev/null @@ -1,85 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef GSIH_INCLUDED -#define GSIH_INCLUDED - -/* gsi.h - * Database indexing (GSI format support) - * RCS $Id: gsi.h,v 1.1.1.1 2003/07/20 20:30:07 cspencer Exp $ - * - * A GSI (generic sequence index) file is composed of - * recnum + nfiles + 1 records. Each record contains - * three fields; key, file number, and disk offset. - * Record 0 contains: - * [ "GSI" ] [ nfiles ] [ recnum ] - * Records 1..nfiles map file names to file numbers, and contain: - * [ filename ] [ file number, 1..nfiles ] [ 0 (unused) ] - * Records nfiles+1 to recnum+nfiles+1 provide disk offset - * and file number indices for every key: - * [ key ] [ file number ] [ offset] - * - * Because the file is binary, we take some (but not - * complete) care to improve portability amongst platforms. - * This means using network order integers (see ntohl()) - * and defining types for 16 and 32 bit integers. - * - * Because we use 32-bit offsets, ftell(), and fseek(), - * there is an implicit 2 Gb file size maximum. - * AFAIK neither ANSI C nor POSIX provide a portable solution - * to this problem. fsetpos(), fgetpos() use an - * opaque fpos_t datatype that we can't write portably - * to a disk file. Suggestions welcomed. - */ -#define GSI_KEYSIZE 32 /* keys are 32 bytes long */ -#define GSI_RECSIZE 38 /* 32 + 2 + 4 bytes */ -#define SQD_UINT16_MAX 65535 /* 2^16-1 */ -#define SQD_UINT32_MAX 4294967295U/* 2^32-1 */ - -struct gsi_s { - FILE *gsifp; /* open GSI index file */ - sqd_uint16 nfiles; /* number of files = 16 bit int */ - sqd_uint32 recnum; /* number of records = 32 bit int */ -}; -typedef struct gsi_s GSIFILE; - -struct gsikey_s { - char key[GSI_KEYSIZE]; - sqd_uint16 filenum; - sqd_uint32 offset; -}; -struct gsiindex_s { - char **filenames; - int *fmt; - sqd_uint16 nfiles; - - struct gsikey_s *elems; - int nkeys; -}; - - -/* from gsi.c - */ -extern GSIFILE *GSIOpen(char *gsifile); -extern int GSIGetRecord(GSIFILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint32 *f3); -extern int GSIGetOffset(GSIFILE *gsi, char *key, char *sqfile, - int *fmt, long *ret_offset); -extern void GSIClose(GSIFILE *gsi); -extern struct gsiindex_s *GSIAllocIndex(void); -extern void GSIFreeIndex(struct gsiindex_s *g); -extern void GSIAddFileToIndex(struct gsiindex_s *g, char *filename, int fmt); -extern void GSIAddKeyToIndex(struct gsiindex_s *g, char *key, int filenum, long offset); -extern void GSISortIndex(struct gsiindex_s *g); -extern void GSIWriteIndex(FILE *fp, struct gsiindex_s *g); -extern void GSIWriteHeader(FILE *fp, int nfiles, long nkeys); -extern int GSIWriteFileRecord(FILE *fp, char *fname, int idx, int fmt); -extern int GSIWriteKeyRecord(FILE *fp, char *key, int fileidx, long offset); - -#endif /*GSIH_INCLUDED*/ diff --git a/algorithm-hmm/src/include/gsi64.h b/algorithm-hmm/src/include/gsi64.h deleted file mode 100644 index 6ccc172..0000000 --- a/algorithm-hmm/src/include/gsi64.h +++ /dev/null @@ -1,101 +0,0 @@ -/***************************************************************** - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - *****************************************************************/ - -#ifndef GSI64H_INCLUDED -#define GSI64H_INCLUDED -#ifdef USE_GSI64 - -/* gsi64.h - * Database indexing (GSI64 format support) - * CVS $Id: gsi64.h,v 1.1.1.1 2003/07/20 20:30:07 cspencer Exp $ - * - * A GSI64 (generic sequence index, 64 bit hack) file is composed of - * recnum + nfiles + 1 records. Each record contains - * three fields; key, file number, and disk offset. - * Record 0 contains: - * [ "GSI64" ] [ nfiles ] [ recnum ] - * Records 1..nfiles map file names to file numbers, and contain: - * [ filename ] [ file number, 1..nfiles ] [ 0 (unused) ] - * Records nfiles+1 to recnum+nfiles+1 provide disk offset - * and file number indices for every key: - * [ key ] [ file number ] [ offset] - * - * Because the file is binary, we take some (but not - * complete) care to improve portability amongst platforms. - * This means using network order integers (see ntohl()) - * and defining types for 16 and 64 bit integers. - * - * A short test program that verifies the sizes of these - * data types would be a good idea... - * - * Because we use 64-bit offsets, ftell64(), and fseek64(), - * we rely on the OS actually providing these. This is - * a temporary hack for human genome analysis. - */ -typedef unsigned long long sqd_uint64; /* 64 bit integer. */ - -#define GSI64_KEYSIZE 32 /* keys are 32 bytes long */ -#define GSI64_RECSIZE 42 /* 32 + 2 + 8 bytes */ -#define SQD_UINT16_MAX 65535 /* 2^16-1 */ -#define SQD_UINT64_MAX 18446744073709551615LU /* 2^64-1 */ - -struct gsi64_s { - FILE *gsifp; /* open GSI index file */ - sqd_uint16 nfiles; /* number of files = 16 bit int */ - sqd_uint64 recnum; /* number of records = 64 bit int */ -}; -typedef struct gsi64_s GSI64FILE; - -struct gsi64key_s { - char key[GSI64_KEYSIZE]; - sqd_uint16 filenum; - sqd_uint64 offset; -}; -struct gsi64index_s { - char **filenames; - int *fmt; - sqd_uint16 nfiles; - - struct gsi64key_s *elems; - sqd_uint64 nkeys; -}; - - - -/* if ntohl() and friends are not available, you - * can slip replacements in by providing sre_ntohl() - * functions. (i.e., there is a possible portability problem here.) - */ -#if 0 -#define sre_ntohl(x) ntohl(x); -#define sre_ntohs(x) ntohs(x); -#define sre_htonl(x) htonl(x); -#define sre_htons(x) htons(x); -#endif - -/* from gsi64.c - */ -extern GSI64FILE *GSI64Open(char *gsifile); -extern int GSI64GetRecord(GSI64FILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint64 *f3); -extern int GSI64GetOffset(GSI64FILE *gsi, char *key, char *sqfile, - int *fmt, long long *ret_offset); -extern void GSI64Close(GSI64FILE *gsi); -extern struct gsi64index_s *GSI64AllocIndex(void); -extern void GSI64FreeIndex(struct gsi64index_s *g); -extern void GSI64AddFileToIndex(struct gsi64index_s *g, char *filename, int fmt); -extern void GSI64AddKeyToIndex(struct gsi64index_s *g, char *key, int filenum, long long offset); -extern void GSI64SortIndex(struct gsi64index_s *g); -extern void GSI64WriteIndex(FILE *fp, struct gsi64index_s *g); -extern void GSI64WriteHeader(FILE *fp, int nfiles, long long nkeys); -extern int GSI64WriteFileRecord(FILE *fp, char *fname, int idx, int fmt); -extern int GSI64WriteKeyRecord(FILE *fp, char *key, int fileidx, long long offset); - -#endif /* USE_GSI64 */ -#endif /*GSIH_INCLUDED*/ diff --git a/algorithm-hmm/src/include/postprob.h b/algorithm-hmm/src/include/postprob.h deleted file mode 100644 index b09c036..0000000 --- a/algorithm-hmm/src/include/postprob.h +++ /dev/null @@ -1,55 +0,0 @@ -/************************************************************ - * Copyright (C) 1998 Ian Holmes ([email protected]) - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* postprob.h - * Author: Ian Holmes ([email protected], Jun 5 1998) - * Derived from core_algorithms.c (SRE, Nov 11 1996) - * Incorporated SRE, Sat Nov 6 09:07:02 1999 - * - * Functions for working with posterior probabilities, - * including unfussed "backwards" and "optimal accuracy" - * implementations. - */ - -#ifndef POSTPROB_INCLUDED -#define POSTPROB_INCLUDED - -#include "structs.h" -#include "config.h" -#include "funcs.h" -#include "squid.h" - -/* Extra algorithms to work with posterior probabilities. - */ - -extern float P7OptimalAccuracy(char *dsq, int L, struct plan7_s *hmm, - struct p7trace_s **ret_tr); - -extern float P7Backward(char *dsq, int L, struct plan7_s *hmm, - struct dpmatrix_s **ret_mx); - -extern void P7EmitterPosterior(int L, struct plan7_s *hmm, - struct dpmatrix_s *forward, - struct dpmatrix_s *backward, - struct dpmatrix_s *mx); - -extern float P7FillOptimalAccuracy(int L, int M, - struct dpmatrix_s *posterior, - struct dpmatrix_s *mx, - struct p7trace_s **ret_tr); - -extern void P7OptimalAccuracyTrace(int L, int M, - struct dpmatrix_s *posterior, - struct dpmatrix_s *mx, - struct p7trace_s **ret_tr); - -#endif - diff --git a/algorithm-hmm/src/include/structs.h b/algorithm-hmm/src/include/structs.h deleted file mode 100644 index 90d0dc2..0000000 --- a/algorithm-hmm/src/include/structs.h +++ /dev/null @@ -1,565 +0,0 @@ -/************************************************************ - * HMMER - Biological sequence analysis with profile HMMs - * Copyright (C) 1992-1999 Washington University School of Medicine - * All Rights Reserved - * - * This source code is distributed under the terms of the - * GNU General Public License. See the files COPYING and LICENSE - * for details. - ************************************************************/ - -/* structs.h - * - * Data structures used in HMMER. - * Also, a few miscellaneous macros and global variable declarations. - * - * RCS $Id: structs.h,v 1.1.1.1 2003/07/20 20:30:07 cspencer Exp $ - */ - -#ifndef STRUCTSH_INCLUDED -#define STRUCTSH_INCLUDED - -#include "squid.h" -#include "config.h" -#include "ssi.h" - -/* Miscellaneous math macros used in the package - */ -#define sreLOG2(x) ((x) > 0 ? log(x) * 1.44269504 : -9999.) -#define sreEXP2(x) (exp((x) * 0.69314718 )) -#define SQR(x) ((x) * (x)) - -/* an idiom for determining a symbol's position in the array - * by pointer arithmetic. - * does no error checking, so caller must already be damned sure x is - * valid in the alphabet! - */ -#define SYMIDX(x) (strchr(Alphabet, (x)) - Alphabet) - -/* The symbol alphabet. - * Must deal with IUPAC degeneracies. Nondegenerate symbols - * come first in Alphabet[], followed by degenerate symbols. - * Nucleic alphabet also must deal with other common symbols - * like U (in RNA) and X (often misused for N). - * Example: - * Nucleic: "ACGTUNRYMKSWHBVDX" size=4 iupac=17 - * Amino: "ACDEFGHIKLMNPQRSTVWYBZX" size=20 iupac=23 - * - * Parts of the code assume that the last symbol is a - * symbol for an unknown residue, i.e. 'X'. - * - * MAXCODE and MAXABET constants are defined in config.h - */ -extern char Alphabet[MAXCODE]; /* "ACDEFGHIKLMNPQRSTVWYBZX" for example */ -extern int Alphabet_type; /* hmmNUCLEIC or hmmAMINO */ -extern int Alphabet_size; /* uniq alphabet size: 4 or 20 */ -extern int Alphabet_iupac; /* total size of alphabet + IUPAC degen. */ -extern char Degenerate[MAXCODE][MAXABET]; -extern int DegenCount[MAXCODE]; -#define hmmNOTSETYET 0 -#define hmmNUCLEIC 2 /* compatibility with squid's kRNA */ -#define hmmAMINO 3 /* compatibility with squid's kAmino */ - -/********************************************************************** - * - * Plan7 - * Implementation of the new Plan7 HMM architecture. - * Fully probabilistic even for hmmsw, hmmls, and hmmfs; - * No insert->delete or delete->insert transitions; - * Improved structure layout. - * - * The strategy is to infiltrate plan7 code into HMMER in - * an evolutionary rather than revolutionary manner. - * - **********************************************************************/ - -/* Plan 7 construction strategies. - */ -enum p7_construction { - P7_MAP_CONSTRUCTION, /* maximum a posteriori architecture */ - P7_HAND_CONSTRUCTION, /* hand specified architecture */ - P7_FAST_CONSTRUCTION /* fast ad hoc architecture */ -}; - -/* Plan 7 parameter optimization strategies - */ -enum p7_param { - P7_MAP_PARAM, /* standard maximum a posteriori */ - P7_MD_PARAM, /* maximum discrimination */ - P7_MRE_PARAM, /* maximum relative entropy */ - P7_WMAP_PARAM /* ad hoc weighted MAP */ -}; - -/* Structure: plan7_s - * - * Declaration of a Plan 7 profile-HMM. - */ -struct plan7_s { - /* Annotation on the model. A name is mandatory. - * Other fields are optional; whether they are present is - * flagged in the stateflags bit array. - * - * desc is only valid if PLAN7_DESC is set in flags. - * acc is only valid if PLAN7_ACC is set in flags. - * rf is only valid if PLAN7_RF is set in flags. - * cs is only valid if PLAN7_CS is set in flags. - * ca is only valid if PLAN7_CA is set in flags. - * map is only valid if PLAN7_MAP is set in flags. - */ - char *name; /* name of the model +*/ - char *acc; /* accession number of model (Pfam) +*/ - char *desc; /* brief description of model +*/ - char *rf; /* reference line from alignment 0..M +*/ - char *cs; /* consensus structure line 0..M +*/ - char *ca; /* consensus accessibility line 0..M */ - char *comlog; /* command line(s) that built model +*/ - int nseq; /* number of training sequences +*/ - char *ctime; /* creation date +*/ - int *map; /* map of alignment cols onto model 1..M+*/ - int checksum; /* checksum of training sequences +*/ - - /* The following are annotations added to support work by Michael Asman, - * CGR Stockholm. They are not stored in model files; they are only - * used in model construction. - * - * #=GC X-PRM (PRT,PRI) annotation is picked up by hmmbuild and interpreted - * as specifying which mixture Dirichlet component to use. If these flags - * are non-NULL, the normal mixture Dirichlet code is bypassed, and a - * single specific Dirichlet is used at each position. - */ - int *tpri; /* which transition mixture prior to use */ - int *mpri; /* which match mixture prior to use */ - int *ipri; /* which insert mixture prior to use */ - - /* Pfam-specific score cutoffs. - * - * ga1, ga2 are valid if PLAN7_GA is set in flags. - * tc1, tc2 are valid if PLAN7_TC is set in flags. - * nc1, nc2 are valid if PLAN7_NC is set in flags. - */ - float ga1, ga2; /* per-seq/per-domain gathering thresholds (bits) +*/ - float tc1, tc2; /* per-seq/per-domain trusted cutoff (bits) +*/ - float nc1, nc2; /* per-seq/per-domain noise cutoff (bits) +*/ - - /* The main model in probability form: data-dependent probabilities. - * This is the core Krogh/Haussler model. - * Transition probabilities are usually accessed as a - * two-D array: hmm->t[k][TMM], for instance. They are allocated - * such that they can also be stepped through in 1D by pointer - * manipulations, for efficiency in DP algorithms. - */ - int M; /* length of the model (# nodes) +*/ - float **t; /* transition prob's. t[1..M-1][0..6] +*/ - float **mat; /* match emissions. mat[1..M][0..19] +*/ - float **ins; /* insert emissions. ins[1..M-1][0..19] +*/ - float tbd1; /* B->D1 prob (data dependent) +*/ - - /* The unique states of Plan 7 in probability form. - * These are the algorithm-dependent, data-independent probabilities. - * Some parts of the code may briefly use a trick of copying tbd1 - * into begin[0]; this makes it easy to call FChoose() or FNorm() - * on the resulting vector. However, in general begin[0] is not - * a valid number. - */ - float xt[4][2]; /* N,E,C,J extra states: 2 transitions +*/ - float *begin; /* 1..M B->M state transitions +*/ - float *end; /* 1..M M->E state transitions (!= a dist!) +*/ - - /* The null model probabilities. - */ - float null[MAXABET]; /* "random sequence" emission prob's +*/ - float p1; /* null model loop probability +*/ - - /* The model in log-odds score form. - * These are created from the probabilities by LogoddsifyHMM(). - * By definition, null[] emission scores are all zero. - * Note that emission distributions are over 26 upper-case letters, - * not just the unambiguous protein or DNA alphabet: we - * precalculate the scores for all IUPAC degenerate symbols we - * may see. Non-IUPAC symbols simply have a -INFTY score. - * Note the reversed indexing on msc and isc -- for efficiency reasons. - * - * Only valid if PLAN7_HASBITS is set. - */ - int **tsc; /* transition scores [1.M-1][0.6] -*/ - int **msc; /* match emission scores [0.MAXCODE-1][1.M] -*/ - int **isc; /* ins emission scores [0.MAXCODE-1][1.M-1] -*/ - int xsc[4][2]; /* N,E,C,J transitions -*/ - int *bsc; /* begin transitions [1.M] -*/ - int *esc; /* end transitions [1.M] -*/ - - /* DNA translation scoring parameters - * For aligning protein Plan7 models to DNA sequence. - * Lookup value for a codon is calculated by pos1 * 16 + pos2 * 4 + pos3, - * where 'pos1' is the digitized value of the first nucleotide position; - * if any of the positions are ambiguous codes, lookup value 64 is used - * (which will generally have a score of zero) - * - * Only valid if PLAN7_HASDNA is set. - */ - int **dnam; /* triplet match scores [0.64][1.M] -*/ - int **dnai; /* triplet insert scores [0.64][1.M] -*/ - int dna2; /* -1 frameshift, doublet emission, M or I -*/ - int dna4; /* +1 frameshift, doublet emission, M or I -*/ - - /* P-value and E-value statistical parameters - * Only valid if PLAN7_STATS is set. - */ - float mu; /* EVD mu +*/ - float lambda; /* EVD lambda +*/ - - int flags; /* bit flags indicating state of HMM, valid data +*/ -}; - -/* Flags for plan7->flags. - * Note: Some models have scores but no probabilities (for instance, - * after reading from an HMM save file). Other models have - * probabilities but no scores (for instance, during training - * or building). Since it costs time to convert either way, - * I use PLAN7_HASBITS and PLAN7_HASPROB flags to defer conversion - * until absolutely necessary. This means I have to be careful - * about keeping these flags set properly when I fiddle a model. - */ -#define PLAN7_HASBITS (1<<0) /* raised if model has log-odds scores */ -#define PLAN7_DESC (1<<1) /* raised if description exists */ -#define PLAN7_RF (1<<2) /* raised if #RF annotation available */ -#define PLAN7_CS (1<<3) /* raised if #CS annotation available */ -#define PLAN7_XRAY (1<<4) /* raised if structural data available */ -#define PLAN7_HASPROB (1<<5) /* raised if model has probabilities */ -#define PLAN7_HASDNA (1<<6) /* raised if protein HMM->DNA seq params set*/ -#define PLAN7_STATS (1<<7) /* raised if EVD parameters are available */ -#define PLAN7_MAP (1<<8) /* raised if alignment map is available */ -#define PLAN7_ACC (1<<9) /* raised if accession number is available */ -#define PLAN7_GA (1<<10) /* raised if gathering thresholds available */ -#define PLAN7_TC (1<<11) /* raised if trusted cutoffs available */ -#define PLAN7_NC (1<<12) /* raised if noise cutoffs available */ -#define PLAN7_CA (1<<13) /* raised if surface accessibility avail. */ - -/* Indices for special state types, I: used for dynamic programming xmx[][] - * mnemonic: eXtra Matrix for B state = XMB - */ -#define XMB 0 -#define XME 1 -#define XMC 2 -#define XMJ 3 -#define XMN 4 - -/* Indices for special state types, II: used for hmm->xt[] indexing - * mnemonic: eXtra Transition for N state = XTN - */ -#define XTN 0 -#define XTE 1 -#define XTC 2 -#define XTJ 3 - -/* Indices for Plan7 main model state transitions. - * Used for indexing hmm->t[k][] - * mnemonic: Transition from Match to Match = TMM - */ -#define TMM 0 -#define TMI 1 -#define TMD 2 -#define TIM 3 -#define TII 4 -#define TDM 5 -#define TDD 6 - -/* Indices for extra state transitions - * Used for indexing hmm->xt[][]. - */ -#define MOVE 0 /* trNB, trEC, trCT, trJB */ -#define LOOP 1 /* trNN, trEJ, trCC, trJJ */ - -/* Declaration of Plan7 dynamic programming matrix structure. - */ -struct dpmatrix_s { - int **xmx; /* special scores [0.1..N][BECJN] */ - int **mmx; /* match scores [0.1..N][0.1..M] */ - int **imx; /* insert scores [0.1..N][0.1..M-1.M] */ - int **dmx; /* delete scores [0.1..N][0.1..M-1.M] */ -}; - -/* Declaration of Plan7 shadow matrix structure. - * In general, allowed values are STM, STI, etc. - * However, E state has M possible sources, from 1..M match states; - * hence the esrc array. - */ -struct dpshadow_s { - char **xtb; /* special state traces [0.1..N][BECJN] */ - char **mtb; /* match state traces [0.1..N][0.1..M] */ - char **itb; /* insert state traces [0.1..N][0.1..M-1.M] */ - char **dtb; /* delete state traces [0.1..N][0.1..M-1.M] */ - int *esrc; /* E trace is special; must store a M state number 1..M */ -}; - -/* Structure: HMMFILE - * - * Purpose: An open HMM file or HMM library. See hmmio.c. - */ -struct hmmfile_s { - FILE *f; /* pointer to file opened for reading */ - SSIFILE *ssi; /* pointer to open SSI index, or NULL */ - int (*parser)(struct hmmfile_s *, struct plan7_s **); /* parsing function */ - int is_binary; /* TRUE if format is a binary one */ - int byteswap; /* TRUE if binary and byteswapped */ - - /* Ewan (GeneWise) needs the input API to know the offset of each - * HMM on the disk, as it's being read. This might be enough - * support for him. hmmindex also uses this. Ewan, see - * HMMFilePositionByIndex() for an example of how to use this - * opaque offset type in the SSI API - the call you need - * is SSISetFilePosition(). - */ - int is_seekable; /* TRUE if we use offsets in this HMM file */ - int mode; /* type of offset */ - SSIOFFSET offset; /* Disk offset for beginning of the current HMM */ -}; -typedef struct hmmfile_s HMMFILE; - - -/* Plan 7 model state types - * used in traceback structure - */ -#define STBOGUS 0 -#define STM 1 -#define STD 2 -#define STI 3 -#define STS 4 -#define STN 5 -#define STB 6 -#define STE 7 -#define STC 8 -#define STT 9 -#define STJ 10 - -/* Structure: p7trace_s - * - * Traceback structure for alignments of model to sequence. - * Each array in a trace_s is 0..tlen-1. - * Element 0 is always to STATE_S. Element tlen-1 is always to STATE_T. - */ -struct p7trace_s { - int tlen; /* length of traceback */ - char *statetype; /* state type used for alignment */ - int *nodeidx; /* index of aligned node, 1..M (if M,D,I), or 0 */ - int *pos; /* position in dsq, 1..L, or 0 if none */ -}; - -/* Structure: p7prior_s - * - * Dirichlet priors on HMM parameters. - */ -struct p7prior_s { - int strategy; /* PRI_DCHLET, etc. */ - - int tnum; /* number of transition Dirichlet mixtures */ - float tq[MAXDCHLET]; /* probabilities of tnum components */ - float t[MAXDCHLET][7]; /* transition terms per mix component */ - - int mnum; /* number of mat emission Dirichlet mixtures */ - float mq[MAXDCHLET]; /* probabilities of mnum components */ - float m[MAXDCHLET][MAXABET]; /* match emission terms per mix component */ - - int inum; /* number of insert emission Dirichlet mixes */ - float iq[MAXDCHLET]; /* probabilities of inum components */ - float i[MAXDCHLET][MAXABET]; /* insert emission terms */ -}; -#define PRI_DCHLET 0 /* simple or mixture Dirichlets */ -#define PRI_PAM 1 /* PAM prior hack */ - - -/********************************************************************** - * Other structures, not having to do with HMMs. - **********************************************************************/ - -/* Structure: histogram_s - * - * Keep a score histogram. - * - * The main implementation issue here is that the range of - * scores is unknown, and will go negative. histogram is - * a 0..max-min array that represents the range min..max. - * A given score is indexed in histogram array as score-min. - * The AddToHistogram() function deals with dynamically - * resizing the histogram array when necessary. - */ -struct histogram_s { - int *histogram; /* counts of hits */ - int min; /* elem 0 of histogram == min */ - int max; /* last elem of histogram == max */ - int highscore; /* highest active elem has this score */ - int lowscore; /* lowest active elem has this score */ - int lumpsize; /* when resizing, overalloc by this */ - int total; /* total # of hits counted */ - - float *expect; /* expected counts of hits */ - int fit_type; /* flag indicating distribution type */ - float param[3]; /* parameters used for fits */ - float chisq; /* chi-squared val for goodness of fit*/ - float chip; /* P value for chisquared */ -}; -#define HISTFIT_NONE 0 /* no fit done yet */ -#define HISTFIT_EVD 1 /* fit type = extreme value dist */ -#define HISTFIT_GAUSSIAN 2 /* fit type = Gaussian */ -#define EVD_MU 0 /* EVD fit parameter mu */ -#define EVD_LAMBDA 1 /* EVD fit parameter lambda */ -#define EVD_WONKA 2 /* EVD fit fudge factor */ -#define GAUSS_MEAN 0 /* Gaussian parameter mean */ -#define GAUSS_SD 1 /* Gaussian parameter std. dev. */ - -/* Structure: fancyali_s - * - * Alignment of a hit to an HMM, for printing. - */ -struct fancyali_s { - char *rfline; /* reference coord info */ - char *csline; /* consensus structure info */ - char *model; /* aligned query consensus sequence */ - char *mline; /* "identities", conservation +'s, etc. */ - char *aseq; /* aligned target sequence */ - int len; /* length of strings */ - char *query; /* name of query HMM */ - char *target; /* name of target sequence */ - int sqfrom; /* start position on sequence (1..L) */ - int sqto; /* end position on sequence (1..L) */ -}; - -/* Structure: hit_s - * - * Info about a high-scoring database hit. - * We keep this info in memory, so we can output a - * sorted list of high hits at the end. - * - * sqfrom and sqto are the coordinates that will be shown - * in the results, not coords in arrays... therefore, reverse - * complements have sqfrom > sqto - */ -struct hit_s { - double sortkey; /* number to sort by; big is better */ - float score; /* score of the hit */ - double pvalue; /* P-value of the hit */ - float mothersc; /* score of whole sequence */ - double motherp; /* P-value of whole sequence */ - char *name; /* name of the target */ - char *acc; /* accession of the target */ - char *desc; /* description of the target */ - int sqfrom; /* start position in seq (1..N) */ - int sqto; /* end position in seq (1..N) */ - int sqlen; /* length of sequence (N) */ - int hmmfrom; /* start position in HMM (1..M) */ - int hmmto; /* end position in HMM (1..M) */ - int hmmlen; /* length of HMM (M) */ - int domidx; /* index of this domain */ - int ndom; /* total # of domains in this seq */ - struct fancyali_s *ali; /* ptr to optional alignment info */ -}; - - -/* Structure: tophit_s - * - * Array of high scoring hits, suitable for efficient sorting - * when we prepare to output results. "hit" list is NULL and - * unavailable until after we do a sort. - */ -struct tophit_s { - struct hit_s **hit; /* array of ptrs to top scoring hits */ - struct hit_s *unsrt; /* unsorted array */ - int alloc; /* current allocation size */ - int num; /* number of hits in list now */ - int lump; /* allocation lumpsize */ -}; - -/* struct threshold_s - * Contains score/evalue threshold settings. - * - * made first for hmmpfam: - * Since we're going to loop over all HMMs in a Pfam (or pfam-like) - * database in main_loop_{serial,pvm}, and we're going to - * allow autocutoffs using Pfam GA, NC, TC lines, we will need - * to reset those cutoffs with each HMM in turn. Therefore the - * main loops need to know whether they're supposed to be - * doing autocutoff. This amount of info was unwieldy enough - * to pass through the argument list that I put it - * in a structure. - */ -enum threshold_cut { CUT_NONE, CUT_GA, CUT_NC, CUT_TC }; -struct threshold_s { - float globT; /* T parameter: keep only hits > globT bits */ - double globE; /* E parameter: keep hits < globE E-value */ - float domT; /* T parameter for individual domains */ - double domE; /* E parameter for individual domains */ - /* autosetting of cutoffs using Pfam annot: */ - enum threshold_cut autocut; - int Z; /* nseq to base E value calculation on */ -}; - -/********************************************************** - * PVM parallelization - **********************************************************/ -#ifdef HMMER_PVM - -/* Message tags - */ -#define HMMPVM_INIT 0 /* an initialization packet to all slaves */ -#define HMMPVM_WORK 1 /* a work packet sent to a slave */ -#define HMMPVM_RESULTS 2 /* a results packet sent back to master */ -#define HMMPVM_TASK_TROUBLE 3 /* a notification of bad things in a slave task */ -#define HMMPVM_HOST_TROUBLE 4 /* a notification of bad things in a PVM host */ - -/* error codes - */ -#define HMMPVM_OK 0 -#define HMMPVM_NO_HMMFILE 1 -#define HMMPVM_NO_INDEX 2 -#define HMMPVM_BAD_INIT 3 /* failed to initialize a slave somehow */ - -#endif - - -/********************************************************** - * Plan 9: obsolete HMMER1.x code. We still need these structures - * for reading old HMM files (e.g. backwards compatibility) - **********************************************************/ - -/* We define a "basic" state, which covers the basic match, insert, and - * delete states from the Haussler paper. Numbers are stored as - * pre-calculated negative logs. - */ -struct basic_state { - float t[3]; /* state transitions to +1 M, +0 I, +1 D */ - float p[MAXABET]; /* symbol emission probabilities */ -}; - -/* A complete hidden Markov model - */ -struct plan9_s { - int M; /* length of the model */ - struct basic_state *ins; /* insert states 0..M+1 */ - struct basic_state *mat; /* match 0..M+1; 0 = BEGIN, M+1 = END */ - struct basic_state *del; /* delete 0..M+1 */ - - float null[MAXABET]; /* the *suggested* null model */ - - /* Optional annotation on the HMM, taken from alignment - */ - char *name; /* a name for the HMM */ - char *ref; /* reference coords and annotation */ - char *cs; /* consensus structure annotation */ - float *xray; /* Structural annotation: xray[0..M+1][NINPUTS], indexed manually */ - - int flags; /* flags for what optional info is in HMM */ -}; - -/* Flags for optional info in an HMM structure - */ -#define HMM_REF (1<<0) -#define HMM_CS (1<<1) -#define HMM_XRAY (1<<2) - -#define MATCH 0 -#define INSERT 1 -#define DELETE 2 -#define BEGIN MATCH -#define END MATCH - -#endif /* STRUCTSH_INCLUDED */ -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/psortb.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
