/*...And The Small Print Taketh Away...

COPYRIGHT NOTICE
----------------

    Copyright (c) 1996 by Harry Mangalam at Univ. of California, Irvine

    Permission to use, copy, modify, and distribute this software and its 
    documentation is hereby granted, subject to the following restrictions
    and understandings:

      1) Any copy of this software or any copy of software derived
         from it must include this copyright notice in full.

      2) All materials or software developed as a consequence of the
         use of this software or software derived from it must duly
         acknowledge such use, in accordance with the usual standards
         of acknowledging credit in academic research.

      3) The software may be used by anyone for any purpose, except
         that its redistribution for profit or inclusion in other
         software sold for profit requires the express, written
         permission of the author.  Note that the written permission
         of the author is not required for any use of the software
         that may result in profit, and the author waives any rights
         to any part of profit gained by any use other than
         its redistribution for profit.
 
         In plain English, the only thing that requires the express,
         written permission is if you want to sell software that
         includes or uses this software.  Any use other than selling
         software that uses this software is fine, and any profit made
         from that use is yours to keep.

      4) This software is provided AS IS with no warranties of any
         kind.  The author shall have no liability with respect to the
         infringement of copyrights, trade secrets or any patents by
         this software or any part thereof.  In no event will the
         author be liable for any lost revenue or profits or other
         special, indirect and consequential damages. 
*/


/* Compile-time constants used throughout the program */
#define VERSION "1.5F"  /* PLEASE DON'T CHANGE THIS #define - I NEED THIS INFO TO BE ABLE TO GATHER STATS ON USAGE */
                         /* version of the program - one of the values to be returned via UDP */
#define REPORT 1     /* directs the inclusion (1) or exclusion (0) of the UDP reporting code */
#define MAXSITE 50 /* max length of a site - may not be needed if pointer math is used intelligently */
#define MAX_NUM_RES 600  /* max number of REs that we'll accept; author's note: the text output routines */
                         /* will later be taking some of these from the command line */
#define BASE_OVERLAP 30 /* # of bases past the end of a block, kept in order to 'trap' REs whose cut offset
                        puts them into the block - was 20, 30 was tried and works.  Also used in the topology
                        routines as the amount that needs to be added to the beginning and end of the sequence
                        to allow 'trapping' as above, if the DNA is circular */
#define MAX_BASES_PER_LINE 210   /* the max allowable output width; needed to declare and initialize many arrays; */
                                 /* placeholder until basesPerLine can be grabbed from the flags to set the wanted output width */
#define O_SEQ_LINE 40         /* index of the line (out of MAX_OUTPUT_LINES) where the sequence gets written;
                              should be about MAX_OUTPUT_LINES - 10 */
#define MAX_OUTPUT_LINES 50   /* max # of lines per block - also influences how many blank lines go after
                              the data at each block; was 25, now increased to 50 to allow 6 frames of
                              translation and all the REs you could imagine */
#define O_LMARG 10            /* left margin in text output */
#define O_RMARG 10            /* right margin in text output */
#define MAX_ORGS 8            /* number of different Codon Usage tables */
#define N_CODONS 64           /* number of codons - not expected to ever change */
#define NFLAGS 23             /* number of flags (size of the flag_value[] array) */
#define MAX_LADGEL_WIDTH 250  /* # chars used to print out the ladder/gel map - obviously only usable with a very compressed font */
#define PREFERED_NUM_DIVS 10  /* # of major divisions in the ladder map */
#define TIC_REPEAT 4          /* the tics are repeated in the ladder map after this many lines */
#define NUMBERLINE_REPEAT 20  /* the number header is repeated in the ladder map after this many lines */
#define SUMMARY_CUTS 2        /* this many cuts is the cutoff for the summary map */

/* Character-case helpers, taken from Don Gilbert's Readseq.
   is_upper / is_lower test whether c lies in 'A'..'Z' / 'a'..'z'; to_lower / to_upper
   shift a letter of the opposite case by ' ' and return every other value unchanged,
   with the result cast to char.
   These are macros: the argument may be evaluated more than once, so do not pass an
   expression with side effects.  The conversion relies on upper and lower case
   letters being exactly ' ' apart in the execution character set (true for ASCII). */
#define is_upper(c) ((c) >= 'A' && (c) <= 'Z')
#define is_lower(c) ((c) >= 'a' && (c) <= 'z')
#define to_lower(c) ((char)(!is_upper(c) ? (c) : (c) + ' '))
#define to_upper(c) ((char)(!is_lower(c) ? (c) : (c) - ' '))

/* External variables defined outside this file and declared here so that the
   option-handling code can refer to them.  The names match the globals of the
   POSIX getopt() interface (current option argument, next argv index, error
   reporting switch, offending option character). */
extern char *optarg;
extern int optind;
extern int opterr;
extern int optopt;

/* Everything recorded about a single RE: its name, its recognition site in
   several forms, and the numbers describing where and how often it cuts. */
struct RE_struct {
    char  E_nam[10];   /* RE name */
    char *E_raw_sit;   /* RE site with the ' and _ and all n's included, usable as a label if needed */
    char  E_wsit[15];  /* whole recognition site, minus _ ' and trailing n's */
    char  E_hex[6];    /* hexamer extracted from E_wsit, used to generate the hash key.
                          NOTE(review): six characters leave no room for a terminating NUL -
                          confirm this is never treated as a C string */
    int   E_tcut;      /* cut site on the top strand */
    int   E_olap;      /* overlap, if any, in the cut - difference between the top cut and bottom cut */
    int   E_len;       /* length of the recognition sequence */
    int   E_pal;       /* whether or not the recognition sequence is a palindrome */
    int   E_dgen;      /* degeneracy in the recognition sequence */
    int   E_Ncuts;     /* number of cuts that the RE causes in the sequence */
    int   E_mag;       /* magnitude of the degeneracy - acgt=1; n=0; yrmkws=1/2; b,d,h,v = 1/4, etc.
                          NOTE(review): fractional weights cannot be held directly in an int -
                          confirm how they are encoded */
    int   E_nam_l;     /* length of the name (E_nam, above), for calculating name positions in output */
};

/********************************  Function Prototype Declarations for tacg  *********************************/

/* fill_out_sum duplicates the degeneracy the correct number of times in an array
   (called 'sum[]' in the original note) so that the array can be submitted to the
   hash function.  Returns nothing.
   NOTE(review): 'sum[]' presumably refers to the dgn_sits parameter, and the exact
   roles of degen and N_degen are not described here - confirm against the definition. */
void fill_out_sum(int degen, int N_degen, int dgn_sits[256]); 

/* palindrome returns 1 if the sequence is a palindrome, 0 if it is not.
   The sequence is passed as site, together with an int length. */
int palindrome(char *site, int length);

/* hash() is a central function to this program - feel free to improve it.  It takes a
   (possibly degenerate) n-mer sequence (most often a hexamer) and calculates its hash
   value; for a hexamer the value runs from 0 (=aaaaaa) to 4095 (=tttttt).
   NOTE(review): how dgn_sits and num_bases are used (and what is returned for a
   degenerate n-mer) is not described here - confirm against the definition. */
int hash(char *nmer, int dgn_sits[256], int num_bases);

/* Degen_Cmp compares a degenerate DNA sequence (degenerate_seq) with a non-degenerate
   sequence (pure_seq); returns 1 if they are compatible, 0 if not.
   length is presumably the number of bases compared - confirm against the definition. */
int Degen_Cmp(char *degenerate_seq, char *pure_seq, int length);

/* Rev_Compl returns the Reverse Complement of the original sequence, as this program
   uses the term: if original = ggatcatttc, reverse complement = cctagtaaag.
   NOTE(review): in that example each base is complemented in place and the order is
   NOT reversed, whereas the example given for Anti_Par (gaaatgatcc) is what is
   conventionally called the reverse complement - confirm against the definition
   before relying on the name.
   The result is presumably written to reverse_complement - confirm. */
void Rev_Compl(char *original_seq, char *reverse_complement, int length);

/* Anti_Par returns the anti-parallel sequence of the original sequence:
   if original = ggatcatttc, anti parallel = gaaatgatcc (in this example the bases are
   both complemented and reversed in order).
   The result is presumably written to anti_parallel - confirm against the definition. */
void Anti_Par(char *original_seq, char *anti_parallel, int length);

/* Reverse - straight from K+R (p62) - reverses the string s in place, so the caller's
   buffer is modified:
   if original = ggatcatttc, reverse = ctttactagg */
void Reverse(char *s);

/* Triplet_Reverse reverses a string triplet by triplet, i.e.:
   ArgTrpPheAsnCys ==> CysAsnPheTrpArg
   so as to make the 6 frame translations readable in the opposite orientation.
   NOTE(review): whether str is modified in place is not stated here; the signature
   has no separate output parameter - confirm against the definition. */
void Triplet_Reverse(char *str);

/* Translate translates (nondegenerate, for now) DNA sequence into protein sequence,
   using one of 8 codon preferences.
   NOTE(review): the literal dimensions 8 and 64 of Codons match the values of MAX_ORGS
   and N_CODONS defined in this file; the roles of len, n_letters and organism are
   not described here (organism presumably selects the codon table) - confirm against
   the definition. */
void Translate(char *DNA_in, char *Prot_out, int len, int n_letters, char Codons[8][64][4], int organism);  

/* Read_Codon_Prefs reads in the file that has the codon preferences in human-readable
   form, then hashes the codons and inserts them into the right place in the codon
   array.  The file also has a description line for each table, which is read into a
   char array for labelling purposes.
   NOTE(review): the literal dimensions 8 and 64 match the values of MAX_ORGS and
   N_CODONS defined in this file; which file is read is not visible from this
   prototype. */
void Read_Codon_Prefs(char codons[8][64][4], char codon_labels[8][20]); 

/* iamhere is defined in udping.c; it takes the program name and returns an int.
   NOTE(review): presumably part of the UDP reporting code controlled by REPORT, and
   the meaning of the return value is not documented here - confirm in udping.c. */
int iamhere(char *progname);   /* from udping.c */

/* SetFlags() takes argc/argv and the array that holds all the option values
   (flag_value), implements the routines to read the options in, and does most of the
   error checking.  It is functionally related to Interactive(), which it calls, and to
   CommandLine(), which makes use of these flags to compose a command-line argument
   that can be used the next time instead of going through Interactive().
   NOTE(review): the meaning of the returned char * is not documented here; the
   LadFile / GelFile parameters are commented out of the prototype. */
char *SetFlags(int argc, char *argv[], long flag_value[2][NFLAGS] /* , char *LadFile, char *GelFile */);

/* Usage() prints some useful information about how to use the program, for when it is
   invoked incorrectly or without flags.  Takes no arguments and returns nothing. */
void Usage(void);

/* compare is the small comparison function that qsort needs in order to perform its
   sort; it receives the two elements to compare as const void pointers and returns
   an int.  What element type it expects is not visible from this prototype. */
int compare(const void *n1, const void *n2 );

/* GetSequence reads and formats the sequence from **stdin** and returns the address of
   the read-in sequence, as well as the length of the sequence, along with the
   bracketing repeats - it needs only a few extra variables to do it and it cleans up
   main considerably.
   NOTE(review): the lengths are presumably handed back through the tot_seq_Cnt and
   seq_len pointers, and ownership of the returned buffer is not documented here -
   confirm against the definition. */
char *GetSequence(long *tot_seq_Cnt, long flag_value[2][NFLAGS], long *seq_len);    /* TODO (original note): need to change function to fit last variable */

/* PrintSitesFrags prints out the sites and/or fragments, either sorted or unsorted
   depending on whether the array has been sorted beforehand - it doesn't care, it
   just prints whatever is in the array nicely.
   NOTE(review): sites presumably selects between site and fragment output, and the
   roles of NumGREs, reps and GREs are not described here - confirm against the
   definition. */
void PrintSitesFrags(struct RE_struct REStr[],int NumGREs, int reps, int *GREs, long *FragOrSites[], 
                long flag_value[2][NFLAGS], int sites);

/* compare is the small comparison function that qsort needs in order to perform its
   sort.  NOTE(review): this repeats the identical prototype of compare() declared
   earlier in this file; the redeclaration is redundant and could be removed. */
int compare(const void *n1, const void *n2 );

/* PrintGelLadderMap does what its name implies - prints a Gel and/or Ladder map, sort
   of like the GCG program, but in text mode only for now.  It uses a preset (#defined)
   output width of ~200 chars, versus the regular output whose width is settable via a
   flag.  This being the case, it writes to a user-selectable file rather than to
   stdout.  The output is meant to be processed by a PostScript wrapper program like
   genscript, so that it can be printed landscape in a small font to be usable.
   NOTE(review): the filename parameter is commented out of this prototype, so how the
   output file is selected is not visible here; gel presumably chooses between the
   gel and ladder forms - confirm against the definition. */
void PrintGelLadderMap(struct RE_struct REStr[], int NumGREs, long seq_len, int *GREs, 
               long *Dig_Sits[MAX_NUM_RES+10], long flag_value[2][NFLAGS], /* char *filename,*/ int gel);
                    	
/* SearchPaths() takes a filename and examines various environment variables to locate
   that file.  If it can find the file, it returns a pointer to the full path name; if
   not, it returns NULL, so callers must check the result.
   NOTE(review): FileType4Err is presumably a label used in error messages, and the
   lifetime of the returned string is not documented here - confirm against the
   definition. */
char *SearchPaths(char *InputFileName, char *FileType4Err);


