/*
  rmescseq.c -- Removes ANSI X3.64 escape sequences from input stream.

  To build: [g]cc -o rmescseq rmescseq.c
  To use:   rmescseq < infile > outfile

  Authors: F. da Cruz, J. Altman, Columbia University.
  (Adapted by J. Altman from C-Kermit ckucns.c.)

  Copyright (C) 2001,
    Trustees of Columbia University in the City of New York,
    All rights reserved.  Terms of use and redistribution as for C-Kermit 7.0:
    ftp://kermit.columbia.edu/kermit/f/COPYING.TXT
*/

#include <errno.h>                      /* errno, EINTR */
#include <unistd.h>                     /* read(), write(), ssize_t */

/* Escape-sequence parser state definitions. */

#define ES_NORMAL 0                     /* Normal, not in escape sequence */
#define ES_GOTESC 1                     /* Current character is ESC */
#define ES_ESCSEQ 2                     /* Inside an escape sequence */
#define ES_GOTCSI 3                     /* Inside a control sequence */
#define ES_STRING 4                     /* Inside DCS,OSC,PM, or APC string */
#define ES_TERMIN 5                     /* 1st char of string terminator */

/* Important buffer lengths */

#define ESCBUFLEN 128
#define STRBUFLEN 256

/* Important Control Characters */

#define NUL   0
#define BS    8
#define CAN  24
#define SUB  26
#define ESC  27
#define SP   32
#define CSI 155
#define ST  156

/* The usual boolean values */

#define FALSE 0
#define TRUE  1

/* Some state */

static int escstate = ES_NORMAL;        /* Current parser state (ES_xxx) */
static int escnext = 1;                 /* Not referenced in this file */
static int esclast = 0;                 /* Index of last char in escbuffer[] */
static int strrecv = 0;                 /* TRUE while collecting a string */
static unsigned char escbuffer[ESCBUFLEN]; /* Collected escape sequence */
static unsigned char strbuf[STRBUFLEN]; /* Collected string contents */
static int strlength = 0;               /* Number of bytes held in strbuf[] */

/*
  ansi_x3_64() -- Feed one byte of the data stream to the parser.

  Bytes that belong to an ANSI X3.64 escape sequence, control sequence, or
  control string are absorbed; escape/control sequence bytes are collected
  in escbuffer[] (escbuffer[0] is the introducer, ESC or CSI, and the bytes
  that follow are stored at escbuffer[1..esclast]; an 8-bit CSI is stored
  as CSI followed by '[').  String contents go to strbuf[] instead.

  Returns:
    0..255  the byte itself, when it is not part of a sequence and should be
            passed through (this includes most C0 controls that arrive in
            the middle of a sequence);
    -1      the byte was absorbed and no complete escape/control sequence is
            available yet (also returned when a control string ends);
    -2      the byte was the final character of an escape or control
            sequence, which is now complete in escbuffer[].
*/
int
ansi_x3_64(unsigned char ch)
{
    if (escstate == ES_NORMAL) {        /* Not in an escape sequence */
        if (ch == ESC) {                /* This character is an Escape */
            escstate = ES_GOTESC;       /* Change state to GOTESC */
            esclast = 0;                /* Reset buffer pointer */
            escbuffer[0] = ESC;
        } else if (ch == CSI) {         /* 8-bit control sequence introducer */
            escstate = ES_GOTCSI;
            escbuffer[0] = CSI;         /* Save in case we have to replay it */
            esclast = 1;                /* Reset buffer pointer, but */
            escbuffer[1] = '[';         /* translate to 7-bit */
        } else {                        /* Not an ESC, stay in NORMAL state */
            return(ch);
        }
        return(-1);                     /* Introducer absorbed */
    }

    /* We are in an escape sequence... */

    if (ch < SP || ch == CSI) {         /* Control character? */
        if (ch == CAN || ch == SUB) {   /* These cancel an escape sequence */
            escstate = ES_NORMAL;       /* Go back to normal. */
            strlength = 0;
            strbuf[0] = 0;
            strrecv = FALSE;
        } else if (ch == BS) {          /* Erases previous */
            if (escstate == ES_GOTCSI && esclast == 1) {
                escstate = ES_GOTESC;
                esclast = 0;
            } else if (escstate == ES_ESCSEQ && esclast == 1) {
                escstate = ES_GOTESC;
                esclast = 0;
            } else if (escstate == ES_GOTESC && esclast == 0) {
                escstate = ES_NORMAL;
            } else if (escstate == ES_TERMIN) {
                escstate = ES_STRING;
            } else if (escstate == ES_STRING) {
                if (strlength > 0)
                    strlength--;
                else
                    escstate = ES_GOTESC;
            } else if (esclast > 0) {
                esclast--;              /* Escape sequence char (really?) */
            }
        } else if (ch == ESC) {
            if (escstate == ES_STRING) {
                escstate = ES_TERMIN;   /* May be 1st char of terminator */
            } else {
                escstate = ES_GOTESC;   /* Escape sequence was restarted */
                esclast = 0;            /* Reset buffer pointer */
                escbuffer[0] = ESC;     /* Save in case we have to replay it */
            }
        } else if (ch == CSI) {
            escstate = ES_GOTCSI;       /* Escape sequence was restarted */
            escbuffer[0] = CSI;         /* Save in case we have to replay it */
            esclast = 1;                /* Reset buffer pointer, but */
            escbuffer[1] = '[';         /* translate to 7-bit */
        } else if (ch != NUL) {         /* Other controls pass through; */
            return(ch);                 /* NUL is swallowed. */
        }
        return(-1);
    }

    /*
      Put this character in the escape sequence buffer.
      But we don't put "strings" in this buffer;
      Note that indexing starts at 1, not 0.
      The index is pre-incremented, so the last usable slot is
      escbuffer[ESCBUFLEN-1]; excess characters are dropped.
    */
    if (escstate != ES_STRING && escstate != ES_TERMIN) {
        if (esclast < ESCBUFLEN - 1)
            escbuffer[++esclast] = ch;
    }

    switch (escstate) {                 /* Enter esc sequence state switcher */

      case ES_GOTESC:                   /* GOTESC state, prev char was Esc */
        switch (ch) {
          case '[':                     /* Left bracket after ESC is CSI */
            escstate = ES_GOTCSI;       /* Change to GOTCSI state */
            break;

          case '_':                     /* Application Program Command (APC) */
          case 'P':                     /* Device Control String (DCS) Intro */
          case 'Q':                     /* Private Use One (PU1) Introducer */
          case 'R':                     /* Private Use Two (PU2) Introducer */
          case 'X':                     /* Start of String (SOS) Introducer */
          case '^':                     /* Privacy Message (PM) */
          case ']':                     /* Operating System Command (OSC) */
            escstate = ES_STRING;       /* Enter STRING-absorption state */
            strrecv = TRUE;             /* We are receiving a string */
            strlength = 0;              /* and reset string buffer index */
            break;

          default:
            if (ch > 057 && ch < 0177) { /* Or final char, '0' thru '~' */
                escstate = ES_NORMAL;   /* Go back to normal. */
                return(-2);             /* Go act on it. */
            }
            escstate = ES_ESCSEQ;       /* Intermediate char; keep going */
            break;
        }
        break;

      case ES_ESCSEQ:                   /* ESCSEQ -- in an escape sequence */
        if (ch > 057 && ch < 0177) {    /* Final character is '0' thru '~' */
            escstate = ES_NORMAL;       /* Go back to normal. */
            return(-2);                 /* Go handle it */
        }
        break;                          /* Must not run into string code */

      case ES_GOTCSI:                   /* GOTCSI -- In a control sequence */
        if (ch > 077 && ch < 0177) {    /* Final character is '@' thru '~' */
            escstate = ES_NORMAL;       /* Go back to normal. */
            return(-2);                 /* Go act on it. */
        }
        break;                          /* Must not run into string code */

      case ES_STRING:                   /* Inside a string */
        /* ESC (possible terminator) never gets here; it is a control */
        /* character and was handled above. */
        if (ch == ST) {                 /* C1 String Terminator */
            escstate = ES_NORMAL;       /* If so, back to NORMAL */
            strrecv = FALSE;
        } else if (strrecv) {
            if (strlength < STRBUFLEN) { /* If in string, */
                strbuf[strlength++] = ch; /* deposit this character */
            } else {                    /* Buffer overrun */
                strrecv = FALSE;        /* Discard what we got */
                strlength = 0;          /* and go back to normal */
                escstate = ES_NORMAL;
            }
        }
        break;                          /* Absorb all other characters. */

      case ES_TERMIN:                   /* May have a string terminator */
        if (ch == '\\') {               /* which must be backslash */
            escstate = ES_NORMAL;       /* If so, back to NORMAL */
            strrecv = FALSE;
        } else {                        /* Just a stray Esc character. */
            escstate = ES_STRING;       /* Return to string absorption. */
            if (strrecv && strlength + 1 < STRBUFLEN) {
                strbuf[strlength++] = ESC; /* deposit Esc character */
                strbuf[strlength++] = ch;  /* and this character too */
            }
        }
        break;

      default:                          /* Not reached with valid states */
        break;
    }
    return(-1);
}

/* Writes one byte to standard output; returns 0 on success, -1 on failure. */
static int
put_byte(unsigned char c)
{
    ssize_t n;

    do {
        n = write(1, &c, 1);
    } while (n < 0 && errno == EINTR);  /* Retry if interrupted by a signal */
    return (n == 1) ? 0 : -1;
}

/* Runs one byte through the parser and writes it out if it is passed */
/* through; returns 0 on success, -1 if the write failed. */
static int
filter_byte(unsigned char c)
{
    int n = ansi_x3_64(c);

    if (n >= 0)                         /* Not part of an escape sequence */
        return put_byte((unsigned char)(n & 0xFF));
    return 0;                           /* Absorbed (-1) or complete (-2) */
}

/*
  Copies standard input to standard output one byte at a time, dropping
  everything the parser recognizes as part of an escape sequence.
  Returns 0 at end of input, 1 if reading or writing fails.
*/
int
main(int argc, char **argv)
{
    unsigned char ch;
    ssize_t n;

    (void)argc;                         /* No command-line options */
    (void)argv;

    for (;;) {
        do {
            n = read(0, &ch, 1);
        } while (n < 0 && errno == EINTR);
        if (n == 0)                     /* End of input */
            return 0;
        if (n < 0)                      /* Read error */
            return 1;

        if (ch > 127 && ch < 160 &&     /* It's a C1 character */
            ch != CSI) {                /* But not CSI */
            if (filter_byte(ESC) < 0)   /* Convert to C0 form: ESC + 7-bit */
                return 1;
            ch = (unsigned char)((ch & 0x7F) | 0x40);
        }
        if (filter_byte(ch) < 0)
            return 1;
    }
}