/*******************************************************/
/* copyright (C) Simorgh Software. All rights reserved.*/
/*                                                     */
/* gsz2isi : gerdsooz to ISIRI-3342 encoding converter */
/*                                                     */
/* Compilation:                                        */
/*                                                     */
/*  On a Unix machine or PC with lex compiler utility  */
/*  execute the following commands:                    */
/*                                                     */
/*      lex gsz2isi.l                                  */
/*      cc lex.yy.c -o gsz2isi -ll                     */
/*                                                     */
/* Requirements:                                       */
/*                                                     */
/*  input.gsz : a gerdsooz file with =begin= and       */
/*              =end= tags, marking the encoding area. */
/* Usage:                                              */
/*                                                     */
/*      gsz2isi < input.gsz > output.isi               */
/*                                                     */
/* Options:                                            */
/*                                                     */
/*  -e  Pass through any english text outside of       */
/*      the =begin= and =end= markers.                 */
/*                                                     */
/*******************************************************/
P [\!\@\#\$\%\^\&\*\(\)\_\\\+\`\=\-\|\]\}\[\{\'\"\;\:\?\/\>\.\<\,]
D [0-9]
L [a-zA-Z ]
%{
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Scanner modes: text between =begin= and =end= is PERSIAN, the rest ENGLISH. */
#define PERSIAN 1
#define ENGLISH 2

#define TRUE    1
#define FALSE   0

/*
 * Route the scanner's default echo through our own writer.  The #undef
 * keeps this safe whether or not the lex implementation has already
 * defined ECHO by the time this section is reached.
 */
#undef ECHO
#define ECHO stringOutput((unsigned char *) yytext)

int ch;                         /* current option letter returned by getopt() */
int state         = ENGLISH;    /* current scanner mode (PERSIAN or ENGLISH)  */
int englishOutput = FALSE;      /* TRUE when -e was given                     */
int htmlOutput    = FALSE;      /* not referenced anywhere in this file       */
int beginQuote    = TRUE;       /* TRUE when the next '"' opens a quotation   */

void processISIRI( unsigned char ch );
void stringOutput( unsigned char * string );
void charOutput  ( unsigned char ch );
%}
%%
"=begin="   {
                /* Start of an encoded region. */
                state = PERSIAN;
            }
"=end="     {
                /* End of an encoded region. */
                state = ENGLISH;
            }
{L}~        {
                /*
                 * A letter (or space) immediately followed by '~'.
                 * In PERSIAN mode the letter is converted and then the
                 * byte 0xa1 is emitted; in ENGLISH mode the two characters
                 * are passed through only when -e is active.
                 */
                if (state == ENGLISH && englishOutput == TRUE) {
                    stringOutput((unsigned char *) yytext);
                } else if (state == PERSIAN) {
                    processISIRI((unsigned char) yytext[0]);
                    charOutput(0xa1);
                }
            }
{P}         |
{D}         |
{L}         {
                /*
                 * Single punctuation mark, digit or letter: converted in
                 * PERSIAN mode, passed through in ENGLISH mode only when
                 * -e is active, otherwise dropped.
                 */
                if (state == ENGLISH && englishOutput == TRUE) {
                    stringOutput((unsigned char *) yytext);
                } else if (state == PERSIAN) {
                    processISIRI((unsigned char) yytext[0]);
                }
            }
[\t\v\n\f]  {
                /* Layout characters are kept verbatim whenever output is on. */
                if (state == PERSIAN || englishOutput == TRUE) {
                    stringOutput((unsigned char *) yytext);
                }
            }
"\\\n"      {}
.           {}
%%

/*
 * stringOutput - write a NUL-terminated byte string to yyout unchanged.
 *
 * string: the bytes to write; must be NUL-terminated.
 */
void stringOutput ( unsigned char * string )
{
    fputs((const char *) string, yyout);
}

/*
 * charOutput - write a single byte to yyout.
 */
void charOutput ( unsigned char ch )
{
    putc(ch, yyout);
}

/*
 * processISIRI - convert one gerdsooz input character to its output
 * byte(s) and write them to yyout.
 *
 * Characters without an entry in the table are written unchanged.
 * The double quote alternates between two output bytes, tracked through
 * the global beginQuote flag.
 */
void processISIRI( unsigned char ch )
{
    switch (ch) {
    /* letters */
    case 'a': charOutput(0xc1); break;
    case 'A': charOutput(0xc0); break;
    case 'b': charOutput(0xc3); break;
    case 'p': charOutput(0xc4); break;
    case 't': charOutput(0xc5); break;
    case 'c': charOutput(0xc6); break;
    case 'j': charOutput(0xc7); break;
    case 'G': charOutput(0xc8); break;
    case 'H': charOutput(0xc9); break;
    case 'K': charOutput(0xca); break;
    case 'd': charOutput(0xcb); break;
    case 'Z': charOutput(0xcc); break;
    case 'r': charOutput(0xcd); break;
    case 'z': charOutput(0xce); break;
    case 'J': charOutput(0xcf); break;
    case 's': charOutput(0xd0); break;
    case 'S': charOutput(0xd1); break;
    case 'C': charOutput(0xd2); break;
    case 'x': charOutput(0xd3); break;
    case 'T': charOutput(0xd4); break;
    case 'X': charOutput(0xd5); break;
    case 'e': charOutput(0xd6); break;
    case 'Q': charOutput(0xd7); break;
    case 'f': charOutput(0xd8); break;
    case 'q': charOutput(0xd9); break;
    case 'k': charOutput(0xda); break;
    case 'g': charOutput(0xdb); break;
    case 'l': charOutput(0xdc); break;
    case 'm': charOutput(0xdd); break;
    case 'n': charOutput(0xde); break;
    case 'v': charOutput(0xdf); break;
    case 'h': charOutput(0xe0); break;
    case 'y': charOutput(0xe1); break;
    case 'i':
        /* 'i' is the only input character that produces two bytes. */
        charOutput(0xc2);
        charOutput(0xa2);
        break;

    /* digits */
    case '0': charOutput(0xb0); break;
    case '1': charOutput(0xb1); break;
    case '2': charOutput(0xb2); break;
    case '3': charOutput(0xb3); break;
    case '4': charOutput(0xb4); break;
    case '5': charOutput(0xb5); break;
    case '6': charOutput(0xb6); break;
    case '7': charOutput(0xb7); break;
    case '8': charOutput(0xb8); break;
    case '9': charOutput(0xb9); break;

    /* space and punctuation */
    case ' ': charOutput(0xa0); break;
    case '!': charOutput(0xa3); break;
    case '"':
        /* Alternate between the opening and the closing quote byte. */
        if (beginQuote == TRUE) {
            beginQuote = FALSE;
            charOutput(0xe7);
        } else {
            beginQuote = TRUE;
            charOutput(0xe6);
        }
        break;
    case '%': charOutput(0xa5); break;
    case ':': charOutput(0xba); break;
    case '?': charOutput(0xbf); break;
    case ')': charOutput(0xa9); break;
    case '(': charOutput(0xa8); break;
    case '.': charOutput(0xa6); break;
    case ',': charOutput(0xac); break;

    default:
        /* No mapping: copy the input byte as-is. */
        charOutput(ch);
        break;
    }
}

/*
 * printUsage - print the usage banner.
 *
 * The banner goes to stderr because stdout carries the converted text
 * (see the usage line: output is normally redirected into a file).
 */
void printUsage(void)
{
    fprintf(stderr, "\n");
    fprintf(stderr, "*************************************************************\n");
    fprintf(stderr, "Copyright(C) 1995 Simorgh Software. All rights reserved. \n");
    fprintf(stderr, "gsz2isi: gerdsooz to ISIRI-3342 converter \n");
    fprintf(stderr, "usage: gsz2isi [-e] < input.gsz > output.isi \n");
    fprintf(stderr, "options: \n");
    fprintf(stderr, " -e allows English text to pass through filter \n");
    fprintf(stderr, " \n");
    fprintf(stderr, "requirements: Gerdsooz input file have =begin= =end= markings\n");
    fprintf(stderr, " around encoding \n");
    fprintf(stderr, "*************************************************************\n");
}

/*
 * main - parse the command line, then run the scanner over its input
 * until yylex() reports end of input.
 *
 * Returns 0 on success; exits with status -1 after printing the usage
 * banner when an unknown option is given.
 */
int main(int argc, char **argv)
{
    int errflag = 0;

    while ((ch = getopt(argc, argv, "e")) != -1) {
        switch (ch) {
        case 'e':
            englishOutput = TRUE;
            break;
        default:
            errflag++;
            break;
        }
    }

    if (errflag) {
        printUsage();
        exit(-1);
    }

    /* All output is produced by the rule actions; just drain the input. */
    while (yylex() > 0)
        ;

    return 0;
}