package testparser;
import java.io.*;
import java.util.*;
/**
 * This class converts a COBOL 85 fixed-format source into a free format source.
 * It also processes the REPLACE and COPY statements, and converts compiler
 * directives to comments.
 *
 * @author Bernard Pinon
 *
 * Copyright (c) Bernard Pinon 2002
 *
 * This file can be freely redistributed under the terms of the
 * LGPL licence published by the Free Software Foundation.
 */
public class Preprocessor {

    /**
     * Indicates that the input format is fixed - this is the
     * standard ANSI format, inspired by punched cards. Lines are nominally
     * 80 characters long; shorter lines are accepted and treated as if the
     * missing columns were absent.
     * <ul>
     * <li>Col 1 to 6 (included) : comments - used to number cards</li>
     * <li>Col 7 : indicator field</li>
     * <li>Col 8-12 : Area A</li>
     * <li>Col 13-72 : Area B</li>
     * <li>Col 73-80 : comments</li>
     * </ul>
     */
    public static final int FORMAT_FIXED = 0;

    /**
     * Indicates that the format is variable. This is used by the Fujitsu-Siemens
     * compiler for instance. In this format, lines can have a variable length.
     * <ul>
     * <li>Col 1 to 6 (included) : comments</li>
     * <li>Col 7 : indicator field</li>
     * <li>Col 8-12 : Area A</li>
     * <li>Col 13-80 : Area B</li>
     * </ul>
     */
    public static final int FORMAT_VARIABLE = 1;

    /**
     * Indicates that the format is the one used on HP-Compaq Non-stop systems,
     * also known as "Tandem" systems. This is similar to the variable format, but
     * without the first six comment characters.
     * <ul>
     * <li>Col 1 : indicator field</li>
     * <li>Col 2-6 : Area A</li>
     * <li>Col 7-80 : Area B</li>
     * </ul>
     */
    public static final int FORMAT_TANDEM = 2;

    /**
     * Number of leading comment (sequence number) columns dropped in the
     * fixed and variable formats.
     */
    private static final int SEQUENCE_AREA_WIDTH = 6;

    /**
     * Last significant column (1-based) of a fixed format line; everything
     * after it is dropped.
     */
    private static final int FIXED_LAST_COLUMN = 72;

    /**
     * The delimiters used to parse the source line
     * for REPLACE/COPY.
     */
    private static final String DELIMITERS = " \n\r\t,;.'\"=";

    /**
     * The format currently in use.
     */
    private int source_format = FORMAT_FIXED;

    /**
     * The directory of the source file. Copy-books are looked up in this
     * directory only.
     */
    private File source_path;

    /**
     * A reader for the COBOL 85 source currently being read (the main source
     * or, while a COPY is being expanded, the copy-book).
     */
    private BufferedReader input;

    /**
     * A writer for the free format COBOL 85 source.
     */
    private PrintWriter output;

    /**
     * The current source line.
     */
    private String line;

    /**
     * The current line number, for error messages.
     */
    private int line_number;

    /**
     * The indicator field of the current line.
     */
    private char indicator_field;

    /**
     * The REPLACE substitution map: upper-cased word to replacement text
     * (both Strings). It is never cleared, so entries persist across
     * successive calls to {@link #preprocess(String, String)}.
     */
    private final HashMap replace = new HashMap();

    /**
     * If true, we include debugging lines in the resulting source.
     */
    private boolean include_debug_lines;

    /**
     * Public empty constructor.
     */
    public Preprocessor() {
    }

    /**
     * Skips all delimiters in the parsed line.
     * @param st a StringTokenizer that returns delimiters as tokens.
     * @return the next non-delimiter token, an empty string if the tokenizer
     * is already exhausted, or the last delimiter read if only delimiters
     * remained.
     */
    private String skipDelimiters( StringTokenizer st ) {
        if( !st.hasMoreTokens() ) {
            return "";
        }
        String tok = st.nextToken();
        while( DELIMITERS.indexOf( tok ) >= 0 && st.hasMoreTokens() ) {
            tok = st.nextToken();
        }
        return tok;
    }

    /**
     * Closes a reader, ignoring a null argument and any failure to close.
     * @param reader the reader to close, possibly null.
     */
    private static void closeQuietly( Reader reader ) {
        if( reader == null ) {
            return;
        }
        try {
            reader.close();
        }
        catch( IOException ignored ) {
            // nothing useful can be done if closing a reader fails
        }
    }

    /**
     * Processes the COPY statement: the named copy-book is preprocessed in
     * place of the statement, bracketed by two comment lines. The copy-book
     * name is upper-cased and looked up in the source directory, first as is,
     * then with a ".COB" extension.
     * @param st a StringTokenizer positioned just after the COPY word.
     */
    private void processCopy( StringTokenizer st ) {
        String tok = skipDelimiters( st );
        // something (normally the terminating period) must follow the name
        if( !st.hasMoreTokens() ) {
            System.err.println("Problem parsing COPY, invalid syntax line " + line_number );
            return;
        }
        String copy_name = tok.toUpperCase();
        File copy_file = new File( source_path, copy_name );
        // check if the file exists. If not, gives a second chance trying with
        // a .COB extension.
        if( !copy_file.exists() ) {
            copy_name += ".COB";
            copy_file = new File( source_path, copy_name );
            if( !copy_file.exists() ) {
                System.err.println("File not found : " + copy_file.getAbsolutePath() );
                outputAsComment( line + " (file not found)" );
                return;
            }
        }
        // saves context
        BufferedReader saved_input = input;
        int saved_line_number = line_number;
        outputAsComment( "start of copy-book " + copy_name + " (" + copy_file.getAbsolutePath() + ")" );
        BufferedReader copy_input = null;
        try {
            copy_input = new BufferedReader( new FileReader( copy_file ) );
            input = copy_input;
            preprocessMainline();
        }
        catch( Exception ex ) {
            // best effort: report the problem and carry on with the including source
            System.err.println("IO Exception while processing COPY line " + line_number + " " + line );
            ex.printStackTrace();
        }
        finally {
            // always release the copy-book and restore the including context
            closeQuietly( copy_input );
            input = saved_input;
            line_number = saved_line_number;
        }
        outputAsComment( "end of copy-book " + copy_name );
    }

    /**
     * Processes the REPLACE statement and records the substitution in the
     * REPLACE map. The expected shape is
     * {@code REPLACE ==word== BY ==text==.}; the word to replace is stored
     * upper-cased.
     * @param st a StringTokenizer positioned just after the REPLACE word.
     */
    private void processReplace( StringTokenizer st ) {
        String tok = skipDelimiters( st );
        if( !st.hasMoreTokens() ) {
            System.err.println("Problem parsing REPLACE, invalid syntax, expecting text to replace line " + line_number );
            return;
        }
        String key = tok.toUpperCase();
        tok = skipDelimiters( st ).toUpperCase();
        if( !st.hasMoreTokens() || !tok.equals( "BY" ) ) {
            System.err.println("Problem parsing REPLACE, invalid syntax, expecting BY line " + line_number );
            return;
        }
        // first token of the substitution text (upper-cased, as are key and BY)
        tok = skipDelimiters( st ).toUpperCase();
        if( !st.hasMoreTokens() ) {
            System.err.println("Problem parsing REPLACE, invalid syntax, expecting substitution text line " + line_number );
            return;
        }
        // The substitution text runs up to the first '=' (the start of the
        // closing "==") or to the end of the line. The very last token of the
        // line (normally the terminating period) is never part of the text.
        // NOTE(review): a single '=' inside the text also ends it.
        StringBuffer subst = new StringBuffer();
        while( st.hasMoreTokens() && !tok.equals( "=" ) ) {
            subst.append( tok );
            tok = st.nextToken();
        }
        replace.put( key, subst.toString() );
    }

    /**
     * Removes the trailing spaces on a line. Used to save some space.
     * @param line a String.
     * @return the string without trailing spaces.
     */
    private static String removeTrailingSpaces( String line ) {
        int i = line.length() - 1;
        while( i >= 0 && line.charAt( i ) == ' ' ) {
            --i;
        }
        return line.substring( 0, i + 1 );
    }

    /**
     * Processes a COBOL line, trapping COPY/REPLACE statements, and
     * making the substitutions required by REPLACE. Any other line is
     * written to the output prefixed with one space and without trailing
     * spaces.
     * @param line the line content, without its indicator field.
     */
    private void processCopyReplace( String line ) {
        StringTokenizer st = new StringTokenizer( line, DELIMITERS, true );
        StringBuffer result = new StringBuffer( line.length() + 10 );
        boolean first_token = true;
        boolean in_string = false;
        result.append( ' ' );
        while( st.hasMoreTokens() ) {
            String tok = st.nextToken();
            // is it a delimiter ?
            if( DELIMITERS.indexOf( tok ) >= 0 ) {
                // only double quotes are tracked as string delimiters
                if( tok.equals( "\"" ) ) {
                    in_string = !in_string;
                }
                result.append( tok );
                continue;
            }
            // REPLACE/COPY should be the first significant token
            if( first_token ) {
                if( tok.equalsIgnoreCase( "REPLACE" ) ) {
                    outputAsComment( line );
                    processReplace( st );
                    return;
                }
                else if( tok.equalsIgnoreCase( "COPY" ) ) {
                    outputAsComment( line );
                    processCopy( st );
                    return;
                }
                first_token = false;
            }
            // other token should be checked for substitution, except for
            // those enclosed in ""
            if( !in_string ) {
                String utok = tok.toUpperCase();
                if( replace.containsKey( utok ) ) {
                    tok = (String) replace.get( utok );
                }
            }
            result.append( tok );
        }
        // fix sick strings that are not terminated
        if( in_string ) {
            result.append( '\"' );
        }
        output.println( removeTrailingSpaces( result.toString() ) );
    }

    /**
     * Normalizes an input line to the TANDEM format, i.e. indicator field
     * first. Lines too short to hold the columns being dropped yield what
     * is left of them (possibly an empty string) instead of failing.
     * @param line the input line.
     * @return a COBOL source line in Tandem format.
     */
    private String normalizeLine( String line ) {
        switch( source_format ) {
            case FORMAT_FIXED :
                if( line.length() <= SEQUENCE_AREA_WIDTH ) {
                    return "";
                }
                return line.substring( SEQUENCE_AREA_WIDTH,
                                       Math.min( line.length(), FIXED_LAST_COLUMN ) );
            case FORMAT_VARIABLE :
                if( line.length() <= SEQUENCE_AREA_WIDTH ) {
                    return "";
                }
                return line.substring( SEQUENCE_AREA_WIDTH );
            default :
                return line;
        }
    }

    /**
     * Outputs a line (which is not necessarily a comment) as a comment,
     * prefixed with "*> ".
     * @param line the line.
     */
    private void outputAsComment( String line ) {
        output.print( "*> " );
        output.println( line );
    }

    /**
     * Outputs a comment line, prefixed with "*>".
     * @param line the line.
     */
    private void outputComment( String line ) {
        output.print( "*>" );
        output.println( line );
    }

    /**
     * The preprocessor mainline, called recursively when processing COPY.
     * Reads the current input to its end, dispatching each line on its
     * indicator field.
     * @throws IOException if the input cannot be read.
     */
    private void preprocessMainline() throws IOException {
        line = input.readLine();
        while( line != null ) {
            ++line_number;
            line = normalizeLine( line );
            if( line.length() == 0 ) {
                // an empty line has no indicator: handle it as a blank code line
                indicator_field = ' ';
            }
            else {
                indicator_field = line.charAt( 0 );
                line = line.substring( 1 );
            }
            switch( indicator_field ) {
                case 'D' :
                case 'd' :
                    // debugging line: real code only when requested
                    if( include_debug_lines ) {
                        processCopyReplace( line );
                    }
                    else {
                        outputAsComment( line );
                    }
                    break;
                case ' ' :
                case '-' :
                    processCopyReplace( line );
                    break;
                case '*' :
                    outputComment( line );
                    break;
                default :
                    // unknown indicator (compiler directive...): comment it out
                    outputAsComment( line );
                    break;
            }
            // a nested COPY leaves 'line' null; this read re-arms the loop
            line = input.readLine();
        }
    }

    /**
     * The preprocessor main entry point. Progress messages go to the
     * standard output; any exception is reported on the standard error and
     * not rethrown. The line counter is reset on each call. The input and
     * the output file are always closed; when writing to the standard output
     * it is flushed but left open.
     * @param in_path Path to input file.
     * @param out_path Path to output file, or null to write to the standard
     * output.
     */
    public void preprocess( String in_path, String out_path ) {
        line_number = 0;
        line = null;
        input = null;
        output = null;
        boolean to_stdout = ( out_path == null );
        try {
            File in_file = new File( in_path );
            source_path = in_file.getParentFile();
            input = new BufferedReader( new FileReader( in_file ) );
            if( to_stdout ) {
                output = new PrintWriter( System.out );
                out_path = "stdout";
            }
            else {
                output = new PrintWriter( new FileWriter( out_path ) );
            }
            System.out.println("Processing COBOL file \"" + in_path + "\" to \"" + out_path + "\"." );
            preprocessMainline();
            System.out.println("Done, " + line_number + " lines processed." );
        }
        catch( Exception ex ) {
            if( line_number > 0 ) {
                System.err.println("Exception at line " + line_number + " : " + line );
            }
            ex.printStackTrace();
        }
        finally {
            closeQuietly( input );
            input = null;
            if( output != null ) {
                if( to_stdout ) {
                    // System.out belongs to the caller: flush, do not close
                    output.flush();
                }
                else {
                    output.close();
                }
                output = null;
            }
        }
    }

    /**
     * @return the number of the last line read by the most recent call to
     * {@link #preprocess(String, String)}.
     */
    public int getLineNumber() {
        return line_number;
    }

    /**
     * @return the source format.
     */
    public int getSourceFormat() {
        return source_format;
    }

    /**
     * Sets the source format.
     * @param source_format the new format, one of {@link #FORMAT_FIXED},
     * {@link #FORMAT_VARIABLE} or {@link #FORMAT_TANDEM}; any other value is
     * handled like {@link #FORMAT_TANDEM}.
     */
    public void setSourceFormat( int source_format ) {
        this.source_format = source_format;
    }

    /**
     * @return true if we include debug lines
     */
    public boolean includesDebugLines() {
        return include_debug_lines;
    }

    /**
     * @param include_debug_lines if true, includes lines marked with "D".
     */
    public void setIncludeDebugLines( boolean include_debug_lines ) {
        this.include_debug_lines = include_debug_lines;
    }

    /**
     * Displays the program usage on the standard error and exits.
     */
    private static void displayProgramUsageAndExit() {
        System.err.println("Usage : java Preprocessor [<options>] <in_path> [<out_path>]");
        System.err.println("<options>  : preprocessor options :");
        System.err.println("Input format option : -F (fixed) -V (variable) -T (tandem)");
        System.err.println("Debug option : -D (include debug lines)");
        System.err.println("<in_path>  : COBOL 85 source path.");
        System.err.println("<out_path> : output path. If omitted, stdout is used.");
        System.exit(0);
    }

    /**
     * A simple mainline.
     * Usage : {@code java Preprocessor [<options>] <in_path> [<out_path>]}
     * where :
     * <ul>
     * <li>{@code <options>} : preprocessor options :
     *   <ul>
     *   <li>Input format option : -F (fixed) -V (variable) -T (tandem)</li>
     *   <li>Debug option : -D (include debug lines)</li>
     *   </ul>
     * </li>
     * <li>{@code <in_path>} : COBOL 85 source path.</li>
     * <li>{@code <out_path>} : output path. If omitted, stdout is used.</li>
     * </ul>
     * The usage is displayed when no input path is given or when an option
     * is not recognized.
     *
     * @param args Command-line arguments.
     */
    public static void main( String[] args ) {
        System.out.println("COBOL 85 Source preprocessor - (c) Bernard Pinon 2002");
        if( args.length == 0 ) {
            displayProgramUsageAndExit();
        }
        Preprocessor preprocessor = new Preprocessor();
        int i;
        // process command line options
        for( i = 0; i < args.length; i++ ) {
            if( !args[i].startsWith( "-" ) ) {
                break;
            }
            // a lone "-" carries no option letter
            char option = ( args[i].length() > 1 ? args[i].charAt( 1 ) : '\0' );
            switch( option ) {
                case 'F' : preprocessor.setSourceFormat( FORMAT_FIXED ); break;
                case 'V' : preprocessor.setSourceFormat( FORMAT_VARIABLE ); break;
                case 'T' : preprocessor.setSourceFormat( FORMAT_TANDEM ); break;
                case 'D' : preprocessor.setIncludeDebugLines( true ); break;
                default : displayProgramUsageAndExit();
            }
        }
        // process command line arguments
        if( i >= args.length ) {
            // options only, no input path
            displayProgramUsageAndExit();
        }
        String source = args[i++];
        String dest = ( args.length > i ? args[i] : null );
        // preprocess
        preprocessor.preprocess( source, dest );
        // bye
        System.exit(0);
    }
}