//
//  pair_study_post.c
//  TVStudy
//
//  Copyright (c) 2012-2013 Hammett & Edison, Inc.  All rights reserved.


// For TVStudy version 1.2.3


// Post-process run output files from a TVStudy pair study run.  See StudyRunPair.java.

// This is mostly just pushing data around between files, but there are two major filtering and processing operations.
// The first is defining the point keys and building the unique study point list (eliminating point definitions that
// are duplicated between scenarios).  The algorithm for assigning a point key simply scales and combines the cell
// latitude and longitude index values (those are coordinates in integer arc-seconds) and the country key:

//   ((((latIndex / cellSize) * ((maxLonIndex / cellSize) + 1)) + (lonIndex / cellSize)) * 3) + (countryKey - 1)

// The cellSize is the size of study cells in the latitude dimension, which is constant for a run.  The longitude
// dimension will vary, but it will always be equal to or greater than the latitude dimension, meaning the scaled
// index range is always equal or smaller.

// The latIndex and lonIndex ranges are limited to north latitude (0-75 degrees) and west longitude (0-180 degrees)
// and the country key must be 1, 2, or 3.  If values outside those ranges occur processing fails.  The cellSize must
// be >= 16 to ensure the point keys are within signed 32-bit integer range.

// A byte array is used as a flag map to track points.  The map is indexed by the cell coordinate part of the point
// key.  Within each map byte, bit flags are used for countries.

// The second operation is to eliminate duplicate rows from the service and interference point data that result from
// cases where a station is already on one of the proxy channels, or a channel interfering with one of the proxy
// channels, in the baseline scenario.  As the baseline run output is processed an index of (facilityID, channel) keys
// is built.  As the pairwise run outputs are processed later, every row is checked against that index and duplicates
// are skipped.

// Note this must be in sync with the engine code regarding the format in the temporary output files, names of those
// files are provided by argument.  The final output files use a comma-separated field format and all have fixed names.
// Since this is presumed to be immediate post-processing of data just generated by the engine and is not a general-
// purpose data parser, any error will immediately exit.


#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <sys/stat.h>
#include <stdarg.h>
#include <time.h>
#include <sys/time.h>


//---------------------------------------------------------------------------------------------------------------------
// Names for output files, and length of longest.  All of these are created in the output directory named by the first
// command-line argument.  MAX_OUT_NAME is the character count of the longest name ("interference.csv") and does not
// include the string terminator; it is used as the lower bound when sizing the full path name buffer.

#define BASELINE_FILE      "baseline.csv"
#define REPLICATION_FILE   "replication.csv"
#define POINTS_FILE        "points.csv"
#define SERVICE_FILE       "service.csv"
#define INTERFERENCE_FILE  "interference.csv"

#define MAX_OUT_NAME  16

// Length of input line buffer, including the terminator.  Longer input lines are truncated by fgetnl().  Also used as
// the size of the formatted message buffer in write_log().

#define MAX_LINE  256

// Max number of comma-separated fields in an input line.  This sizes the field pointer array passed to parse_line().

#define MAX_FIELDS  10


//---------------------------------------------------------------------------------------------------------------------
// Local helper functions, defined after main().  fgetnl() reads one input line, parse_line() splits a line into
// comma-separated fields, write_log() writes a message with a relative timestamp.

static int fgetnl(char *buf, int siz, FILE *stream);
static int parse_line(char *line, char **fields, int maxFields);
static void write_log(FILE *out, char *fmt, ...);


//---------------------------------------------------------------------------------------------------------------------
// Check command line, first argument is the path to the output directory, second argument is the maximum facility ID
// in the data (used to size the index array).  Third and later are names of the temporary output files from the study
// runs, baseline first, pair scenarios after that.  There must be at least one pair scenario output.

// The temporary files are opened relative to the output directory.  Any error is reported on stderr and the process
// exits with status 1; on success a summary is logged to stdout and the exit status is 0.

int main(int argc, char **argv) {

	// Strip any directory part from the command name for the usage message.  strrchr() is declared by <string.h>,
	// the legacy rindex() is not.

	char *commandName = strrchr(argv[0], '/');
	if (commandName) {
		commandName++;
	} else {
		commandName = argv[0];
	}

	if (argc < 5) {
		fprintf(stderr, "usage: %s outdir maxid basefile pairfile [ pairfile ... ]\n", commandName);
		exit(1);
	}

	char *filePath = argv[1];

	// Check max facility ID, allocate channel lookup map.  The map is just an array indexed by the facility ID with
	// each value indicating the baseline channel for that station.  A zero entry means no baseline channel is known.
	// calloc() zero-fills the map and rejects a size computation that would overflow.

	int maxFacilityID = atoi(argv[2]);
	if (maxFacilityID < 1) {
		write_log(stderr, "Error - Bad maximum facility ID value");
		exit(1);
	}
	int *channelMap = (int *)calloc((size_t)maxFacilityID + 1, sizeof(int));
	if (!channelMap) {
		write_log(stderr, "Error - Memory allocation failed");
		exit(1);
	}

	// Find longest file name, allocate buffer for constructing full file path.  A trailing '/' on the directory path
	// is removed since one is always inserted; an empty path must not be indexed at [-1].  The extra 5 bytes cover the
	// separator and terminator.

	int i, l, maxFilePathName = MAX_OUT_NAME;
	for (i = 2; i < argc; i++) {
		l = strlen(argv[i]);
		if (l > maxFilePathName) {
			maxFilePathName = l;
		}
	}
	l = strlen(filePath);
	if ((l > 0) && ('/' == filePath[l - 1])) {
		filePath[l - 1] = '\0';
		l--;
	}
	maxFilePathName += l + 5;
	char *filePathName = (char *)malloc(maxFilePathName);
	if (!filePathName) {
		write_log(stderr, "Error - Memory allocation failed");
		exit(1);
	}

	// Open all the output files.

	snprintf(filePathName, maxFilePathName, "%s/%s", filePath, BASELINE_FILE);
	FILE *baselineOut = fopen(filePathName, "w");

	snprintf(filePathName, maxFilePathName, "%s/%s", filePath, REPLICATION_FILE);
	FILE *replicationOut = fopen(filePathName, "w");

	snprintf(filePathName, maxFilePathName, "%s/%s", filePath, POINTS_FILE);
	FILE *pointsOut = fopen(filePathName, "w");

	snprintf(filePathName, maxFilePathName, "%s/%s", filePath, SERVICE_FILE);
	FILE *serviceOut = fopen(filePathName, "w");

	snprintf(filePathName, maxFilePathName, "%s/%s", filePath, INTERFERENCE_FILE);
	FILE *interferenceOut = fopen(filePathName, "w");

	if (!baselineOut || !replicationOut || !pointsOut || !serviceOut || !interferenceOut) {
		write_log(stderr, "Error - Cannot create output files");
		exit(1);
	}

	// Now start processing the temporary data files.  The first file is handled differently, that file is from the
	// baseline run.  All service and interference point rows from that file are written to the output, and coverage
	// data is written to the baseline file including interference-free values.  Also the facility ID -> channel index
	// is built.  The second and later files are from the pair scenario run, point rows are checked against the
	// facility ID -> channel index to skip those already written from the baseline run (whenever the facility ID and
	// channel match the index, on both desired and undesired), and coverage data is written to the replication file
	// but without the interference-free values (those are meaningless for the pair scenarios since interference
	// conditions in those scenarios include every possible interfering station on every possible channel).

	// The havePoint flag is set once the initial line of a point block has been processed.  A separate flag is needed
	// because zero is a legal point key value.

	FILE *in;
	long lineNumber = 0, pointsCount = 0, serviceCount = 0, interferenceCount = 0;
	int inBaseline = 1, inCase = 0, inPoint = 0, inCoverage = 0, havePoint = 0, cellSize = 0, pointKey = 0,
		latIndex = 0, lonIndex = 0, countryKey = 0, mapIndex = 0, indexCellSize = 0, maxLatIndex = 75 * 3600,
		maxLonIndex = 180 * 3600, latKeyScale = 0, facilityID = 0, channel = 0, serviceFlag = 0, ixFacilityID = 0,
		ixChannel = 0, iarg;
	char *fileName = NULL, line[MAX_LINE], firstChar, *fields[MAX_FIELDS], *errorMessage = NULL;
	unsigned char *pointMap = NULL, countryBit;

	struct stat st;
	double fileSize, updateStep, filePos, nextUpdate;
	int pcntDone;

	for (iarg = 3; iarg < argc; iarg++) {

		fileName = argv[iarg];
		lineNumber = 0;

		snprintf(filePathName, maxFilePathName, "%s/%s", filePath, fileName);
		in = fopen(filePathName, "r");
		if (!in) {
			errorMessage = "Cannot open file";
			break;
		}

		if (inBaseline) {
			write_log(stdout, "Working on baseline study output...");
		} else {
			write_log(stdout, "Working on pair study output %d of %d...", (iarg - 3), (argc - 4));
		}

		// The file size is only used for progress reporting.  If it is unknown or zero the intermediate progress
		// messages are suppressed rather than dividing by zero.

		if (0 == fstat(fileno(in), &st)) {
			fileSize = (double)st.st_size;
		} else {
			fileSize = 0.;
		}
		updateStep = fileSize / 20.;
		nextUpdate = 0.;
		filePos = 0.;

		inCase = 0;
		inPoint = 0;
		inCoverage = 0;

		while (1) {

			if (fgetnl(line, MAX_LINE, in) < 0) {
				if (inCase) {
					errorMessage = "Open block at end of file";
				}
				break;
			}
			lineNumber++;

			filePos = (double)ftell(in);
			if ((fileSize > 0.) && (filePos > nextUpdate)) {
				pcntDone = (int)rint((filePos / fileSize) * 100.);
				write_log(stdout, "Completed %d%%", pcntDone);
				nextUpdate += updateStep;
			}

			firstChar = line[0];

			// Check block structure in file, [case]/[endcase] enclose each scenario run, within the case block
			// [point]/[endpoint] enclose study points and [coverage]/[endcoverage] enclose total coverage information.
			// Unknown blocking keywords are generally ignored but are considered errors inside a point or coverage
			// block.  The first line of a case block provides the cell size for index calculations, that line must be
			// processed before any other block keyword occurs.

			if ('[' == firstChar) {

				if (inCase && (0 == cellSize)) {
					errorMessage = "Missing scenario parameters";
					break;
				}

				if (0 == strcmp(line, "[point]")) {

					if (!inCase) {
						errorMessage = "Data block outside scenario block";
						break;
					}
					if (inPoint || inCoverage) {
						errorMessage = "Overlapping data blocks";
						break;
					}
					inPoint = 1;
					havePoint = 0;
					pointKey = 0;
					facilityID = 0;

				} else if (0 == strcmp(line, "[endpoint]")) {

					if (!inPoint) {
						errorMessage = "Data block closed when no block open";
						break;
					}
					inPoint = 0;

				} else if (0 == strcmp(line, "[coverage]")) {

					if (!inCase) {
						errorMessage = "Data block outside scenario block";
						break;
					}
					if (inPoint || inCoverage) {
						errorMessage = "Overlapping data blocks";
						break;
					}
					inCoverage = 1;

				} else if (0 == strcmp(line, "[endcoverage]")) {

					if (!inCoverage) {
						errorMessage = "Data block closed when no block open";
						break;
					}
					inCoverage = 0;

				} else if (0 == strcmp(line, "[case]")) {

					if (inCase) {
						errorMessage = "Overlapping scenario blocks";
						break;
					}
					inCase = 1;
					cellSize = 0;

				} else if (0 == strcmp(line, "[endcase]")) {

					if (!inCase) {
						errorMessage = "Scenario block closed when no block open";
						break;
					}
					if (inPoint || inCoverage) {
						errorMessage = "Scenario block closed when data block still open";
						break;
					}
					inCase = 0;

				} else if (inPoint || inCoverage) {

					errorMessage = "Unknown block keyword inside data block";
					break;
				}

				continue;
			}

			// In a point block all lines are valid data.  The initial line has point information.  Extract index
			// values, compute the point key.  Check the point map, if this is a new point write it to the points file.
			// That gets the point key, a comma, then the point information line verbatim.

			if (inPoint) {

				if (!havePoint) {

					l = parse_line(line, fields, MAX_FIELDS);
					if (l < 3) {
						errorMessage = "Bad format in point parameters";
						break;
					}

					latIndex = atoi(fields[0]);
					lonIndex = atoi(fields[1]);
					countryKey = atoi(fields[2]);

					if ((latIndex < 0) || (latIndex > maxLatIndex) || (lonIndex < 0) || (lonIndex > maxLonIndex) ||
							(countryKey < 1) || (countryKey > 3)) {
						errorMessage = "Illegal value in point parameters";
						break;
					}

					// indexCellSize is always non-zero here, a [point] keyword is rejected above until the scenario
					// parameters line has been processed.

					mapIndex = ((latIndex / indexCellSize) * latKeyScale) + (lonIndex / indexCellSize);
					pointKey = (mapIndex * 3) + (countryKey - 1);
					havePoint = 1;

					// Country keys 1, 2, 3 map to flag bits 1, 2, 4 in the map byte.

					countryBit = (unsigned char)(1 << (countryKey - 1));

					if (0 == (pointMap[mapIndex] & countryBit)) {
						fprintf(pointsOut, "%d,%s\n", pointKey, line);
						pointsCount++;
						pointMap[mapIndex] |= countryBit;
					}

					continue;
				}

				// For all later lines, the first field is a single character that identifies the data being reported,
				// D for a desired, U for an undesired.  Parse out the fields on either line, both start with facility
				// ID and channel, the D record has one more field, the service flag.  Desireds are written to the
				// service file, undesireds to the interference file, but see comments above regarding baseline vs.
				// pair runs and the channel index.  U records must follow a D, any that don't are errors.  Lines that
				// don't start D or U are ignored.

				if ('U' == firstChar) {

					if (0 == facilityID) {
						errorMessage = "U record with no preceding D record";
						break;
					}

					l = parse_line(line, fields, MAX_FIELDS);
					if (l < 3) {
						errorMessage = "Bad format in U record";
						break;
					}

					ixFacilityID = atoi(fields[1]);
					ixChannel = atoi(fields[2]);

					if ((ixFacilityID < 1) || (ixFacilityID > maxFacilityID) || (ixChannel <= 0)) {
						errorMessage = "Illegal values in U record";
						break;
					}

					if (inBaseline || (channel != channelMap[facilityID]) ||
							(ixChannel != channelMap[ixFacilityID])) {
						fprintf(interferenceOut, "%d,%d,%d,%d,%d\n", pointKey, facilityID, channel, ixFacilityID,
							ixChannel);
						interferenceCount++;
					}

					continue;
				}

				if ('D' == firstChar) {

					l = parse_line(line, fields, MAX_FIELDS);
					if (l < 4) {
						errorMessage = "Bad format in D record";
						break;
					}

					facilityID = atoi(fields[1]);
					channel = atoi(fields[2]);
					serviceFlag = atoi(fields[3]);

					if ((facilityID < 1) || (facilityID > maxFacilityID) || (channel <= 0)) {
						errorMessage = "Illegal values in D record";
						break;
					}

					if (inBaseline || (channel != channelMap[facilityID])) {
						fprintf(serviceOut, "%d,%d,%d,%d\n", pointKey, facilityID, channel, serviceFlag);
						serviceCount++;
					}

					continue;
				}
			}

			// In the baseline file the coverage block lines are copied verbatim, however facility ID and channel must
			// be parsed to build the facility ID -> channel map.  For later files, only copy the initial fields for
			// noise-limited and terrain-limited coverage; the interference-free coverage numbers are meaningless in
			// the pair scenario runs.

			if (inCoverage) {

				if (inBaseline) {

					l = parse_line(line, fields, MAX_FIELDS);
					if (l < 2) {
						errorMessage = "Bad format in coverage data";
						break;
					}

					facilityID = atoi(fields[0]);
					channel = atoi(fields[1]);

					if ((facilityID < 1) || (facilityID > maxFacilityID) || (channel <= 0)) {
						errorMessage = "Illegal values in coverage data";
						break;
					}

					channelMap[facilityID] = channel;

					fprintf(baselineOut, "%s\n", line);

				} else {

					l = parse_line(line, fields, MAX_FIELDS);
					if (l < 7) {
						errorMessage = "Bad format in coverage data";
						break;
					}

					fprintf(replicationOut, "%s,%s,%s,%s,%s,%s,%s\n", fields[0], fields[1], fields[2], fields[3],
						fields[4], fields[5], fields[6]);
				}

				continue;
			}

			// If this is the first line of a new case block, extract the cell size.  The first time, set up the flag
			// map array.  Every scenario after the first must have the same cell size.  If the line fails to parse or
			// the cell size changes, or if the initial cell size is illegal, abort.

			if (inCase && (0 == cellSize)) {

				l = parse_line(line, fields, MAX_FIELDS);
				if (l < 3) {
					errorMessage = "Bad format in scenario parameters";
					break;
				}

				cellSize = atoi(fields[2]);

				if (0 == indexCellSize) {

					if (cellSize < 16) {
						errorMessage = "Illegal value in scenario parameters";
						break;
					}

					indexCellSize = cellSize;
					latKeyScale = (maxLonIndex / indexCellSize) + 1;

					// One zero-filled map byte per possible cell.

					l = ((maxLatIndex / indexCellSize) + 1) * latKeyScale;
					pointMap = (unsigned char *)calloc((size_t)l, 1);
					if (!pointMap) {
						errorMessage = "Memory allocation failed";
						break;
					}

				} else {

					if (cellSize != indexCellSize) {
						errorMessage = "Cell size changed between scenarios";
						break;
					}
				}

				continue;
			}

		// Next line.

		}

		// Final progress message for the file, also logged if the file was abandoned on error.  An empty file or one
		// of unknown size is reported as fully processed.

		if (fileSize > 0.) {
			pcntDone = (int)rint((filePos / fileSize) * 100.);
		} else {
			pcntDone = 100;
		}
		write_log(stdout, "Completed %d%%", pcntDone);

		// End of file, or error.

		fclose(in);
		in = NULL;

		if (errorMessage) {
			break;
		}

		// Next file.

		inBaseline = 0;
	}

	// Close files, report error.  A failed close on an output file means buffered data could not be written.

	int closeFailed = (0 != fclose(baselineOut));
	closeFailed |= (0 != fclose(replicationOut));
	closeFailed |= (0 != fclose(pointsOut));
	closeFailed |= (0 != fclose(serviceOut));
	closeFailed |= (0 != fclose(interferenceOut));

	if (errorMessage) {
		write_log(stderr, "Error - %s", errorMessage);
		if (fileName) {
			if (lineNumber) {
				write_log(stderr, "  working on file '%s' at line %ld", fileName, lineNumber);
			} else {
				write_log(stderr, "  working on file '%s'", fileName);
			}
		}
		exit(1);
	}

	if (closeFailed) {
		write_log(stderr, "Error - Cannot write output files");
		exit(1);
	}

	// Done.

	write_log(stdout, "Processing complete");
	write_log(stdout, "%ld study points", pointsCount);
	write_log(stdout, "%ld service records", serviceCount);
	write_log(stdout, "%ld interference records", interferenceCount);

	exit(0);
}


//---------------------------------------------------------------------------------------------------------------------
// Replacement for fgets().  Read always continues to the next line-end character or EOF, if the return buffer is too
// short the line is truncated.  Either newline or carriage return is recognized as line end, so a CR-LF pair is seen
// as a line followed by an empty line.  The line-end character is discarded.  An implied line end occurs at EOF if
// needed, so EOF is returned only if no characters are read.

// Arguments:

//   buf     Return buffer, always terminated when siz > 0 (set to an empty string on the EOF return).
//   siz     Size of buffer including terminator.  If <= 0 nothing is stored but the line is still consumed.
//   stream  Input stream.

// Return the number of characters read not including the line end, if >=siz truncation occurred; return -1 if at EOF.

static int fgetnl(char *buf, int siz, FILE *stream) {

	int limit = siz - 1, stored = 0, count = 0, c;

	// Terminate up front so the buffer is a valid string on every return path, including EOF.

	if (siz > 0) {
		buf[0] = '\0';
	}

	// Compare the int value from getc() directly, so EOF is never confused with a data byte.

	while (((c = getc(stream)) != EOF) && (c != '\n') && (c != '\r')) {
		if (stored < limit) {
			buf[stored++] = (char)c;
		}
		count++;
	}

	// Only report EOF if nothing at all was read; an unterminated final line, or an empty line, is returned normally.

	if ((0 == count) && (EOF == c)) {
		return -1;
	}

	if (siz > 0) {
		buf[stored] = '\0';
	}
	return count;
}


//---------------------------------------------------------------------------------------------------------------------
// Separate a line into fields broken on ','.  The returned field pointers refer to a static internal buffer holding a
// copy of the source line (the source is not modified), so the fields are only valid until the next call and uses
// cannot overlap.

// Arguments:

//   line       Source line.
//   fields     Return pointers to individual fields.  Unused entries are set to NULL.
//   maxFields  Size of fields array, must be >0!  If the line has more fields than this, the last field will contain
//                the entire remaining line including additional separators.

// Return the number of fields parsed, always at least 1 (an empty line is one empty field).  Return 0 with all fields
// NULL if the internal buffer cannot be allocated.

static int parse_line(char *line, char **fields, int maxFields) {

	static char *buffer = NULL;
	static size_t maxBuffer = 0;

	int i;

	// Grow the copy buffer when needed.  The required size includes the terminator, so an empty line on the first
	// call still allocates, and a line exactly as long as the current capacity does not overflow.  The previous buffer
	// contents are never needed again, so it is released rather than reallocated.

	size_t need = strlen(line) + 1;
	if (need > maxBuffer) {
		free(buffer);
		maxBuffer = need + 100;
		buffer = (char *)malloc(maxBuffer);
		if (!buffer) {
			maxBuffer = 0;
			for (i = 0; i < maxFields; i++) {
				fields[i] = NULL;
			}
			return 0;
		}
	}
	memcpy(buffer, line, need);

	// Split in place, replacing each separator with a terminator.  Stop at the end of the line or when the fields
	// array is full, in the latter case the last field keeps the unsplit remainder.

	char *sep;
	fields[0] = buffer;
	for (i = 1; i < maxFields; i++) {
		sep = strchr(fields[i - 1], ',');
		if (!sep) {
			break;
		}
		*sep = '\0';
		fields[i] = sep + 1;
	}

	int n = i;
	for (; i < maxFields; i++) {
		fields[i] = NULL;
	}

	return n;
}


//---------------------------------------------------------------------------------------------------------------------
// Write log message with relative timestamp, set start time on first call.  On that first call a "Run started" line
// with the local date and time is written to stdout before the requested message, regardless of the stream argument.
// Each message is prefixed with the elapsed time as hours:minutes:seconds.milliseconds, followed by a newline, and the
// stream is flushed.  Formatted messages longer than the line buffer are truncated.

// Arguments:

//   out  Output stream.
//   fmt  printf-style format string, followed by varargs.

static void write_log(FILE *out, char *fmt, ...) {

	static time_t startTime = 0;
	static int startTimeMillis = 0;

	// The timezone argument is obsolete and must be NULL for portable use.

	struct timeval t;
	gettimeofday(&t, NULL);

	if (!startTime) {

		startTime = t.tv_sec;
		startTimeMillis = (int)(t.tv_usec / 1000);

		// The start time is already set, so this nested call does not recurse further.

		struct tm *p = localtime(&startTime);
		if (p) {
			write_log(stdout, "Run started %04d.%02d.%02d %02d:%02d:%02d.%03d", (p->tm_year + 1900),
				(p->tm_mon + 1), p->tm_mday, p->tm_hour, p->tm_min, p->tm_sec, startTimeMillis);
		}
	}

	// Elapsed time since the first call, borrow a second if the millisecond difference is negative.

	int seconds = (int)(t.tv_sec - startTime);
	int millis = (int)(t.tv_usec / 1000) - startTimeMillis;
	if (millis < 0) {
		seconds--;
		millis += 1000;
	}
	int hours = seconds / 3600;
	int minutes = (seconds % 3600) / 60;
	seconds = seconds % 60;

	char str[MAX_LINE];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(str, sizeof(str), fmt, ap);
	va_end(ap);

	fprintf(out, "%3d:%02d:%02d.%03d - %s\n", hours, minutes, seconds, millis, str);
	fflush(out);
}
