#!/usr/bin/env python3
# The above line is a "shebang". It tells the system to use the Python 3
# interpreter to run this script if it's executed directly.

# --- Shell Guard ---
# Polyglot block that stops the script when it is fed to a POSIX shell
# (e.g. `sh script.py` or `bash script.py`) instead of the Python interpreter.
#
# How it works:
#   * Shell:  the opening line ''':' is parsed as an empty string ('')
#             followed by the quoted word ':' -- i.e. the no-op command `:`.
#             The echo lines and `exit 1` below then run as ordinary shell
#             commands, so the shell never reaches the Python code.
#   * Python: everything between the two ''' markers is a single string
#             literal. As the first statement of the file it becomes the
#             module docstring; it has no other effect.
#
# NOTE: a plain triple-double-quote opener does NOT work here: the shell reads
# it as an empty string followed by an opening quote, which glues the first
# `echo` into a bogus command name and swallows `exit 1` into an argument.
''':'
echo "ERROR: This is a Python script, not a shell script." >&2
echo "Please run it with the 'python' command: python $0" >&2
exit 1
'''
# --- End Shell Guard ---

import argparse
import pandas as pd
from pathlib import Path

# --- ANSI escape codes for terminal colors ---
class Colors:
    """Namespace of ANSI escape sequences used to style terminal output."""
    # Switches every attribute back to the terminal default.
    ENDC = "\033[0m"
    # Bold weight combined with a red foreground.
    BOLD_RED = "\033[1;31m"

def extract_population(name_str):
    """
    Extract the population integer from a 'Name' column value.

    The population is the text that follows the last hyphen.
    Example: '153855-255288-1034' -> 1034

    Args:
        name_str: The 'Name' cell value. Normally a string; any other type
            (for example the float NaN that pandas uses for an empty cell)
            is accepted and treated as malformed.

    Returns:
        The trailing field as an int, or 0 when the value is not a string or
        its trailing field cannot be parsed as an integer.
    """
    if not isinstance(name_str, str):
        # Non-string values have no text to parse; report them as 0 like any
        # other malformed name instead of raising AttributeError.
        return 0

    # rpartition gives the text after the last hyphen, or the whole string
    # when there is no hyphen at all.
    _, _, last_field = name_str.rpartition('-')
    try:
        return int(last_field)
    except ValueError:
        # The trailing field is empty or not a number.
        return 0

def _sum_population(merged_df, serv_before, serv_after):
    """Return the total population of rows whose Serv went serv_before -> serv_after."""
    transition = (
        (merged_df['Serv_1'] == serv_before)
        & (merged_df['Serv_2'] == serv_after)
    )
    # int() converts the NumPy scalar returned by .sum() into a plain int so
    # the ',' thousands format used by the caller always applies.
    return int(merged_df.loc[transition, 'population'].sum())


def compare_studies(folder1, folder2):
    """
    Compare the tvstudy.csv files of two folders and print population changes.

    Rows of the two files are matched on the 'Name' column; rows present in
    only one file are ignored. For each matched row the population (parsed
    from 'Name' by extract_population) is added to one of four totals,
    chosen by the 'Serv' value in the first and second file:
    0 -> 0 (no service), 1 -> 1 (service in both), 1 -> 0 (loss) and
    0 -> 1 (gain). Rows with any other 'Serv' combination are not counted.

    Args:
        folder1: Folder containing the first tvstudy.csv.
        folder2: Folder containing the second tvstudy.csv.

    Returns:
        None. Results and error messages are printed to standard output.
    """
    # --- 1. Construct paths and validate file existence ---
    path1 = Path(folder1) / 'tvstudy.csv'
    path2 = Path(folder2) / 'tvstudy.csv'

    for path in (path1, path2):
        if not path.is_file():
            print(f"Error: File not found at '{path}'")
            return

    print(f"Comparing '{path1}' and '{path2}'...")

    try:
        # --- 2. Load data, skipping the first 7 rows ---
        # NOTE(review): the 7 skipped rows are presumably a preamble ahead of
        # the column header line -- confirm against a real tvstudy.csv.
        df1 = pd.read_csv(path1, skiprows=7)
        df2 = pd.read_csv(path2, skiprows=7)

        # --- 3. Fail with a clear message when a needed column is absent ---
        # Without this check a missing column surfaces as a bare KeyError.
        required_columns = ('Name', 'Serv')
        for path, df in ((path1, df1), (path2, df2)):
            missing = [col for col in required_columns if col not in df.columns]
            if missing:
                print(
                    f"Error: '{path}' is missing required column(s): "
                    f"{', '.join(missing)}"
                )
                return

        # --- 4. Extract population from the 'Name' column ---
        # Only the first file's population is needed: matched rows share the
        # same 'Name', hence the same parsed population.
        df1['population'] = df1['Name'].apply(extract_population)

        # --- 5. Merge the two dataframes based on the 'Name' column ---
        # Inner join: only names present in both files are compared.
        merged_df = pd.merge(
            df1[['Name', 'Serv', 'population']],
            df2[['Name', 'Serv']],
            on='Name',
            how='inner',
            suffixes=('_1', '_2')
        )

        # --- 6. Sum populations per service transition (vectorized) ---
        pop_no_service = _sum_population(merged_df, 0, 0)    # Serv 0 -> 0
        pop_both_service = _sum_population(merged_df, 1, 1)  # Serv 1 -> 1
        pop_loss = _sum_population(merged_df, 1, 0)          # Serv 1 -> 0
        pop_gain = _sum_population(merged_df, 0, 1)          # Serv 0 -> 1

        # --- 7. Print the final results ---
        print("\n--- Population Comparison Results ---")
        print(f"No service in either case:  {pop_no_service:,}")
        print(f"L-R service in either case: {pop_both_service:,}")
        print(f"Gain with L-R:              {pop_gain:,}")
        # Apply bold red color to the loss line and reset it afterwards
        print(f"{Colors.BOLD_RED}Loss with L-R:              {pop_loss:,}{Colors.ENDC}")
        print("-------------------------------------\n")

    except Exception as e:
        # Top-level boundary for this command: report any read/parse/processing
        # failure to the user instead of ending with a traceback.
        print(f"An error occurred during processing: {e}")

if __name__ == "__main__":
    # Command-line entry point: takes two positional folder arguments and
    # hands them to compare_studies.
    cli = argparse.ArgumentParser(
        description="Compare two tvstudy.csv files from different folders."
    )

    # (argument name, help text) pairs, registered in positional order.
    positional_arguments = (
        ("folder1", "The first folder containing a tvstudy.csv file."),
        ("folder2", "The second folder containing a tvstudy.csv file."),
    )
    for argument_name, help_text in positional_arguments:
        cli.add_argument(argument_name, help=help_text)

    options = cli.parse_args()
    compare_studies(options.folder1, options.folder2)
