#!/usr/bin/env python3
# The above line is a "shebang". It tells the system to use the Python 3
# interpreter to run this script if it's executed directly.

# --- Shell Guard ---
# If this file is accidentally run with a POSIX shell (for example
# `sh script.py` or `bash script.py`) instead of Python, the block below
# prints an error and exits with status 1 before the shell reaches any of the
# Python code.
#
# How it works: a shell reads the opening line as an empty string followed by
# a quoted ":" -- that is, the no-op command `:` -- and then runs the echo and
# exit lines as ordinary commands. Python reads everything between the two
# triple quotes as a single string literal (technically the module
# docstring), which has no effect at runtime.
#
# The ':"' suffix on the opening line is required. With a bare triple quote a
# shell would treat the whole block as one oddly-named command, fail with
# "command not found", and keep executing the rest of the file.
""":"
echo "ERROR: This is a Python script, not a shell script." >&2
echo "Please run it with the 'python' command: python $0" >&2
exit 1
"""
# --- End Shell Guard ---

import argparse
import pandas as pd
from pathlib import Path

# --- ANSI escape codes for terminal colors ---
class Colors:
    """ANSI escape sequences used to style lines printed to the terminal."""

    # Bold red foreground.
    BOLD_RED = "\033[1;31m"
    # Bold yellow foreground.
    BOLD_YELLOW = "\033[1;33m"
    # Reset: clears all formatting applied by the codes above.
    ENDC = "\033[0m"

def extract_population(name_str):
    """
    Extracts the population integer from a 'Name' column value.

    The population is the text after the last hyphen.
    Example: '153855-255288-1034' -> 1034

    Args:
        name_str: The 'Name' value; expected to be a string.

    Returns:
        The trailing segment parsed as an int. Returns 0 when the value is
        not a string (for example None, or the float NaN that pandas uses
        for a missing cell) or when the trailing segment is not a valid
        integer.
    """
    try:
        # rpartition yields the text after the last hyphen, or the whole
        # string when it contains no hyphen at all.
        return int(name_str.rpartition('-')[2])
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: the value is not a str.
        # ValueError: the trailing segment is empty or not numeric.
        return 0

def analyze_service_counts(folder1, folder2):
    """
    Prints the total population found at each service level.

    A 'tvstudy.csv' file is read from each folder. The file in folder1
    provides the master list of unique location names, and each location's
    population is derived from its name via extract_population. The file in
    folder2 provides the service rows: every row with InSA equal to 1 counts
    as one service for that row's location. Populations are then summed per
    service count and printed, highest count first.

    Problems (a missing file, or any exception while processing) are
    reported with a printed message rather than raised. Nothing is returned.
    """
    # --- Step 1: locate both input files and stop early if one is absent ---
    master_csv = Path(folder1) / 'tvstudy.csv'
    service_csv = Path(folder2) / 'tvstudy.csv'

    if not master_csv.is_file():
        print(f"Error: Master file not found at '{master_csv}'")
        return
    if not service_csv.is_file():
        print(f"Error: Service data file not found at '{service_csv}'")
        return

    print(f"Using master list from: '{master_csv}'")
    print(f"Analyzing service data from: '{service_csv}'\n")

    try:
        # --- Step 2: read both CSVs, ignoring the first 7 rows of each ---
        master_rows = pd.read_csv(master_csv, skiprows=7)
        service_rows = pd.read_csv(service_csv, skiprows=7)

        # --- Step 3: one row per distinct master location, with population ---
        distinct_names = master_rows['Name'].unique()
        locations = pd.DataFrame(distinct_names, columns=['Name'])
        locations['population'] = locations['Name'].apply(extract_population)

        # --- Step 4: per-location number of service rows (InSA == 1) ---
        has_service = service_rows['InSA'] == 1
        per_location = service_rows[has_service].groupby('Name').size()
        per_location = per_location.rename('service_count')

        # --- Step 5: attach the counts to the master list ---
        # A left merge keeps every master location; those with no matching
        # service rows come back as NaN and are turned into 0 below.
        merged = locations.merge(per_location, on='Name', how='left')
        merged['service_count'] = merged['service_count'].fillna(0).astype(int)

        # --- Step 6: total population for each distinct service count ---
        population_by_level = merged.groupby('service_count')['population'].sum()

        # --- Step 7: report, highest service count first ---
        print("--- Service Count Summary ---")
        for level, people in population_by_level.sort_index(ascending=False).items():
            # Counts of 0 or 4 are shown in red, 1 through 3 in yellow,
            # and anything else is left unstyled.
            if level in (0, 4):
                prefix = Colors.BOLD_RED
            elif 1 <= level <= 3:
                prefix = Colors.BOLD_YELLOW
            else:
                prefix = ""

            # Only emit the reset code when a color was actually applied.
            suffix = Colors.ENDC if prefix else ""
            print(f"{prefix}Population with {level} services:  {people:,}{suffix}")

        print("-----------------------------\n")

    except Exception as e:
        print(f"An error occurred during processing: {e}")

if __name__ == "__main__":
    # Command-line entry point: takes two positional folder arguments and
    # hands them to analyze_service_counts.
    cli = argparse.ArgumentParser(
        description="Summarize service counts by comparing two tvstudy files."
    )
    for arg_name, arg_help in (
        ("folder1", "The folder containing the master tvstudy.csv file."),
        ("folder2", "The folder containing the service data tvstudy.csv file."),
    ):
        cli.add_argument(arg_name, help=arg_help)

    cli_args = cli.parse_args()
    analyze_service_counts(cli_args.folder1, cli_args.folder2)
