In [1]:
%%capture

import warnings

warnings.filterwarnings("ignore")
import _ct_district_data_prep
# import _ct_district_visuals
import _report_utils
import calitp_data_analysis.magics
import geopandas as gpd
import pandas as pd
from great_tables import GT
from IPython.display import HTML, Image, Markdown, display, display_html
from slugify import slugify
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS

In [2]:
import gcsfs
import google.auth

# Resolve the application-default Google credentials and the associated project.
credentials, project = google.auth.default()

# Filesystem handle for Google Cloud Storage.
fs = gcsfs.GCSFileSystem()

In [3]:
# Comment out and leave this cell right below pandas
# district =  '02 - Redding'

In [4]:
# Parameters
district = "12 - Santa Ana"


In [5]:
# URL of this district's page on the GTFS Digest site; the district label
# is slugified to form the last path segment.
district_slug = slugify(district)
DISTRICT_DIGEST_URL = (
    "https://gtfs-digest--cal-itp-data-analyses.netlify.app/district_" + district_slug
)

In [6]:
%%capture_parameters
district, DISTRICT_DIGEST_URL

In [7]:
# Extract the district number from the district label (the first
# whitespace-separated token made only of digits), for use in the sjoin
# between CT districts & routes.
district_int = next(
    (int(token) for token in district.split() if token.isdigit()),
    None,
)
if district_int is None:
    # Fail with a message that names the offending label instead of a bare
    # "list index out of range".
    raise ValueError(f"No district number found in district label: {district!r}")

In [8]:
# Load Datasets
# Operator profile rows for the selected district.
operator_df = _ct_district_data_prep.data_wrangling_operator_profile(district)

# Route geometries for every organization that appears in the operator profile.
operator_gdf = _ct_district_data_prep.data_wrangling_operator_map(
    list(operator_df.portfolio_organization_name.unique())
)

# District boundary. Loaded once; both historical names are kept so any cell
# that refers to either one still works.
ct_district_gdf = _ct_district_data_prep.load_ct_district(district_int)

# Transit routes on the State Highway System: a map layer and a display table.
# NOTE(review): 20 presumably is the minimum percent of route length on the
# SHN (the section text in this notebook states a 20% cutoff) — confirm
# against _ct_district_data_prep.
transit_route_shs_gdf, transit_route_shs_table = (
    _ct_district_data_prep.final_transit_route_shs_outputs(20, str(district_int))
)

# Independent copy of the boundary instead of a second identical load.
district_gdf = ct_district_gdf.copy()

In [9]:
# Buffered State Highway Network layer used as the base of the second map.
# NOTE(review): 50 is presumably the buffer distance; its units are not
# visible in this notebook — confirm in _ct_district_data_prep.
shn_gdf = _ct_district_data_prep.load_buffered_shn_map(50, district_int)

In [10]:
# Per-operator GTFS stats derived from the operator profile; rendered as the
# final table of the notebook.
gtfs_table_df = _ct_district_data_prep.create_gtfs_stats(operator_df)

# District 12 - Santa Ana

These are district summaries for [GTFS Digest](https://gtfs-digest--cal-itp-data-analyses.netlify.app/). 

Individual transit operators have their pages at: **[https://gtfs-digest--cal-itp-data-analyses.netlify.app/district_12-santa-ana](https://gtfs-digest--cal-itp-data-analyses.netlify.app/district_12-santa-ana)**

In [11]:
# District-level summary stats built from the operator profile, keyed on the
# "caltrans_district" column (presumably an aggregation across operators —
# see _report_utils.district_stats).
district_summary = _report_utils.district_stats(operator_df, "caltrans_district")

In [12]:
# Table 1: every summary stat except the two per-unit ratios.
# NOTE(review): fmt_number is called without `decimals` here, so the library
# default applies — confirm that is the intended display for count values.
count_stats = _report_utils.transpose_summary_stats(
    district_summary.drop(columns=["arrivals_per_stop", "trips_per_operator"]),
    district_col="caltrans_district",
)
summary_table1 = (
    GT(count_stats)
    .fmt_number(columns="value", sep_mark=",")
    .cols_label(index="")
    .tab_header(title=f"District {district} GTFS summary stats")
)

# Table 2: the two ratios, shown with one decimal place and no header.
ratio_stats = _report_utils.transpose_summary_stats(
    district_summary[["caltrans_district", "arrivals_per_stop", "trips_per_operator"]],
    district_col="caltrans_district",
)
summary_table2 = (
    GT(ratio_stats)
    .fmt_number(columns="value", decimals=1, sep_mark=",")
    .cols_label(index="")
)

In [13]:
# Show the counts table followed by the ratios table.
display(summary_table1, summary_table2)

District 12 - Santa Ana GTFS summary stats,District 12 - Santa Ana GTFS summary stats
Unnamed: 0_level_1,Value
# Operators,9
# routes,85
# trips,16233
# stops,5526
# arrivals,213058


Unnamed: 0,Value
Arrivals per Stop,38.56
Trips per Operator,1803.67


## Routes within the District

In [14]:
# Base layer: the district boundary in a muted, semi-transparent grey.
district_style = {"color": "#9DA4A6", "opacity": 0.5}

m = district_gdf.explore(
    tiles="CartoDB positron",
    name="District",
    style_kwds=district_style,
    width=1000,
    height=500,
    legend=False,
)

In [15]:
# Overlay operator routes on the district map, one colour per organization.
# The legend is switched off, so legend_kwds is kept only for parity with the
# existing call.
operator_layer_options = {
    "column": "Portfolio Organization Name",
    "cmap": "Spectral",
    "categorical": True,
    "legend": False,
    "legend_kwds": {"width": 200},
}
m = operator_gdf.explore(m=m, **operator_layer_options)

In [16]:
# Render the district boundary map with the operator route layer on top.
display(m)

## Transit Routes on the State Highway Network
**Only transit routes that have 20% or more of their length on one or more State Highway Network routes are included.**

In [17]:
# Base layer: buffered SHN segments belonging to this district, drawn as
# wide, semi-transparent grey lines.
in_district = shn_gdf["District"] == district_int

m2 = shn_gdf.loc[in_district].explore(
    tiles="CartoDB positron",
    name="shs",
    style_kwds={"color": "#9DA4A6", "weight": 6, "opacity": 0.5},
    width=1000,
    height=500,
)

In [18]:
# Overlay the transit routes, shaded by their share of length on the SHN.
shn_share_column = "Percentage of Transit Route on SHN Across All Districts"

m2 = transit_route_shs_gdf.explore(
    column=shn_share_column,
    cmap="Blues",
    legend=True,
    m=m2,
)

In [19]:
# Render the SHN base layer with the transit route layer on top.
display(m2)

In [20]:
# Operators in alphabetical order; within each operator, the routes with the
# largest share on the SHN come first.
shs_sort_keys = [
    "Portfolio Organization Name",
    "Percentage of Transit Route on SHN Across All Districts",
]
transit_route_shs_sorted = transit_route_shs_table.sort_values(
    by=shs_sort_keys, ascending=[True, False]
)

GT(transit_route_shs_sorted)

Portfolio Organization Name,Route,State Highway Network Routes in District 12,Percentage of Transit Route on SHN Across All Districts
Anaheim Transportation Network,15 15 ARTIC Sports Complex Line,"5, 91, 57",42.6
Anaheim Transportation Network,06/12 Combo 06 Disney Way / 12 Manchester Line,5,26.2
City of Laguna Beach,Short Coastal,"133, 1",93.0
City of Laguna Beach,Long Coastal,"133, 1",73.8
City of Laguna Beach,Coastal Trolley,"133, 1",62.3
Orange County Transportation Authority,29A Buena Park - Huntington Beach,"5, 39, 91, 1, 22, 405",76.6
Orange County Transportation Authority,529 Rapid Fullerton - Huntington Beach,"5, 39, 91, 22, 405",66.7
Orange County Transportation Authority,83 Fullerton - Laguna Hills,"5, 91, 133, 22, 57, 405, 261, 55",64.0


## GTFS Stats by Operator

In [21]:
# Names of the object-dtype (text) columns, so they can be left out of the
# numeric formatting applied when the table is built.
string_cols = gtfs_table_df.select_dtypes(include="object").columns.tolist()

In [22]:
# Columns shown with one decimal place; every other non-text column is
# formatted as a whole number.
decimal_cols = ["Operator Service Miles", "Avg Arrivals per Stop"]
integer_cols = [
    c
    for c in gtfs_table_df.columns
    if c not in decimal_cols and c not in string_cols
]

# Center only the non-text columns. The previous exclusion list named
# "Organization" / "Transit Operator", which are not columns of the rendered
# table, so the text column was being centered as well.
centered_cols = [c for c in gtfs_table_df.columns if c not in string_cols]

# NOTE(review): the rendered table includes a raw-named
# "operator_arrivals_per_stop" column next to "Avg Arrivals per Stop" —
# confirm whether create_gtfs_stats should rename or drop it.
gtfs_table = (
    # Busiest operators (by trip count) first.
    GT(gtfs_table_df.sort_values("# Trips", ascending=False))
    .fmt_integer(columns=integer_cols)
    .fmt_number(columns=decimal_cols, decimals=1)
    # Light-to-dark blue shading on the two headline metrics.
    .data_color(
        columns=["# Trips", "Avg Arrivals per Stop"],
        palette=["#e0ecf6", "#376fa9"],
        na_color="lightgray",
    )
    .tab_header(
        title=f"District {district}",
        subtitle="Daily GTFS schedule statistics by operator",
    )
    .cols_align(columns=centered_cols, align="center")
)

In [23]:
# Pass the table through the report's formatting helper (presumably shared
# styling — see _report_utils.great_table_formatting), then display it as the
# cell output.
gtfs_table = _report_utils.great_table_formatting(gtfs_table)
gtfs_table

District 12 - Santa Ana,District 12 - Santa Ana,District 12 - Santa Ana,District 12 - Santa Ana,District 12 - Santa Ana,District 12 - Santa Ana,District 12 - Santa Ana,District 12 - Santa Ana,District 12 - Santa Ana
Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator
Portfolio Organization Name,# Routes,# Trips,# Shapes,# Stops,# Arrivals,Operator Service Miles,operator_arrivals_per_stop,Avg Arrivals per Stop
Anaheim Transportation Network,12,12504,165,70,25396,55.3,363,362.8
Orange County Transportation Authority,58,3177,155,5188,179063,944.3,35,34.5
"University of California, Irvine",6,318,13,28,2299,21.8,82,82.1
City of Irvine,1,80,2,76,3160,12.3,42,41.6
City of San Clemente,3,73,3,37,1102,19.5,30,29.8
City of Laguna Beach,1,38,4,5,141,9.1,28,28.2
City of Mission Viejo,1,23,1,66,1518,34.5,23,23.0
City of San Juan Capistrano,1,18,1,19,342,6.6,18,18.0
City of Dana Point,2,2,2,37,37,17.8,1,1.0
