In [1]:
%%capture

import warnings

warnings.filterwarnings("ignore")
import _ct_district_data_prep
# import _ct_district_visuals
import _report_utils
import calitp_data_analysis.magics
import geopandas as gpd
import pandas as pd
from great_tables import GT
from IPython.display import HTML, Image, Markdown, display, display_html
from slugify import slugify
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS

In [2]:
# Look up the default Google credentials and project for this environment.
import google.auth

credentials, project = google.auth.default()

import gcsfs

# Google Cloud Storage file-system handle.
# NOTE(review): `credentials`, `project` and `fs` are not referenced by any
# other cell visible in this notebook -- confirm whether they are still needed.
fs = gcsfs.GCSFileSystem()

In [3]:
# Keep the line below commented out, and leave this cell right below the pandas import
# district =  '02 - Redding'

In [4]:
# Parameters
# District label in "<number> - <name>" form: the numeric token is parsed into
# `district_int`, and the whole label is slugified for the digest URL.
district = "11 - San Diego"


In [5]:
# Address of this district's page on the GTFS Digest site: the fixed
# "district_" prefix followed by the slugified district label.
DISTRICT_DIGEST_URL = (
    "https://gtfs-digest--cal-itp-data-analyses.netlify.app/district_"
    + slugify(district)
)

In [6]:
%%capture_parameters
district, DISTRICT_DIGEST_URL

In [7]:
# Extract the numeric district code from the district label (e.g. the "11" in
# "11 - San Diego"); it is needed for the sjoin between CT districts & routes.
# Take the first all-digit token, and fail with a readable message instead of
# a bare IndexError when the label carries no number at all.
district_int = next(
    (int(token) for token in district.split() if token.isdigit()), None
)
if district_int is None:
    raise ValueError(f"No numeric district code found in district={district!r}")

In [8]:
# Load the datasets used throughout the report.

# Operator-level data for the selected district.
operator_df = _ct_district_data_prep.data_wrangling_operator_profile(district)

# Map layer restricted to the organizations present in `operator_df`.
operator_gdf = _ct_district_data_prep.data_wrangling_operator_map(
    list(operator_df.portfolio_organization_name.unique())
)

# District boundary. It used to be fetched twice with identical arguments (once
# per name); load it once and give `district_gdf` its own copy so both names
# stay available without repeating the read.
ct_district_gdf = _ct_district_data_prep.load_ct_district(district_int)
district_gdf = ct_district_gdf.copy()

# Transit routes on the State Highway Network, as a map layer and as a table.
# The 20 matches the "20% or more" inclusion threshold stated in the
# "Transit Routes on the State Highway Network" section text.
transit_route_shs_gdf, transit_route_shs_table = (
    _ct_district_data_prep.final_transit_route_shs_outputs(20, str(district_int))
)

In [9]:
# State Highway Network layer, loaded with this district's number; it is the
# base layer of the second map.
# NOTE(review): 50 is presumably the buffer distance -- confirm its meaning and
# units in _ct_district_data_prep.load_buffered_shn_map.
shn_gdf = _ct_district_data_prep.load_buffered_shn_map(50, district_int)

In [10]:
# Per-operator GTFS statistics derived from `operator_df`; rendered as the table
# in the "GTFS Stats by Operator" section.
gtfs_table_df = _ct_district_data_prep.create_gtfs_stats(operator_df)

# District 11 - San Diego

These are district summaries for [GTFS Digest](https://gtfs-digest--cal-itp-data-analyses.netlify.app/). 

Individual transit operators have their pages at: **[https://gtfs-digest--cal-itp-data-analyses.netlify.app/district_11-san-diego](https://gtfs-digest--cal-itp-data-analyses.netlify.app/district_11-san-diego)**

In [11]:
# District-level summary computed from `operator_df`, keyed on the
# "caltrans_district" column; feeds the two summary tables below.
district_summary = _report_utils.district_stats(operator_df, "caltrans_district")

In [12]:
# Build two great_tables summaries from `district_summary`: one for every column
# except the two ratio columns (this one carries the title), and one for the
# ratio columns alone, formatted with one decimal place.
ratio_cols = ["arrivals_per_stop", "trips_per_operator"]

counts_long = _report_utils.transpose_summary_stats(
    district_summary.drop(columns=ratio_cols),
    district_col="caltrans_district",
)
summary_table1 = GT(counts_long)
summary_table1 = summary_table1.fmt_number(columns="value", sep_mark=",")
summary_table1 = summary_table1.cols_label(index="")
summary_table1 = summary_table1.tab_header(
    title=f"District {district} GTFS summary stats"
)

ratios_long = _report_utils.transpose_summary_stats(
    district_summary[["caltrans_district", *ratio_cols]],
    district_col="caltrans_district",
)
summary_table2 = GT(ratios_long)
summary_table2 = summary_table2.fmt_number("value", decimals=1, sep_mark=",")
summary_table2 = summary_table2.cols_label(index="")

In [13]:
# Render both summary tables, titled table first.
display(summary_table1, summary_table2)

District 11 - San Diego GTFS summary stats,District 11 - San Diego GTFS summary stats
Unnamed: 0_level_1,Value
# Operators,5
# routes,177
# trips,9723
# stops,6306
# arrivals,277423


Unnamed: 0,Value
Arrivals per Stop,43.99
Trips per Operator,1944.6


## Routes within the District

In [14]:
# Base map: the district boundary in translucent grey on a light basemap.
m = district_gdf.explore(
    tiles="CartoDB positron",
    name="District",
    width=1000,
    height=500,
    legend=False,
    style_kwds=dict(color="#9DA4A6", opacity=0.5),
)

In [15]:
# Overlay the operator layer on the base map, coloured by organization name.
m = operator_gdf.explore(
    column="Portfolio Organization Name",
    categorical=True,
    cmap="Spectral",
    legend=False,
    legend_kwds=dict(width=200),
    m=m,
)

In [16]:
# Render the district map with the operator layer on top.
display(m)

## Transit Routes on the State Highway Network
**Only transit routes that have 20% or more of their length on one or more State Highway Network routes are included**

In [17]:
# Base map for the SHN section: the rows of `shn_gdf` whose District equals this
# district's number, drawn with a heavy translucent grey stroke.
in_district = shn_gdf["District"] == district_int
m2 = shn_gdf[in_district].explore(
    tiles="CartoDB positron",
    name="shs",
    width=1000,
    height=500,
    style_kwds=dict(color="#9DA4A6", weight=6, opacity=0.5),
)

In [18]:
# Add the transit routes on top of the SHN base map, shaded by the percentage
# column and with a legend.
m2 = transit_route_shs_gdf.explore(
    column="Percentage of Transit Route on SHN Across All Districts",
    cmap="Blues",
    legend=True,
    m=m2,
)

In [19]:
# Render the SHN map with the transit-route layer on top.
display(m2)

In [20]:
# Show the route table ordered by organization name (ascending) and, within each
# organization, by SHN percentage from highest to lowest.
sort_keys = [
    "Portfolio Organization Name",
    "Percentage of Transit Route on SHN Across All Districts",
]
shs_table_sorted = transit_route_shs_table.sort_values(
    by=sort_keys, ascending=[True, False]
)
GT(shs_table_sorted)

Portfolio Organization Name,Route,State Highway Network Routes in District 11,Percentage of Transit Route on SHN Across All Districts
Imperial County Transportation Commission,32,"78, 111, 98, 8, 86",91.0
Imperial County Transportation Commission,31,"78, 111, 98, 8, 86",88.9
Imperial County Transportation Commission,41,86,63.7
Imperial County Transportation Commission,21,"98, 8, 111, 86",62.5
Imperial County Transportation Commission,22,"115, 111, 78, 86",59.4
Imperial County Transportation Commission,2,"115, 111, 78, 86",44.0
Imperial County Transportation Commission,1,"98, 8, 111, 86",37.7
Imperial County Transportation Commission,Blue,"8, 86",21.5
North County Transit District,371 Escondido - Ramona,"67, 78",85.3
North County Transit District,388 Escondido - Pala,"76, 78",27.4


## GTFS Stats by Operator

In [21]:
# Names of the object-dtype columns in the stats table, so the number
# formatting in the next cell can skip them.
string_cols = gtfs_table_df.select_dtypes(include="object").columns.tolist()

In [22]:
# Build the per-operator GTFS statistics table, sorted by "# Trips" descending.

# Columns shown with one decimal place.
float_cols = ["Operator Service Miles", "Avg Arrivals per Stop"]

# Every remaining non-text column is formatted as an integer.
integer_cols = [
    c for c in gtfs_table_df.columns if c not in float_cols and c not in string_cols
]

# Centre only the non-text columns. The previous filter excluded "Organization"
# and "Transit Operator", which are not columns of this table (its text column
# is "Portfolio Organization Name"), so it matched nothing and the organization
# names were centred along with the numbers.
centered_cols = [c for c in gtfs_table_df.columns if c not in string_cols]

gtfs_table = (
    GT(gtfs_table_df.sort_values("# Trips", ascending=False))
    .fmt_integer(columns=integer_cols)
    .fmt_number(columns=float_cols, decimals=1)
    # Light-to-dark blue shading on the two headline metrics.
    .data_color(
        columns=["# Trips", "Avg Arrivals per Stop"],
        palette=["#e0ecf6", "#376fa9"],
        na_color="lightgray",
    )
    .tab_header(
        title=f"District {district}",
        subtitle="Daily GTFS schedule statistics by operator",
    )
    .cols_align(columns=centered_cols, align="center")
)

In [23]:
# Pass the table through _report_utils.great_table_formatting, then display it
# as the cell's output.
# NOTE(review): this rebinds `gtfs_table`, so re-running this cell on its own
# feeds an already-formatted table back through great_table_formatting.
gtfs_table = _report_utils.great_table_formatting(gtfs_table)
gtfs_table

District 11 - San Diego,District 11 - San Diego,District 11 - San Diego,District 11 - San Diego,District 11 - San Diego,District 11 - San Diego,District 11 - San Diego,District 11 - San Diego,District 11 - San Diego
Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator,Daily GTFS schedule statistics by operator
Portfolio Organization Name,# Routes,# Trips,# Shapes,# Stops,# Arrivals,Operator Service Miles,operator_arrivals_per_stop,Avg Arrivals per Stop
"San Diego Metropolitan Transit System, Airport, Flagship Cruises",105,7330,306,4241,219411,1112.2,52,51.7
North County Transit District,44,1343,100,1768,44981,577.5,25,25.4
"University of California, San Diego",6,778,6,67,9525,20.6,142,142.2
Yuma County Intergovernmental Public Transportation Authority,9,136,22,94,1277,267.2,14,13.6
Imperial County Transportation Commission,13,136,19,136,2229,283.2,16,16.4
