Skip to content

Commit 70744ea

Browse files
committed
Swap pandas with fireducks
1 parent b16a502 commit 70744ea

File tree

10 files changed

+60
-41
lines changed

10 files changed

+60
-41
lines changed

mbta-performance/chalicelib/gtfs.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
from datetime import date
22
from tempfile import TemporaryDirectory
3-
import pandas as pd
43
from typing import Iterable
5-
import boto3
64

5+
import boto3
6+
import fireducks.pandas as pd
77
from mbta_gtfs_sqlite import MbtaGtfsArchive
88
from mbta_gtfs_sqlite.models import StopTime, Trip
99
from sqlalchemy import or_

mbta-performance/chalicelib/historic/gtfs_archive.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
import datetime
2-
import pandas as pd
32
import pathlib
43
import shutil
54
import urllib.request
65

6+
import fireducks.pandas as pd
7+
78
from ..date import to_dateint
89

910
MAIN_DIR = pathlib.Path("./data/gtfs_archives/")

mbta-performance/chalicelib/historic/process.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,19 @@
1-
import pandas as pd
2-
import uuid
31
import pathlib
4-
from .constants import HISTORIC_COLUMNS_PRE_LAMP as HISTORIC_COLUMNS
2+
import uuid
3+
from datetime import datetime
4+
5+
import fireducks.pandas as pd
6+
57
from .constants import (
68
CSV_FIELDS,
79
arrival_field_mapping,
810
departure_field_mapping,
11+
inbound_outbound,
912
station_mapping,
1013
unofficial_ferry_labels_map,
11-
inbound_outbound,
1214
)
15+
from .constants import HISTORIC_COLUMNS_PRE_LAMP as HISTORIC_COLUMNS
1316
from .gtfs_archive import add_gtfs_headways
14-
from datetime import datetime
1517

1618

1719
def process_events(input_csv: str, outdir: str, nozip: bool = False, columns: list = HISTORIC_COLUMNS):

mbta-performance/chalicelib/lamp/backfill/main.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
1-
import pandas as pd
2-
from ..ingest import fetch_pq_file_from_remote, ingest_pq_file, upload_to_s3
3-
from ... import parallel
41
from datetime import date, timedelta
52

3+
import fireducks.pandas as pd
4+
5+
from ... import parallel
6+
from ..ingest import fetch_pq_file_from_remote, ingest_pq_file, upload_to_s3
67

78
_parallel_upload = parallel.make_parallel(upload_to_s3)
89

mbta-performance/chalicelib/lamp/ingest.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from datetime import date
33
from typing import Tuple
44

5-
import pandas as pd
5+
import fireducks.pandas as pd
66
import requests
77

88
from .. import parallel, s3

mbta-performance/chalicelib/lamp/tests/test_ingest.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,12 @@
1-
from datetime import date
21
import io
32
import os
43
import unittest
4+
from datetime import date
55
from unittest import mock
66

7-
import pandas as pd
7+
import fireducks.pandas as pd
88

9-
from .. import ingest
10-
from .. import constants
9+
from .. import constants, ingest
1110

1211
# The sample file attached here is 10k events sampled from Feb 7th, 2024.
1312
# These rows contain real-world inconsistencies in their data!

mbta-performance/chalicelib/parallel.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
from concurrent.futures import ThreadPoolExecutor, as_completed
22

3-
import pandas as pd
3+
import fireducks.pandas as pd
44

55

66
def make_parallel(single_func, THREAD_COUNT=5):

mbta-performance/chalicelib/s3.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
1-
import boto3
21
import io
3-
import pandas as pd
4-
import zlib
52
import time
3+
import zlib
4+
5+
import boto3
6+
import fireducks.pandas as pd
67

78
s3 = boto3.client("s3")
89
cloudfront = boto3.client("cloudfront")

pyproject.toml

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,10 @@ license = "MIT"
88
dependencies = [
99
"requests>=2.32.3,<3",
1010
"boto3~=1.34.162",
11-
"pandas~=2.2.3",
1211
"datadog_lambda==6.109.0",
13-
"pyarrow~=17.0.0",
12+
"pyarrow~=21.0.0",
1413
"mbta-gtfs-sqlite~=1.1.1",
14+
"fireducks~=1.4.3",
1515
]
1616

1717
[dependency-groups]
@@ -28,7 +28,3 @@ package = false
2828
[build-system]
2929
requires = ["hatchling"]
3030
build-backend = "hatchling.build"
31-
32-
[tool.black]
33-
line-length = 120
34-
target-version = ['py312']

uv.lock

Lines changed: 34 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)