perf: avoid extra API calls from to_dataframe if all rows are cached
Follow-up to the previous PR, which cached the first page of
`getQueryResults`. If the first page is the only page (no `pageToken`),
then `to_dataframe` and `to_arrow` do not need to make extra API calls
to the BigQuery Storage API.
@tswast committed Nov 11, 2020
commit 0912b88dbb80fcfea96c190bd124e285b1edcbac
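To illustrate the effect, here is a minimal usage sketch (the project ID and query are placeholders; it assumes a recent `google-cloud-bigquery` release where `create_bqstorage_client` defaults to `True` and `pandas` is installed). When the whole result fits in the cached first page of `getQueryResults`, `to_dataframe` can build the DataFrame from that cached response instead of opening a BigQuery Storage API read session.

```python
from google.cloud import bigquery

# Placeholder project ID; any project with BigQuery enabled works.
client = bigquery.Client(project="my-project")

# A tiny query whose entire result arrives in the first page of
# getQueryResults, so the cached response carries no pageToken.
query_job = client.query("SELECT 1 AS x UNION ALL SELECT 2 AS x")
row_iterator = query_job.result()

# With this change, to_dataframe() sees that every row is already cached
# and skips the BigQuery Storage API entirely, even when the library
# would otherwise create a BigQuery Storage client for the download.
df = row_iterator.to_dataframe()
print(df)
```

Larger results, or any result whose first page carries a `pageToken`, still fall through to the BigQuery Storage API or REST paging as before.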
@@ -1351,6 +1351,41 @@ def __init__(
self._total_rows = total_rows
self._first_page_response = first_page_response

def _is_completely_cached(self):
"""Check if all results are completely cached.

This is useful to know, because we can avoid alternative download
mechanisms.
"""
if self._first_page_response is None or self.next_page_token:
return False

return self._first_page_response.get(self._next_token) is None

def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client):
"""Returns if the BigQuery Storage API can be used.

Returns:
bool
True if the BigQuery Storage client can be used or created.
"""
using_bqstorage_api = bqstorage_client or create_bqstorage_client
if not using_bqstorage_api:
return False

if self._is_completely_cached():
return False

if self.max_results is not None:
warnings.warn(
"Cannot use bqstorage_client if max_results is set, "
"reverting to fetching data with the REST endpoint.",
stacklevel=2,
)
return False

return True

def _get_next_page_response(self):
"""Requests the next page from the path provided.

@@ -1412,6 +1447,9 @@ def _get_progress_bar(self, progress_bar_type):
def _to_page_iterable(
self, bqstorage_download, tabledata_list_download, bqstorage_client=None
):
if not self._validate_bqstorage(bqstorage_client, False):
bqstorage_client = None

if bqstorage_client is not None:
for item in bqstorage_download():
yield item
@@ -1503,14 +1541,7 @@ def to_arrow(
if pyarrow is None:
raise ValueError(_NO_PYARROW_ERROR)

if (
bqstorage_client or create_bqstorage_client
) and self.max_results is not None:
warnings.warn(
"Cannot use bqstorage_client if max_results is set, "
"reverting to fetching data with the REST endpoint.",
stacklevel=2,
)
if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
create_bqstorage_client = False
bqstorage_client = None

@@ -1687,14 +1718,7 @@ def to_dataframe(
if dtypes is None:
dtypes = {}

if (
bqstorage_client or create_bqstorage_client
) and self.max_results is not None:
warnings.warn(
"Cannot use bqstorage_client if max_results is set, "
"reverting to fetching data with the REST endpoint.",
stacklevel=2,
)
if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
create_bqstorage_client = False
bqstorage_client = None

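For readers skimming the diff, the decision the new helpers encode can be restated as a standalone sketch. The function below is illustrative only, not part of the library, and it hard-codes `"pageToken"` where the real code reads the iterator's `_next_token` attribute.

```python
import warnings


def should_use_bqstorage(
    first_page_response,
    next_page_token,
    max_results,
    bqstorage_client,
    create_bqstorage_client,
):
    """Illustrative restatement of the checks added in this commit."""
    # No client passed in and none should be created: use REST paging.
    if not (bqstorage_client or create_bqstorage_client):
        return False

    # Every row already arrived in the cached first page (no pageToken),
    # so opening a BigQuery Storage read session would be wasted work.
    completely_cached = (
        first_page_response is not None
        and not next_page_token
        and first_page_response.get("pageToken") is None
    )
    if completely_cached:
        return False

    # max_results is incompatible with the BigQuery Storage API.
    if max_results is not None:
        warnings.warn(
            "Cannot use bqstorage_client if max_results is set, "
            "reverting to fetching data with the REST endpoint.",
            stacklevel=2,
        )
        return False

    return True
```

This mirrors why the old `max_results` warning in `to_arrow` and `to_dataframe` could be replaced by a single call to `_validate_bqstorage`: the cached-page check and the `max_results` check now live in one place, and `_to_page_iterable` reuses the same helper to drop an unusable `bqstorage_client`.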