@@ -617,6 +617,39 @@ def head(self, n: int = 5) -> Series:
|
def tail(self, n: int = 5) -> Series:
    """Return the last ``n`` rows of the series.

    Args:
        n (int, default 5):
            Number of trailing rows to select. A negative ``n`` is handed to
            ``iloc`` as ``iloc[-n:]``, i.e. the slice starts at position ``|n|``.

    Returns:
        Series: The selected rows; empty when ``n`` is 0.
    """
    if n == 0:
        # ``-0`` is ``0``, so ``iloc[-0:]`` would select every row instead of none.
        return typing.cast(Series, self.iloc[:0])
    return typing.cast(Series, self.iloc[-n:])
|
619 | 619 |
|
def peek(self, n: int = 5, *, force: bool = True) -> pandas.Series:
    """
    Preview n arbitrary elements from the series. No guarantees about row selection or ordering.

    ``Series.peek(force=False)`` will always be very fast, but will not succeed if data requires
    full data scanning. Using ``force=True`` will always succeed, but may perform queries.
    Query results will be cached so that future steps will benefit from these queries.

    Args:
        n (int, default 5):
            The number of rows to select from the series. Which N rows are returned is non-deterministic.
        force (bool, default True):
            If the data cannot be peeked efficiently, the series will instead be fully materialized as part
            of the operation if ``force=True``. If ``force=False``, the operation will throw a ValueError.

    Returns:
        pandas.Series: A pandas Series with n rows.

    Raises:
        ValueError: If force=False and data cannot be efficiently peeked.
    """
    maybe_result = self._block.try_peek(n)
    if maybe_result is None:
        if not force:
            raise ValueError(
                "Cannot peek efficiently when data has aggregates, joins or window functions applied. Use force=True to fully compute dataframe."
            )
        # Materialize first, then retry the peek with force=True.
        self._cached()
        maybe_result = self._block.try_peek(n, force=True)
        assert maybe_result is not None
    # The block hands back a frame; collapse its column axis to get a Series
    # and restore this series' own name on the result.
    as_series = maybe_result.squeeze(axis=1)
    as_series.name = self.name
    return as_series
| 652 | + |
620 | 653 | def nlargest(self, n: int = 5, keep: str = "first") -> Series:
|
621 | 654 | if keep not in ("first", "last", "all"):
|
622 | 655 | raise ValueError("'keep must be one of 'first', 'last', or 'all'")
|
@@ -1400,7 +1433,7 @@ def apply(
|
1400 | 1433 |
|
1401 | 1434 | # return Series with materialized result so that any error in the remote
|
1402 | 1435 | # function is caught early
|
1403 |
| -materialized_series = result_series._cached() |
| 1436 | +materialized_series = result_series._cached(session_aware=False) |
1404 | 1437 | return materialized_series
|
1405 | 1438 |
|
1406 | 1439 | def combine(
|
@@ -1775,10 +1808,11 @@ def cache(self):
|
1775 | 1808 | Returns:
|
1776 | 1809 | Series: Self
|
1777 | 1810 | """
|
1778 |
| -return self._cached(force=True) |
| 1811 | +# Do not use session-aware caching if user-requested |
| 1812 | +return self._cached(force=True, session_aware=False) |
1779 | 1813 |
|
1780 |
def _cached(self, *, force: bool = True, session_aware: bool = True) -> Series:
    """Cache the underlying block and return this series.

    Args:
        force (bool, default True):
            Forwarded unchanged to ``self._block.cached``.
        session_aware (bool, default True):
            Forwarded unchanged to ``self._block.cached``.

    Returns:
        Series: Self
    """
    self._block.cached(force=force, session_aware=session_aware)
    return self
|
1783 | 1817 |
|
1784 | 1818 | def _optimize_query_complexity(self):
|
|
0 commit comments