Merged
Changes from all commits
Show all changes
23 commitsSelect commit Hold shift + click to select a range
b3771b8
feat: Add Series.peek to preview data efficiently
TrevorBergerone865395
Merge remote-tracking branch '/main' into series_cache
TrevorBergeronf227476
add another test
TrevorBergeron68fc1e1
Merge remote-tracking branch '/main' into series_cache
TrevorBergerona17e027
cleanup comments
TrevorBergeron1764106
Merge remote-tracking branch '/main' into series_cache
TrevorBergeron5ff4661
more comments, up to 4 cluster cols for session-based caching
TrevorBergeron936e73d
add another session caching test
TrevorBergeron41f6083
Merge remote-tracking branch '/main' into series_cache
TrevorBergeronffbc518
Merge remote-tracking branch '/main' into series_cache
TrevorBergerona9b16c4
add todo for geo predicate detection
TrevorBergeron83fc8fb
Merge branch 'main' into series_cache
tswastec1d973
add dtype clusterable and orderable property
TrevorBergeronc307625
Merge remote-tracking branch '/main' into series_cache
TrevorBergeronb917c71
fix session aware caching unit tests
TrevorBergeron848d0a4
mock session for planner test
TrevorBergeron79d05b5
fix offsets column name collision
TrevorBergeron06c9866
Merge remote-tracking branch '/main' into series_cache
TrevorBergeron2ed1520
Update bigframes/dtypes.py
TrevorBergeron1ff4f68
Update bigframes/dtypes.py
TrevorBergeron81e5a02
add another series peek test
TrevorBergerone91dbb5
remove partial comment
TrevorBergeron108e449
Merge remote-tracking branch '/main' into series_cache
TrevorBergeronFile filter
Filter by extension
Conversations
Failed to load comments.
Uh oh!
There was an error while loading. Please reload this page.
Jump to
Failed to load files.
Uh oh!
There was an error while loading. Please reload this page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# Copyright 2024 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
import bigframes.core.expression as ex | ||
import bigframes.core.schema as schemata | ||
import bigframes.dtypes | ||
import bigframes.operations as ops | ||
# NOTE(review): presumably the dtypes considered to have too few distinct
# values to be worth clustering on -- not referenced in this section; confirm
# against callers.
LOW_CARDINALITY_TYPES = [bigframes.dtypes.BOOL_DTYPE]

# Classes of the comparison operators, derived from the op singleton instances
# so the tuple can be handed directly to ``isinstance``.
COMPARISON_OP_TYPES = tuple(
    map(
        type,
        (
            ops.eq_op,
            ops.eq_null_match_op,
            ops.ne_op,
            ops.gt_op,
            ops.ge_op,
            ops.lt_op,
            ops.le_op,
        ),
    )
)
def cluster_cols_for_predicate(
    predicate: ex.Expression, schema: schemata.ArraySchema
) -> list[str]:
    """Suggest clustering column candidates for a filter predicate.

    The predicate tree is walked recursively:

    * a bare variable reference yields that variable's id;
    * a comparison op defers to ``cluster_cols_for_comparison`` on its two
      inputs;
    * an invert op yields whatever its single input yields;
    * an and/or op yields the left input's columns followed by the right
      input's columns that the left side did not already produce;
    * anything else (other ops, constants) yields nothing.

    Args:
        predicate: The predicate expression to inspect.
        schema: Schema used to look up the dtype of each candidate column.

    Returns:
        Candidate column ids, restricted to those whose dtype
        ``bigframes.dtypes.is_clusterable`` accepts.
    """
    # TODO: Prioritize based on predicted selectivity (eg. equality conditions are probably very selective)
    # Default covers constants and unsupported operators.
    candidates: list[str] = []

    if isinstance(predicate, ex.UnboundVariableExpression):
        candidates = [predicate.id]
    elif isinstance(predicate, ex.OpExpression):
        operator = predicate.op
        # TODO: Support geo predicates, which support pruning if clustered (other than st_disjoint)
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions
        if isinstance(operator, COMPARISON_OP_TYPES):
            lhs_ex, rhs_ex = predicate.inputs[0], predicate.inputs[1]
            candidates = cluster_cols_for_comparison(lhs_ex, rhs_ex)
        elif isinstance(operator, type(ops.invert_op)):
            candidates = cluster_cols_for_predicate(predicate.inputs[0], schema)
        elif isinstance(operator, (type(ops.and_op), type(ops.or_op))):
            from_left = cluster_cols_for_predicate(predicate.inputs[0], schema)
            from_right = cluster_cols_for_predicate(predicate.inputs[1], schema)
            # Left-side columns keep priority; right-side columns are added
            # only when the left side did not already contribute them.
            candidates = list(from_left)
            candidates.extend(name for name in from_right if name not in from_left)

    # Keep only columns whose dtype can actually be clustered on.
    clusterable: list[str] = []
    for name in candidates:
        if bigframes.dtypes.is_clusterable(schema.get_type(name)):
            clusterable.append(name)
    return clusterable
def cluster_cols_for_comparison(
    left_ex: ex.Expression, right_ex: ex.Expression
) -> list[str]:
    """Find the column a comparison constrains, if it has a simple shape.

    Only comparisons where one side is constant and the opposite side is a
    bare variable reference are recognized.

    Args:
        left_ex: Left operand of the comparison.
        right_ex: Right operand of the comparison.

    Returns:
        A single-element list with the variable's id when the comparison has
        the recognized shape, otherwise an empty list.
    """
    # TODO: Try to normalize expressions such that one side is a single variable.
    # eg. Convert -cola>=3 to cola<-3 and colb+3 < 4 to colb < 1
    if left_ex.is_const:
        non_const_side = right_ex
    elif right_ex.is_const:
        non_const_side = left_ex
    else:
        # Neither side is constant.
        return []

    # There are some invertible ops that would also be ok
    if isinstance(non_const_side, ex.UnboundVariableExpression):
        return [non_const_side.id]
    return []
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Oops, something went wrong.
Uh oh!
There was an error while loading. Please reload this page.
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.