|
30 | 30 | Iterable,
|
31 | 31 | List,
|
32 | 32 | Literal,
|
| 33 | +Mapping, |
33 | 34 | MutableSequence,
|
34 | 35 | Optional,
|
35 | 36 | Sequence,
|
@@ -115,6 +116,11 @@ def _is_query(query_or_table: str) -> bool:
|
115 | 116 | return re.search(r"\s", query_or_table.strip(), re.MULTILINE) is not None
|
116 | 117 |
|
117 | 118 |
|
def _is_table_with_wildcard_suffix(query_or_table: str) -> bool:
    """Tell whether `query_or_table` is a table reference ending in a `*` wildcard.

    Anything that `_is_query` classifies as a query is rejected first; only
    then is the trailing character inspected.
    """
    if _is_query(query_or_table):
        return False
    return query_or_table.endswith("*")
| 122 | + |
| 123 | + |
118 | 124 | class Session(
|
119 | 125 | third_party_pandas_gbq.GBQIOMixin,
|
120 | 126 | third_party_pandas_parquet.ParquetIOMixin,
|
@@ -248,7 +254,9 @@ def read_gbq(
|
248 | 254 | elif col_order:
|
249 | 255 | columns = col_order
|
250 | 256 |
|
251 |
| -query_or_table = self._filters_to_query(query_or_table, columns, filters) |
| 257 | +filters = list(filters) |
| 258 | +if len(filters) != 0 or _is_table_with_wildcard_suffix(query_or_table): |
| 259 | +query_or_table = self._to_query(query_or_table, columns, filters) |
252 | 260 |
|
253 | 261 | if _is_query(query_or_table):
|
254 | 262 | return self._read_gbq_query(
|
@@ -272,13 +280,18 @@ def read_gbq(
|
272 | 280 | use_cache=use_cache,
|
273 | 281 | )
|
274 | 282 |
|
275 |
| -def _filters_to_query(self, query_or_table, columns, filters): |
276 |
| -"""Convert filters to query""" |
277 |
| -if len(filters) == 0: |
278 |
| -return query_or_table |
279 |
| - |
| 283 | +def _to_query( |
| 284 | +self, |
| 285 | +query_or_table: str, |
| 286 | +columns: Iterable[str], |
| 287 | +filters: third_party_pandas_gbq.FiltersType, |
| 288 | +) -> str: |
| 289 | +"""Compile query_or_table with conditions(filters, wildcards) to query.""" |
| 290 | +filters = list(filters) |
280 | 291 | sub_query = (
|
281 |
| -f"({query_or_table})" if _is_query(query_or_table) else query_or_table |
| 292 | +f"({query_or_table})" |
| 293 | +if _is_query(query_or_table) |
| 294 | +else f"`{query_or_table}`" |
282 | 295 | )
|
283 | 296 |
|
284 | 297 | select_clause = "SELECT " + (
|
@@ -287,7 +300,7 @@ def _filters_to_query(self, query_or_table, columns, filters):
|
287 | 300 |
|
288 | 301 | where_clause = ""
|
289 | 302 | if filters:
|
290 |
| -valid_operators = { |
| 303 | +valid_operators: Mapping[third_party_pandas_gbq.FilterOps, str] = { |
291 | 304 | "in": "IN",
|
292 | 305 | "not in": "NOT IN",
|
293 | 306 | "==": "=",
|
@@ -298,19 +311,16 @@ def _filters_to_query(self, query_or_table, columns, filters):
|
298 | 311 | "!=": "!=",
|
299 | 312 | }
|
300 | 313 |
|
301 |
| -if ( |
302 |
| -isinstance(filters, Iterable) |
303 |
| -and isinstance(filters[0], Tuple) |
304 |
| -and (len(filters[0]) == 0 or not isinstance(filters[0][0], Tuple)) |
| 314 | +# If single layer filter, add another pseudo layer. So the single layer represents "and" logic. |
| 315 | +if isinstance(filters[0], tuple) and ( |
| 316 | +len(filters[0]) == 0 or not isinstance(list(filters[0])[0], tuple) |
305 | 317 | ):
|
306 |
| -filters = [filters] |
| 318 | +filters = typing.cast(third_party_pandas_gbq.FiltersType, [filters]) |
307 | 319 |
|
308 | 320 | or_expressions = []
|
309 | 321 | for group in filters:
|
310 | 322 | if not isinstance(group, Iterable):
|
311 |
| -raise ValueError( |
312 |
| -f"Filter group should be a iterable, {group} is not valid." |
313 |
| -) |
| 323 | +group = [group] |
314 | 324 |
|
315 | 325 | and_expressions = []
|
316 | 326 | for filter_item in group:
|
@@ -329,13 +339,13 @@ def _filters_to_query(self, query_or_table, columns, filters):
|
329 | 339 | if operator not in valid_operators:
|
330 | 340 | raise ValueError(f"Operator {operator} is not valid.")
|
331 | 341 |
|
332 |
| -operator = valid_operators[operator] |
| 342 | +operator_str = valid_operators[operator] |
333 | 343 |
|
334 |
| -if operator in ["IN", "NOT IN"]: |
| 344 | +if operator_str in ["IN", "NOT IN"]: |
335 | 345 | value_list = ", ".join([repr(v) for v in value])
|
336 |
| -expression = f"`{column}` {operator} ({value_list})" |
| 346 | +expression = f"`{column}` {operator_str} ({value_list})" |
337 | 347 | else:
|
338 |
| -expression = f"`{column}` {operator} {repr(value)}" |
| 348 | +expression = f"`{column}` {operator_str} {repr(value)}" |
339 | 349 | and_expressions.append(expression)
|
340 | 350 |
|
341 | 351 | or_expressions.append(" AND ".join(and_expressions))
|
@@ -521,6 +531,7 @@ def read_gbq_table(
|
521 | 531 | index_col: Iterable[str] | str = (),
|
522 | 532 | columns: Iterable[str] = (),
|
523 | 533 | max_results: Optional[int] = None,
|
| 534 | +filters: third_party_pandas_gbq.FiltersType = (), |
524 | 535 | use_cache: bool = True,
|
525 | 536 | col_order: Iterable[str] = (),
|
526 | 537 | ) -> dataframe.DataFrame:
|
@@ -546,6 +557,19 @@ def read_gbq_table(
|
546 | 557 | elif col_order:
|
547 | 558 | columns = col_order
|
548 | 559 |
|
| 560 | +filters = list(filters) |
| 561 | +if len(filters) != 0 or _is_table_with_wildcard_suffix(query): |
| 562 | +query = self._to_query(query, columns, filters) |
| 563 | + |
| 564 | +return self._read_gbq_query( |
| 565 | +query, |
| 566 | +index_col=index_col, |
| 567 | +columns=columns, |
| 568 | +max_results=max_results, |
| 569 | +api_name="read_gbq_table", |
| 570 | +use_cache=use_cache, |
| 571 | +) |
| 572 | + |
549 | 573 | return self._read_gbq_table(
|
550 | 574 | query=query,
|
551 | 575 | index_col=index_col,
|
|
0 commit comments