@@ -14,11 +14,12 @@
 
 """Cursor for the Google BigQuery DB-API."""
 
+from __future__ import annotations
+
 import collections
 from collections import abc as collections_abc
-import copy
-import logging
 import re
+from typing import Optional
 
 try:
     from google.cloud.bigquery_storage import ArrowSerializationOptions
@@ -34,8 +35,6 @@
 import google.cloud.exceptions  # type: ignore
 
 
-_LOGGER = logging.getLogger(__name__)
-
 # Per PEP 249: A 7-item sequence containing information describing one result
 # column. The first two items (name and type_code) are mandatory, the other
 # five are optional and are set to None if no meaningful values can be
@@ -76,18 +75,31 @@ def __init__(self, connection):
         # most appropriate size.
         self.arraysize = None
         self._query_data = None
-        self._query_job = None
+        self._query_rows = None
         self._closed = False
 
     @property
-    def query_job(self):
-        """google.cloud.bigquery.job.query.QueryJob: The query job created by
-        the last ``execute*()`` call.
+    def query_job(self) -> Optional[job.QueryJob]:
+        """google.cloud.bigquery.job.query.QueryJob | None: The query job
+        created by the last ``execute*()`` call, if a query job was created.
 
         .. note::
             If the last ``execute*()`` call was ``executemany()``, this is the
             last job created by ``executemany()``."""
-        return self._query_job
+        rows = self._query_rows
+
+        if rows is None:
+            return None
+
+        job_id = rows.job_id
+        project = rows.project
+        location = rows.location
+        client = self.connection._client
+
+        if job_id is None:
+            return None
+
+        return client.get_job(job_id, location=location, project=project)
 
     def close(self):
         """Mark the cursor as closed, preventing its further use."""
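
Note on the behavior change here: query_job is no longer cached on the
cursor; it is rebuilt on demand from the row iterator's job metadata, so it
can now return None when the backend answered without creating a job. A
minimal usage sketch, assuming the library's public dbapi.connect entry
point (the query itself is illustrative):

    from google.cloud import bigquery
    from google.cloud.bigquery import dbapi

    connection = dbapi.connect(bigquery.Client())
    cursor = connection.cursor()
    cursor.execute("SELECT 1")

    query_job = cursor.query_job  # Issues a get_job call; may be None.
    if query_job is not None:
        print(query_job.job_id, query_job.location)
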
@@ -117,8 +129,8 @@ def _set_description(self, schema):
             for field in schema
         )
 
-    def _set_rowcount(self, query_results):
-        """Set the rowcount from query results.
+    def _set_rowcount(self, rows):
+        """Set the rowcount from a RowIterator.
 
         Normally, this sets rowcount to the number of rows returned by the
         query, but if it was a DML statement, it sets rowcount to the number
@@ -129,10 +141,10 @@ def _set_rowcount(self, query_results):
                 Results of a query.
         """
         total_rows = 0
-        num_dml_affected_rows = query_results.num_dml_affected_rows
+        num_dml_affected_rows = rows.num_dml_affected_rows
 
-        if query_results.total_rows is not None and query_results.total_rows > 0:
-            total_rows = query_results.total_rows
+        if rows.total_rows is not None and rows.total_rows > 0:
+            total_rows = rows.total_rows
         if num_dml_affected_rows is not None and num_dml_affected_rows > 0:
             total_rows = num_dml_affected_rows
         self.rowcount = total_rows
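
To make the rule above concrete: rowcount mirrors total_rows for ordinary
queries, while num_dml_affected_rows takes precedence for DML statements. A
hedged sketch (the DML table name is hypothetical):

    cursor.execute("SELECT 17")
    print(cursor.rowcount)  # 1, taken from the iterator's total_rows

    cursor.execute("DELETE FROM my_dataset.my_table WHERE id = 1")
    print(cursor.rowcount)  # number of rows the DML statement affected
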
@@ -165,9 +177,10 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None):
             parameters (Union[Mapping[str, Any], Sequence[Any]]):
                 (Optional) dictionary or sequence of parameter values.
 
-            job_id (str):
-                (Optional) The job_id to use. If not set, a job ID
-                is generated at random.
+            job_id (str | None):
+                (Optional and discouraged) The job ID to use when creating
+                the query job. For best performance and reliability, manually
+                setting a job ID is discouraged.
 
             job_config (google.cloud.bigquery.job.QueryJobConfig):
                 (Optional) Extra configuration options for the query job.
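
For reference, the DB-API uses pyformat placeholders, so a parameterized
execute() call looks like the following (query text and parameter name are
illustrative only):

    cursor.execute(
        "SELECT word FROM `bigquery-public-data.samples.shakespeare` "
        "WHERE word = %(target)s",
        {"target": "raisins"},
    )
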
@@ -181,7 +194,7 @@ def _execute(
         self, formatted_operation, parameters, job_id, job_config, parameter_types
     ):
         self._query_data = None
-        self._query_job = None
+        self._query_rows = None
         client = self.connection._client
@@ -190,33 +203,35 @@ def _execute(
         # libraries.
         query_parameters = _helpers.to_query_parameters(parameters, parameter_types)
 
-        if client._default_query_job_config:
-            if job_config:
-                config = job_config._fill_from_default(client._default_query_job_config)
-            else:
-                config = copy.deepcopy(client._default_query_job_config)
-        else:
-            config = job_config or job.QueryJobConfig(use_legacy_sql=False)
-
+        config = job_config or job.QueryJobConfig()
         config.query_parameters = query_parameters
-        self._query_job = client.query(
-            formatted_operation, job_config=config, job_id=job_id
-        )
 
-        if self._query_job.dry_run:
-            self._set_description(schema=None)
-            self.rowcount = 0
-            return
-
-        # Wait for the query to finish.
+        # Start the query and wait for the query to finish.
         try:
-            self._query_job.result()
+            if job_id is not None:
+                rows = client.query(
+                    formatted_operation,
+                    job_config=config,
+                    job_id=job_id,
+                ).result(
+                    page_size=self.arraysize,
+                )
+            else:
+                rows = client.query_and_wait(
+                    formatted_operation,
+                    job_config=config,
+                    page_size=self.arraysize,
+                )
         except google.cloud.exceptions.GoogleCloudError as exc:
             raise exceptions.DatabaseError(exc)
 
-        query_results = self._query_job._query_results
-        self._set_rowcount(query_results)
-        self._set_description(query_results.schema)
+        self._query_rows = rows
+        self._set_description(rows.schema)
+
+        if config.dry_run:
+            self.rowcount = 0
+        else:
+            self._set_rowcount(rows)
 
     def executemany(self, operation, seq_of_parameters):
         """Prepare and execute a database operation multiple times.
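
In short, _execute now chooses between two client calls: an explicit job ID
forces job creation through client.query(), while the default path uses
client.query_and_wait(), which may serve small results via the lighter
jobs.query API without creating a job at all. A sketch of the distinction,
assuming client, config, and cursor from the surrounding code and a
hypothetical job ID:

    # Explicit job ID: a job is always created under that ID.
    rows = client.query(
        "SELECT 1", job_config=config, job_id="my-custom-job-id"
    ).result(page_size=cursor.arraysize)

    # No job ID: the client is free to skip job creation entirely.
    rows = client.query_and_wait(
        "SELECT 1", job_config=config, page_size=cursor.arraysize
    )
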
@@ -250,25 +265,26 @@ def _try_fetch(self, size=None):
 
         Mutates self to indicate that iteration has started.
         """
-        if self._query_job is None:
+        if self._query_data is not None:
+            # Already started fetching the data.
+            return
+
+        rows = self._query_rows
+        if rows is None:
             raise exceptions.InterfaceError(
                 "No query results: execute() must be called before fetch."
             )
 
-        if self._query_job.dry_run:
-            self._query_data = iter([])
+        bqstorage_client = self.connection._bqstorage_client
+        if rows._should_use_bqstorage(
+            bqstorage_client,
+            create_bqstorage_client=False,
+        ):
+            rows_iterable = self._bqstorage_fetch(bqstorage_client)
+            self._query_data = _helpers.to_bq_table_rows(rows_iterable)
             return
 
-        if self._query_data is None:
-            bqstorage_client = self.connection._bqstorage_client
-
-            if bqstorage_client is not None:
-                rows_iterable = self._bqstorage_fetch(bqstorage_client)
-                self._query_data = _helpers.to_bq_table_rows(rows_iterable)
-                return
-
-            rows_iter = self._query_job.result(page_size=self.arraysize)
-            self._query_data = iter(rows_iter)
+        self._query_data = iter(rows)
 
     def _bqstorage_fetch(self, bqstorage_client):
         """Start fetching data with the BigQuery Storage API.
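
Because arraysize is forwarded as page_size when the query starts, it must
be set before execute() to influence paging; the first fetch call then runs
_try_fetch exactly once. A usage sketch (query is illustrative):

    cursor.arraysize = 25  # page_size hint applied at execute time
    cursor.execute(
        "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` LIMIT 100"
    )
    first_batch = cursor.fetchmany()  # starts iteration; up to 25 rows
    remainder = cursor.fetchall()
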
@@ -290,7 +306,7 @@ def _bqstorage_fetch(self, bqstorage_client):
         # bigquery_storage can indeed be imported here without errors.
         from google.cloud import bigquery_storage
 
-        table_reference = self._query_job.destination
+        table_reference = self._query_rows._table
 
         requested_session = bigquery_storage.types.ReadSession(
             table=table_reference.to_bqstorage(),
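
For context beyond this hunk: the session request built here is handed to
the BigQuery Storage client and its stream is wrapped back into row tuples.
A hedged sketch of that flow (not the exact code that follows in the file),
assuming a single read stream:

    read_session = bqstorage_client.create_read_session(
        parent="projects/{}".format(table_reference.project),
        read_session=requested_session,
        max_stream_count=1,
    )
    reader = bqstorage_client.read_rows(read_session.streams[0].name)
    rows_iterable = reader.rows(read_session)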