Merged
Show file tree
Hide file tree
Changes from all commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Failed to load files.
Original file line numberDiff line numberDiff line change
Expand Up@@ -2291,9 +2291,18 @@ def load_table_from_dataframe(
name
for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe)
)
# schema fields not present in the dataframe are not needed
job_config.schema = [
field for field in table.schema if field.name in columns_and_indexes
# Field description and policy tags are not needed to
# serialize a data frame.
SchemaField(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the actual bug fix. Rather than populate all properties of schema field from the table schema, just populate the minimum we need to convert to parquet/CSV and then upload

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll need to revisit this for parameterization constraints, but that's a problem for future Tim.

field.name,
field.field_type,
mode=field.mode,
fields=field.fields,
)
# schema fields not present in the dataframe are not needed
for field in table.schema
if field.name in columns_and_indexes
]

job_config.schema = _pandas_helpers.dataframe_to_bq_schema(
Expand Down
Original file line numberDiff line numberDiff line change
Expand Up@@ -19,6 +19,7 @@
from google.cloud.bigquery_v2 import types


_DEFAULT_VALUE = object()
_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
Expand DownExpand Up@@ -73,14 +74,18 @@ def __init__(
name,
field_type,
mode="NULLABLE",
description=None,
description=_DEFAULT_VALUE,
fields=(),
policy_tags=None,
):
self._name = name
self._field_type = field_type
self._mode = mode
self._description = description
self._properties = {
"name": name,
"type": field_type,
}
if mode is not None:
self._properties["mode"] = mode.upper()
if description is not _DEFAULT_VALUE:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shollyman This is one of the key changes: we no longer set the resource value for "description" if it's not explicitly set.

We already omit policy_tags from the resource if none (though arguably it should get the same treatment so that someone can unset policy tags from Python)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My default inclination would be for special handling for None values to happen at the places where it's significant, like when calling tables.update. It's also the case that schema fields can't be manipulated individually, so perhaps I'm simply just not thinking this through properly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I called that out as a possibility in #558, but that'd require updating our field mask logic to support sub-fields, which gets into some hairy string parsing (perhaps not all that hairy, as it could be as simple as split on '.', but is definitely a departure from what we've been doing).

Also, it might mean that we'd have to introduce a field mask to our load job methods. Based on the error message we're seeing, it sounds like it's possible to make updates to fields like policy tags from a load job.

self._properties["description"] = description
self._fields = tuple(fields)
self._policy_tags = policy_tags

Expand All@@ -98,7 +103,7 @@ def from_api_repr(cls, api_repr):
"""
# Handle optional properties with default values
mode = api_repr.get("mode", "NULLABLE")
description = api_repr.get("description")
description = api_repr.get("description", _DEFAULT_VALUE)
fields = api_repr.get("fields", ())

return cls(
Expand All@@ -113,7 +118,7 @@ def from_api_repr(cls, api_repr):
@property
def name(self):
"""str: The name of the field."""
return self._name
return self._properties["name"]

@property
def field_type(self):
Expand All@@ -122,7 +127,7 @@ def field_type(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
"""
return self._field_type
return self._properties["type"]

@property
def mode(self):
Expand All@@ -131,17 +136,17 @@ def mode(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
"""
return self._mode
return self._properties.get("mode")

@property
def is_nullable(self):
"""bool: whether 'mode' is 'nullable'."""
return self._mode == "NULLABLE"
return self.mode == "NULLABLE"

@property
def description(self):
"""Optional[str]: description for the field."""
return self._description
return self._properties.get("description")

@property
def fields(self):
Expand All@@ -164,13 +169,7 @@ def to_api_repr(self):
Returns:
Dict: A dictionary representing the SchemaField in a serialized form.
"""
# Put together the basic representation. See http:///2hOAT5u.
answer = {
"mode": self.mode.upper(),
"name": self.name,
"type": self.field_type.upper(),
"description": self.description,
}
answer = self._properties.copy()

# If this is a RECORD type, then sub-fields are also included,
# add this to the serialized representation.
Expand All@@ -193,10 +192,10 @@ def _key(self):
Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`.
"""
return (
self._name,
self._field_type.upper(),
self._mode.upper(),
self._description,
self.name,
self.field_type.upper(),
self.mode.upper(),
self.description,
self._fields,
self._policy_tags,
)
Expand Down
Original file line numberDiff line numberDiff line change
Expand Up@@ -434,13 +434,11 @@ def test_schema_setter_fields(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"description": None,
}
age_repr = {
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
"description": None,
}
self.assertEqual(
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
Expand All@@ -449,24 +447,18 @@ def test_schema_setter_fields(self):
def test_schema_setter_valid_mappings_list(self):
config = self._get_target_class()()

schema = [
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
{"name": "age", "type": "INTEGER", "mode": "REQUIRED"},
]
config.schema = schema

full_name_repr = {
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
"description": None,
}
age_repr = {
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
"description": None,
}
schema = [full_name_repr, age_repr]
config.schema = schema
self.assertEqual(
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
)
Expand Down
Loading