@@ -105,6 +105,85 @@ def indicate_duplicates(
|
105 | 105 | )
|
106 | 106 |
|
107 | 107 |
|
def interpolate_linear(block: blocks.Block) -> blocks.Block:
    """Fill nulls in numeric value columns by linear interpolation.

    Row offsets serve as the x axis. Value columns typed
    ``pd.Float64Dtype()`` or ``pd.Int64Dtype()`` have their nulls filled
    from the surrounding non-null values; all other value columns are
    passed through untouched.

    Args:
        block: The block whose value columns should be interpolated.

    Returns:
        A block holding one column per original value column, in the
        original order and carrying the original column labels.
    """
    interpolable_dtypes = [pd.Float64Dtype(), pd.Int64Dtype()]
    # NOTE(review): presumably "everything up to and including this row"
    # and "this row and everything after it" respectively -- confirm
    # against WindowSpec's defaults.
    look_back = windows.WindowSpec(following=0)
    look_ahead = windows.WindowSpec(preceding=0)

    source_columns = block.value_columns
    source_labels = block.column_labels
    block, row_offsets = block.promote_offsets()

    result_ids = []
    for col_id in source_columns:
        if block._column_type(col_id) not in interpolable_dtypes:
            # Not an interpolable dtype: keep the column as it is.
            result_ids.append(col_id)
            continue

        # Offsets that are null in the same places the column is null, so
        # the window ops below only ever pick up offsets of known values.
        block, has_value = block.apply_unary_op(col_id, ops.notnull_op)
        block, known_offsets = block.apply_binary_op(
            row_offsets, has_value, ops.partial_arg3(ops.where_op, None)
        )

        # Known value (y) and its offset (x) on either side of each row.
        block, y_before = block.apply_window_op(
            col_id, agg_ops.LastNonNullOp(), look_back
        )
        block, y_after = block.apply_window_op(
            col_id, agg_ops.FirstNonNullOp(), look_ahead
        )
        block, x_before = block.apply_window_op(
            known_offsets, agg_ops.LastNonNullOp(), look_back
        )
        block, x_after = block.apply_window_op(
            known_offsets, agg_ops.FirstNonNullOp(), look_ahead
        )

        block, predicted = _interpolate(
            block, x_before, y_before, x_after, y_after, row_offsets
        )

        # Only rows that were null take the predicted value.
        block, filled = block.apply_binary_op(col_id, predicted, ops.fillna_op)
        # pandas linear interpolation also extrapolates forward like 'ffill'
        block, filled = block.apply_window_op(
            filled, agg_ops.LastNonNullOp(), look_back
        )
        result_ids.append(filled)

    selected = block.select_columns(result_ids)
    return selected.with_column_labels(source_labels)
| 164 | + |
| 165 | + |
def _interpolate(
    block: blocks.Block,
    x0_id: str,
    y0_id: str,
    x1_id: str,
    y1_id: str,
    xpredict_id: str,
) -> typing.Tuple[blocks.Block, str]:
    """Evaluate the line through (x0, y0) and (x1, y1) at xpredict.

    Column-wise, this computes
    ``y0 + (xpredict - x0) * ((y1 - y0) / (x1 - x0))``.

    Args:
        block: Block containing all of the referenced columns.
        x0_id: Column id of the left anchor's x values.
        y0_id: Column id of the left anchor's y values.
        x1_id: Column id of the right anchor's x values.
        y1_id: Column id of the right anchor's y values.
        xpredict_id: Column id of the x values to predict at.

    Returns:
        The block with the intermediate columns dropped, and the id of the
        column holding the predicted y values.
    """
    block, run = block.apply_binary_op(x1_id, x0_id, ops.sub_op)
    block, rise = block.apply_binary_op(y1_id, y0_id, ops.sub_op)
    block, distance = block.apply_binary_op(xpredict_id, x0_id, ops.sub_op)

    block, slope = block.apply_binary_op(rise, run, ops.div_op)
    block, increment = block.apply_binary_op(distance, slope, ops.mul_op)

    block, prediction_id = block.apply_binary_op(y0_id, increment, ops.add_op)

    # Only the prediction column should outlive this helper.
    scratch_columns = [run, rise, distance, slope, increment]
    return block.drop_columns(scratch_columns), prediction_id
| 185 | + |
| 186 | + |
108 | 187 | def drop_duplicates(
|
109 | 188 | block: blocks.Block, columns: typing.Sequence[str], keep: str = "first"
|
110 | 189 | ) -> blocks.Block:
|
|
0 commit comments