|
| 1 | +# ----------------------------------------------------------------------------- |
| 2 | +# Copyright (c) 2025, Oracle and/or its affiliates. |
| 3 | +# |
| 4 | +# This software is dual-licensed to you under the Universal Permissive License |
| 5 | +# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License |
| 6 | +# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose |
| 7 | +# either license. |
| 8 | +# |
| 9 | +# If you elect to accept the software under the Apache License, Version 2.0, |
| 10 | +# the following applies: |
| 11 | +# |
| 12 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 13 | +# you may not use this file except in compliance with the License. |
| 14 | +# You may obtain a copy of the License at |
| 15 | +# |
| 16 | +# https://www.apache.org/licenses/LICENSE-2.0 |
| 17 | +# |
| 18 | +# Unless required by applicable law or agreed to in writing, software |
| 19 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 20 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 21 | +# See the License for the specific language governing permissions and |
| 22 | +# limitations under the License. |
| 23 | +# ----------------------------------------------------------------------------- |
| 24 | + |
| 25 | +# ----------------------------------------------------------------------------- |
| 26 | +# dataframe_pandas_async.py |
| 27 | +# |
| 28 | +# An asynchronous version of dataframe_pandas.py |
| 29 | +# |
# Shows how to use AsyncConnection.fetch_df_all() and
# AsyncConnection.fetch_df_batches(). This example then creates Pandas
# DataFrames. Other dataframe libraries could be used in a similar way, as
# shown in the other, synchronous, dataframe samples.
| 34 | +# ----------------------------------------------------------------------------- |
| 35 | + |
| 36 | +import asyncio |
| 37 | + |
| 38 | +import pandas |
| 39 | +import pyarrow |
| 40 | + |
| 41 | +import oracledb |
| 42 | +import sample_env |
| 43 | + |
| 44 | + |
async def main():
    """Fetch SampleQueryTab rows as dataframes and print Pandas summaries.

    Connects with the credentials and connect string supplied by sample_env,
    then runs the same query twice: once with fetch_df_all() and once with
    fetch_df_batches(). Each OracleDataFrame is converted to a Pandas
    DataFrame via PyArrow and a few Pandas operations are printed.
    """
    sql = "select id, name from SampleQueryTab order by id"

    # Using the connection as an async context manager ensures it is closed
    # when the block exits, even if a fetch or conversion raises.
    async with oracledb.connect_async(
        user=sample_env.get_main_user(),
        password=sample_env.get_main_password(),
        dsn=sample_env.get_connect_string(),
        params=sample_env.get_connect_params(),
    ) as connection:
        # ---------------------------------------------------------------------
        #
        # Fetching all records

        # Get an OracleDataFrame.
        # Adjust arraysize to tune the query fetch performance
        odf = await connection.fetch_df_all(statement=sql, arraysize=100)

        # Get a Pandas DataFrame from the data
        df = pyarrow.Table.from_arrays(
            odf.column_arrays(), names=odf.column_names()
        ).to_pandas()

        # Perform various Pandas operations on the DataFrame

        print("Columns:")
        print(df.columns)

        print("\nDataframe description:")
        print(df.describe())

        print("\nLast three rows:")
        print(df.tail(3))

        print("\nTransform:")
        print(df.T)

        # ---------------------------------------------------------------------
        #
        # Batch record fetching
        #
        # Note that since this particular example ends up with all query rows
        # being held in memory, it would be more efficient to use
        # fetch_df_all() as shown above.

        print("\nFetching in batches:")
        batches = []

        # Tune 'size' for your data set. Here it is small to show the batch
        # fetch behavior on the sample table.
        async for odf in connection.fetch_df_batches(statement=sql, size=10):
            df_b = pyarrow.Table.from_arrays(
                odf.column_arrays(), names=odf.column_names()
            ).to_pandas()
            print(f"Appending {df_b.shape[0]} rows")
            batches.append(df_b)

        # Concatenate once at the end: re-concatenating inside the loop would
        # copy every previously fetched row on each iteration. pandas.concat()
        # rejects an empty list, so fall back to an empty DataFrame.
        if batches:
            df = pandas.concat(batches, ignore_index=True)
        else:
            df = pandas.DataFrame()

        r, c = df.shape
        print(f"{r} rows, {c} columns")

        print("\nLast three rows:")
        print(df.tail(3))
| 107 | + |
| 108 | + |
# Script entry point: run the main() coroutine to completion.
asyncio.run(main())
0 commit comments