20 files changed

+1288
-97
lines changed
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
sample_env.run_sql_script(
5555
conn, "create_schema_21", main_user=sample_env.get_main_user()
5656
)
57-
if sample_env.get_server_version() >= (23, 5):
57+
if sample_env.get_server_version() >= (23, 7):
5858
sample_env.run_sql_script(
5959
conn, "create_schema_23", main_user=sample_env.get_main_user()
6060
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# -----------------------------------------------------------------------------
2+
# Copyright (c) 2025, Oracle and/or its affiliates.
3+
#
4+
# This software is dual-licensed to you under the Universal Permissive License
5+
# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
6+
# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
7+
# either license.
8+
#
9+
# If you elect to accept the software under the Apache License, Version 2.0,
10+
# the following applies:
11+
#
12+
# Licensed under the Apache License, Version 2.0 (the "License");
13+
# you may not use this file except in compliance with the License.
14+
# You may obtain a copy of the License at
15+
#
16+
# https://www.apache.org/licenses/LICENSE-2.0
17+
#
18+
# Unless required by applicable law or agreed to in writing, software
19+
# distributed under the License is distributed on an "AS IS" BASIS,
20+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21+
# See the License for the specific language governing permissions and
22+
# limitations under the License.
23+
# -----------------------------------------------------------------------------
24+
25+
# -----------------------------------------------------------------------------
26+
# dataframe_numpy.py
27+
#
28+
# Shows how to use connection.fetch_df_all() to efficiently put data into a
29+
# NumPy ndarray via the DLPack standard memory layout.
30+
# -----------------------------------------------------------------------------
31+
32+
import pyarrow
33+
import numpy
34+
35+
import oracledb
36+
import sample_env
37+
# Switch python-oracledb to thick mode when the sample environment is not
# configured for thin mode
if not sample_env.get_is_thin():
    oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client())

connection = oracledb.connect(
    user=sample_env.get_main_user(),
    password=sample_env.get_main_password(),
    dsn=sample_env.get_connect_string(),
    params=sample_env.get_connect_params(),
)

SQL = "select id from SampleQueryTab order by id"

# Fetch the whole result set as an OracleDataFrame.
# The arraysize value can be adjusted to tune the query fetch performance
odf = connection.fetch_df_all(statement=SQL, arraysize=100)

# Wrap the ID column in a PyArrow array, then hand that array to NumPy via the
# Python DLPack specification
id_column = odf.get_column_by_name("ID")
pyarrow_array = pyarrow.array(id_column)
id_values = numpy.from_dlpack(pyarrow_array)

# If the array has nulls, an alternative is:
# id_values = pyarrow_array.to_numpy(zero_copy_only=False)

print("Type:")
print(type(id_values))  # <class 'numpy.ndarray'>

# Perform various numpy operations on the ndarray, printing a heading before
# each result

for heading, operation in (("\nSum:", numpy.sum), ("\nLog10:", numpy.log10)):
    print(heading)
    print(operation(id_values))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# -----------------------------------------------------------------------------
2+
# Copyright (c) 2025, Oracle and/or its affiliates.
3+
#
4+
# This software is dual-licensed to you under the Universal Permissive License
5+
# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
6+
# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
7+
# either license.
8+
#
9+
# If you elect to accept the software under the Apache License, Version 2.0,
10+
# the following applies:
11+
#
12+
# Licensed under the Apache License, Version 2.0 (the "License");
13+
# you may not use this file except in compliance with the License.
14+
# You may obtain a copy of the License at
15+
#
16+
# https://www.apache.org/licenses/LICENSE-2.0
17+
#
18+
# Unless required by applicable law or agreed to in writing, software
19+
# distributed under the License is distributed on an "AS IS" BASIS,
20+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21+
# See the License for the specific language governing permissions and
22+
# limitations under the License.
23+
# -----------------------------------------------------------------------------
24+
25+
# -----------------------------------------------------------------------------
26+
# dataframe_pandas.py
27+
#
28+
# Shows how to use connection.fetch_df_all() and connection.fetch_df_batches()
29+
# to create Pandas dataframes.
30+
# -----------------------------------------------------------------------------
31+
32+
import pandas
33+
import pyarrow
34+
35+
import oracledb
36+
import sample_env
37+
def _to_pandas(odf):
    """Return a Pandas DataFrame built from an OracleDataFrame.

    The column arrays and column names of the OracleDataFrame are passed to
    pyarrow.Table.from_arrays() and the resulting table is converted with
    to_pandas().
    """
    return pyarrow.Table.from_arrays(
        odf.column_arrays(), names=odf.column_names()
    ).to_pandas()


# determine whether to use python-oracledb thin mode or thick mode
if not sample_env.get_is_thin():
    oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client())

connection = oracledb.connect(
    user=sample_env.get_main_user(),
    password=sample_env.get_main_password(),
    dsn=sample_env.get_connect_string(),
    params=sample_env.get_connect_params(),
)

SQL = "select id, name from SampleQueryTab order by id"

# -----------------------------------------------------------------------------
#
# Fetching all records

# Get an OracleDataFrame.
# Adjust arraysize to tune the query fetch performance
odf = connection.fetch_df_all(statement=SQL, arraysize=100)

# Get a Pandas DataFrame from the data
df = _to_pandas(odf)

# Perform various Pandas operations on the DataFrame

print("Columns:")
print(df.columns)

print("\nDataframe description:")
print(df.describe())

print("\nLast three rows:")
print(df.tail(3))

print("\nTransform:")
print(df.T)

# -----------------------------------------------------------------------------
#
# Batch record fetching
#
# Note that since this particular example ends up with all query rows being
# held in memory, it would be more efficient to use fetch_df_all() as shown
# above.

print("\nFetching in batches:")
batches = []

# Tune 'size' for your data set. Here it is small to show the batch fetch
# behavior on the sample table.
for odf in connection.fetch_df_batches(statement=SQL, size=10):
    df_b = _to_pandas(odf)
    print(f"Appending {df_b.shape[0]} rows")
    batches.append(df_b)

# Concatenate once after the loop: calling pandas.concat() for every batch
# re-copies all of the rows accumulated so far. pandas.concat() rejects an
# empty list, so fall back to an empty DataFrame when no rows were fetched.
if batches:
    df = pandas.concat(batches, ignore_index=True)
else:
    df = pandas.DataFrame()

r, c = df.shape
print(f"{r} rows, {c} columns")

print("\nLast three rows:")
print(df.tail(3))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
# -----------------------------------------------------------------------------
2+
# Copyright (c) 2025, Oracle and/or its affiliates.
3+
#
4+
# This software is dual-licensed to you under the Universal Permissive License
5+
# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
6+
# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
7+
# either license.
8+
#
9+
# If you elect to accept the software under the Apache License, Version 2.0,
10+
# the following applies:
11+
#
12+
# Licensed under the Apache License, Version 2.0 (the "License");
13+
# you may not use this file except in compliance with the License.
14+
# You may obtain a copy of the License at
15+
#
16+
# https://www.apache.org/licenses/LICENSE-2.0
17+
#
18+
# Unless required by applicable law or agreed to in writing, software
19+
# distributed under the License is distributed on an "AS IS" BASIS,
20+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21+
# See the License for the specific language governing permissions and
22+
# limitations under the License.
23+
# -----------------------------------------------------------------------------
24+
25+
# -----------------------------------------------------------------------------
26+
# dataframe_pandas_async.py
27+
#
28+
# An asynchronous version of dataframe_pandas.py
29+
#
30+
# Shows how to use AsyncConnection.fetch_df_all() and
31+
# AsyncConnection.fetch_df_batches(). This example then creates Pandas
32+
# dataframes. Alternative dataframe libraries could be used similar to the
33+
# other, synchronous, dataframe samples.
34+
# -----------------------------------------------------------------------------
35+
36+
import asyncio
37+
38+
import pandas
39+
import pyarrow
40+
41+
import oracledb
42+
import sample_env
43+
44+
def _to_pandas(odf):
    """Return a Pandas DataFrame built from an OracleDataFrame.

    The column arrays and column names of the OracleDataFrame are passed to
    pyarrow.Table.from_arrays() and the resulting table is converted with
    to_pandas().
    """
    return pyarrow.Table.from_arrays(
        odf.column_arrays(), names=odf.column_names()
    ).to_pandas()


async def main():
    """Fetch SampleQueryTab rows into Pandas DataFrames asynchronously.

    The query is run twice: once fetching all rows with fetch_df_all(), and
    once fetching in batches with fetch_df_batches(). Results are printed.
    """
    connection = await oracledb.connect_async(
        user=sample_env.get_main_user(),
        password=sample_env.get_main_password(),
        dsn=sample_env.get_connect_string(),
        params=sample_env.get_connect_params(),
    )

    SQL = "select id, name from SampleQueryTab order by id"

    # -------------------------------------------------------------------------
    #
    # Fetching all records

    # Get an OracleDataFrame.
    # Adjust arraysize to tune the query fetch performance
    odf = await connection.fetch_df_all(statement=SQL, arraysize=100)

    # Get a Pandas DataFrame from the data
    df = _to_pandas(odf)

    # Perform various Pandas operations on the DataFrame

    print("Columns:")
    print(df.columns)

    print("\nDataframe description:")
    print(df.describe())

    print("\nLast three rows:")
    print(df.tail(3))

    print("\nTransform:")
    print(df.T)

    # -------------------------------------------------------------------------
    #
    # Batch record fetching
    #
    # Note that since this particular example ends up with all query rows being
    # held in memory, it would be more efficient to use fetch_df_all() as shown
    # above.

    print("\nFetching in batches:")
    batches = []

    # Tune 'size' for your data set. Here it is small to show the batch fetch
    # behavior on the sample table.
    async for odf in connection.fetch_df_batches(statement=SQL, size=10):
        df_b = _to_pandas(odf)
        print(f"Appending {df_b.shape[0]} rows")
        batches.append(df_b)

    # Concatenate once after the loop: calling pandas.concat() for every batch
    # re-copies all of the rows accumulated so far. pandas.concat() rejects an
    # empty list, so fall back to an empty DataFrame when no rows were fetched.
    if batches:
        df = pandas.concat(batches, ignore_index=True)
    else:
        df = pandas.DataFrame()

    r, c = df.shape
    print(f"{r} rows, {c} columns")

    print("\nLast three rows:")
    print(df.tail(3))


asyncio.run(main())
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# -----------------------------------------------------------------------------
2+
# Copyright (c) 2025, Oracle and/or its affiliates.
3+
#
4+
# This software is dual-licensed to you under the Universal Permissive License
5+
# (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl and Apache License
6+
# 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose
7+
# either license.
8+
#
9+
# If you elect to accept the software under the Apache License, Version 2.0,
10+
# the following applies:
11+
#
12+
# Licensed under the Apache License, Version 2.0 (the "License");
13+
# you may not use this file except in compliance with the License.
14+
# You may obtain a copy of the License at
15+
#
16+
# https://www.apache.org/licenses/LICENSE-2.0
17+
#
18+
# Unless required by applicable law or agreed to in writing, software
19+
# distributed under the License is distributed on an "AS IS" BASIS,
20+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21+
# See the License for the specific language governing permissions and
22+
# limitations under the License.
23+
# -----------------------------------------------------------------------------
24+
25+
# -----------------------------------------------------------------------------
26+
# dataframe_parquet_write.py
27+
#
28+
# Shows how to use connection.fetch_df_batches() to write files in Parquet
29+
# format.
30+
# -----------------------------------------------------------------------------
31+
32+
import os
33+
34+
import pyarrow
35+
import pyarrow.parquet as pq
36+
37+
import oracledb
38+
import sample_env
39+
# determine whether to use python-oracledb thin mode or thick mode
if not sample_env.get_is_thin():
    oracledb.init_oracle_client(lib_dir=sample_env.get_oracle_client())

connection = oracledb.connect(
    user=sample_env.get_main_user(),
    password=sample_env.get_main_password(),
    dsn=sample_env.get_connect_string(),
    params=sample_env.get_connect_params(),
)

PARQUET_FILE_NAME = "sample.parquet"

# Start from a clean slate so output from an earlier run is not reused
if os.path.isfile(PARQUET_FILE_NAME):
    os.remove(PARQUET_FILE_NAME)

# Tune this for your query
FETCH_BATCH_SIZE = 10

SQL = "select id, name from SampleQueryTab order by id"

# The writer is created lazily because the Parquet schema is taken from the
# first batch that is fetched
pqwriter = None

batches = connection.fetch_df_batches(statement=SQL, size=FETCH_BATCH_SIZE)

try:
    for odf in batches:

        pyarrow_table = pyarrow.Table.from_arrays(
            arrays=odf.column_arrays(), names=odf.column_names()
        )

        if pqwriter is None:
            pqwriter = pq.ParquetWriter(
                PARQUET_FILE_NAME, pyarrow_table.schema
            )

        print(f"Writing a batch of {odf.num_rows()} rows")
        pqwriter.write_table(pyarrow_table)
finally:
    # Close the writer even if a fetch or write fails. It is still None when
    # the query returned no rows, in which case there is nothing to close.
    if pqwriter is not None:
        pqwriter.close()

if pqwriter is None:
    # No batch was fetched, so no file exists to inspect or read back
    print("\nNo rows were fetched so no Parquet file was written")
else:
    # -------------------------------------------------------------------------
    # Check the file was created

    print("\nParquet file metadata:")
    print(pq.read_metadata(PARQUET_FILE_NAME))

    # -------------------------------------------------------------------------
    # Read the file

    print("\nParquet file data:")
    t = pq.read_table(PARQUET_FILE_NAME, columns=["ID", "NAME"])
    print(t)

0 commit comments

Comments
 (0)