系統會使用預設設定來設定其餘叢集。您可以覆寫預設叢集設定。舉例來說,您可以新增次要 VM (預設值 = 0),或為叢集指定非預設的 VPC 網路。詳情請參閱 CreateCluster。
def quickstart(project_id, region, cluster_name, gcs_bucket, pyspark_file):
    """Create a Dataproc cluster, run a PySpark job on it, then delete it.

    Args:
        project_id: Google Cloud project that owns the cluster.
        region: Dataproc region (e.g. "us-central1"); also selects the
            regional API endpoint used by both clients.
        cluster_name: Name of the cluster to create and later delete.
        gcs_bucket: Cloud Storage bucket containing the PySpark file and
            receiving the job's driver output.
        pyspark_file: Name (path within gcs_bucket) of the PySpark file to run.
    """
    # Create the cluster client, pointed at the regional Dataproc endpoint.
    cluster_client = dataproc_v1.ClusterControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )

    # Cluster config: 1 master + 2 workers, all n1-standard-2 machines.
    cluster = {
        "project_id": project_id,
        "cluster_name": cluster_name,
        "config": {
            "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-2"},
            "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-2"},
        },
    }

    # Create the cluster and block until the long-running operation finishes.
    operation = cluster_client.create_cluster(
        request={"project_id": project_id, "region": region, "cluster": cluster}
    )
    result = operation.result()
    print(f"Cluster created successfully: {result.cluster_name}")

    # Create the job client against the same regional endpoint.
    job_client = dataproc_v1.JobControllerClient(
        client_options={"api_endpoint": f"{region}-dataproc.googleapis.com:443"}
    )

    # Job config. Bug fix: the original referenced the undefined name
    # `spark_filename`; the function parameter is `pyspark_file`.
    job = {
        "placement": {"cluster_name": cluster_name},
        "pyspark_job": {"main_python_file_uri": f"gs://{gcs_bucket}/{pyspark_file}"},
    }

    operation = job_client.submit_job_as_operation(
        request={"project_id": project_id, "region": region, "job": job}
    )
    response = operation.result()

    # Dataproc job output is saved to the Cloud Storage bucket
    # allocated to the job. Use regex to obtain the bucket and blob info.
    matches = re.match("gs://(.*?)/(.*)", response.driver_output_resource_uri)

    # Driver output is written in numbered chunks; read the first one.
    output = (
        storage.Client()
        .get_bucket(matches.group(1))
        .blob(f"{matches.group(2)}.000000000")
        .download_as_bytes()
        .decode("utf-8")
    )
    print(f"Job finished successfully: {output}\r\n")

    # Delete the cluster once the job has terminated.
    operation = cluster_client.delete_cluster(
        request={
            "project_id": project_id,
            "region": region,
            "cluster_name": cluster_name,
        }
    )
    operation.result()
    print(f"Cluster {cluster_name} successfully deleted.")
[[["容易理解","easyToUnderstand","thumb-up"],["確實解決了我的問題","solvedMyProblem","thumb-up"],["其他","otherUp","thumb-up"]],[["難以理解","hardToUnderstand","thumb-down"],["資訊或程式碼範例有誤","incorrectInformationOrSampleCode","thumb-down"],["缺少我需要的資訊/範例","missingTheInformationSamplesINeed","thumb-down"],["翻譯問題","translationIssue","thumb-down"],["其他","otherDown","thumb-down"]],["上次更新時間:2025-06-12 (世界標準時間)。"],[[["This tutorial guides users through a Cloud Shell walkthrough to interact with Dataproc gRPC APIs using Google Cloud client libraries for Python."],["The walkthrough code demonstrates how to programmatically create a Dataproc cluster, submit a job to the cluster, and then delete the cluster."],["The tutorial details the required values to set when creating a cluster, such as project ID, region, cluster name, and cluster configuration, allowing for default setting overrides."],["The tutorial also describes the necessary values to submit a job, including project ID, region, cluster name, and the Cloud Storage filepath of the PySpark job."],["Users can utilize an inline workflow to perform all actions with one API request, rather than making separate requests, as shown in the provided example."]]],[]]