# Copyright 2021 The PyBigQuery Authors
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.

import pathlib

import pytest
from google.cloud import bigquery

from typing import List

# Directory holding the JSON sample data and schema files used by the fixtures.
DATA_DIR = pathlib.Path(__file__).parent.joinpath("data")
def load_sample_data(
    full_table_id: str,
    bigquery_client: bigquery.Client,
    bigquery_schema: List[bigquery.SchemaField],
    filename: str = "sample.json",
):
    """Start a load job that (re)creates ``full_table_id`` from a JSON file.

    Returns the in-flight load job; callers wait on it with ``.result()``.
    """
    # Drop any pre-existing table first. WRITE_TRUNCATE alone is not enough,
    # because the load job fails if properties such as the table description
    # do not match the existing table.
    bigquery_client.delete_table(full_table_id, not_found_ok=True)

    job_config = bigquery.LoadJobConfig(
        destination_table_description="A sample table containing most data types.",
        schema=bigquery_schema,
        time_partitioning=bigquery.TimePartitioning(field="timestamp"),
        clustering_fields=["integer", "string"],
        source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    )

    with open(DATA_DIR / filename, "rb") as data_file:
        return bigquery_client.load_table_from_file(
            data_file, full_table_id, job_config=job_config
        )
@pytest.fixture(scope="session")
def bigquery_client():
    """Session-wide BigQuery client built from default credentials."""
    client = bigquery.Client()
    return client
@pytest.fixture(scope="session")
def bigquery_schema(bigquery_client: bigquery.Client):
    """Schema shared by the sample tables, parsed from data/schema.json."""
    schema_path = DATA_DIR / "schema.json"
    return bigquery_client.schema_from_json(schema_path)
@pytest.fixture(scope="session", autouse=True)
def bigquery_dataset(
    bigquery_client: bigquery.Client, bigquery_schema: List[bigquery.SchemaField]
):
    """Create the main test dataset: sample tables, an empty DML table, a view.

    Returns the dataset ID so tests can build fully-qualified table names.
    """
    project_id = bigquery_client.project
    dataset_id = "test_pybigquery"
    bigquery_client.create_dataset(
        bigquery.Dataset(f"{project_id}.{dataset_id}"), exists_ok=True
    )

    empty_table = bigquery.Table(
        f"{project_id}.{dataset_id}.sample_dml", schema=bigquery_schema
    )
    view = bigquery.Table(f"{project_id}.{dataset_id}.sample_view")
    view.view_query = f"SELECT string FROM `{dataset_id}.sample`"

    # Start both load jobs before waiting on either so they run concurrently.
    job1 = load_sample_data(
        f"{project_id}.{dataset_id}.sample", bigquery_client, bigquery_schema
    )
    job2 = load_sample_data(
        f"{project_id}.{dataset_id}.sample_one_row",
        bigquery_client,
        bigquery_schema,
        filename="sample_one_row.json",
    )
    bigquery_client.create_table(empty_table, exists_ok=True)
    job1.result()
    job2.result()
    # The view selects from the sample table, so create it only after the
    # load job has completed.
    bigquery_client.create_table(view, exists_ok=True)
    return dataset_id
@pytest.fixture(scope="session", autouse=True)
def bigquery_alt_dataset(
    bigquery_client: bigquery.Client, bigquery_schema: List[bigquery.SchemaField]
):
    """Create a second dataset with one sample table (for cross-dataset tests).

    Returns the dataset ID.
    """
    dataset_id = "test_pybigquery_alt"
    full_dataset_id = f"{bigquery_client.project}.{dataset_id}"
    bigquery_client.create_dataset(bigquery.Dataset(full_dataset_id), exists_ok=True)
    load_job = load_sample_data(
        f"{full_dataset_id}.sample_alt", bigquery_client, bigquery_schema
    )
    load_job.result()
    return dataset_id
@pytest.fixture(scope="session", autouse=True)
def bigquery_regional_dataset(
    bigquery_client: bigquery.Client, bigquery_schema: List[bigquery.SchemaField]
):
    """Create a dataset pinned to a non-default region (asia-northeast1).

    Lets tests exercise location-aware behavior. Returns the dataset ID.
    """
    project_id = bigquery_client.project
    dataset_id = "test_pybigquery_location"
    dataset = bigquery.Dataset(f"{project_id}.{dataset_id}")
    # Location must be set before the dataset is created; it cannot change later.
    dataset.location = "asia-northeast1"
    dataset = bigquery_client.create_dataset(dataset, exists_ok=True)
    job = load_sample_data(
        f"{project_id}.{dataset_id}.sample_one_row",
        bigquery_client,
        bigquery_schema,
        filename="sample_one_row.json",
    )
    job.result()
    return dataset_id