268 lines
6.2 KiB
Python
268 lines
6.2 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import pytest
|
|
from pyarrow import Codec
|
|
from pyarrow import fs
|
|
|
|
# Names of the test groups. Every name listed here has a matching key in
# ``defaults``. The order is preserved exactly as written (it is not
# alphabetical), in case other code depends on it.
groups = [
    'brotli',
    'bz2',
    'cython',
    'dataset',
    'hypothesis',
    'fastparquet',
    'gandiva',
    'gcs',
    'gdb',
    'gzip',
    'hdfs',
    'large_memory',
    'lz4',
    'memory_leak',
    'nopandas',
    'orc',
    'pandas',
    'parquet',
    'parquet_encryption',
    'plasma',
    's3',
    'snappy',
    'substrait',
    'tensorflow',
    'flight',
    'slow',
    'requires_testing_data',
    'zstd',
]
|
|
|
|
# Default on/off state for every group in ``groups``.
# Compression codecs are enabled when ``Codec.is_available`` reports them;
# 'gdb' and 'requires_testing_data' start enabled; everything else starts
# disabled and may be switched on by the import probes that follow.
defaults = {
    'brotli': Codec.is_available('brotli'),
    'bz2': Codec.is_available('bz2'),
    'cython': False,
    'dataset': False,
    'fastparquet': False,
    'flight': False,
    'gandiva': False,
    'gcs': False,
    'gdb': True,
    'gzip': Codec.is_available('gzip'),
    'hdfs': False,
    'hypothesis': False,
    'large_memory': False,
    'lz4': Codec.is_available('lz4'),
    'memory_leak': False,
    'nopandas': False,
    'orc': False,
    'pandas': False,
    'parquet': False,
    'parquet_encryption': False,
    'plasma': False,
    'requires_testing_data': True,
    's3': False,
    'slow': False,
    'snappy': Codec.is_available('snappy'),
    'substrait': False,
    'tensorflow': False,
    'zstd': Codec.is_available('zstd'),
}
|
|
|
|
# Probe the optional components one by one: when the import succeeds the
# corresponding entry in ``defaults`` is switched on; an ImportError simply
# leaves the entry at its initial value.
try:
    import cython  # noqa
    defaults['cython'] = True
except ImportError:
    pass

try:
    import fastparquet  # noqa
    defaults['fastparquet'] = True
except ImportError:
    pass

try:
    import pyarrow.gandiva  # noqa
    defaults['gandiva'] = True
except ImportError:
    pass

try:
    import pyarrow.dataset  # noqa
    defaults['dataset'] = True
except ImportError:
    pass

try:
    import pyarrow.orc  # noqa
    defaults['orc'] = True
except ImportError:
    pass

# pandas is the one probe with a fallback: when it is missing, the
# 'nopandas' group is enabled instead.
try:
    import pandas  # noqa
    defaults['pandas'] = True
except ImportError:
    defaults['nopandas'] = True

try:
    import pyarrow.parquet  # noqa
    defaults['parquet'] = True
except ImportError:
    pass

try:
    import pyarrow.parquet.encryption  # noqa
    defaults['parquet_encryption'] = True
except ImportError:
    pass

try:
    import pyarrow.plasma  # noqa
    defaults['plasma'] = True
except ImportError:
    pass

try:
    import tensorflow  # noqa
    defaults['tensorflow'] = True
except ImportError:
    pass

try:
    import pyarrow.flight  # noqa
    defaults['flight'] = True
except ImportError:
    pass

# The filesystem groups are detected by importing the concrete filesystem
# class from pyarrow.fs rather than a whole module.
try:
    from pyarrow.fs import GcsFileSystem  # noqa
    defaults['gcs'] = True
except ImportError:
    pass

try:
    from pyarrow.fs import S3FileSystem  # noqa
    defaults['s3'] = True
except ImportError:
    pass

try:
    from pyarrow.fs import HadoopFileSystem  # noqa
    defaults['hdfs'] = True
except ImportError:
    pass

try:
    import pyarrow.substrait  # noqa
    defaults['substrait'] = True
except ImportError:
    pass
|
|
|
|
|
|
# Doctest should ignore files for the modules that are not built
def pytest_ignore_collect(path, config):
    """Tell pytest whether ``path`` should be skipped during collection.

    Parameters
    ----------
    path
        Candidate path; only its ``str()`` form is inspected.
    config
        Object exposing ``option.doctestmodules`` and, optionally,
        ``option.doctest_cython``.

    Returns
    -------
    bool
        True when the path must be ignored, False otherwise.

    When ``doctestmodules`` is set, the tests directory is skipped, as are
    the files of any module whose entry in ``defaults`` is off, or whose
    import fails (cuda, fs).  When ``doctest_cython`` is set, the tests
    directory and the ``_parquet_encryption`` module are skipped.
    """
    # The textual form of the path is matched several times; compute it once.
    path_str = str(path)

    if config.option.doctestmodules:
        # don't try to run doctests on the /tests directory
        if "/pyarrow/tests/" in path_str:
            return True

        doctest_groups = [
            'dataset',
            'orc',
            'parquet',
            'plasma',
            'flight',
            'substrait',
        ]

        # handle cuda, flight, etc
        for group in doctest_groups:
            if 'pyarrow/{}'.format(group) in path_str:
                if not defaults[group]:
                    return True

        if 'pyarrow/parquet/encryption' in path_str:
            if not defaults['parquet_encryption']:
                return True

        # cuda has no entry in ``defaults``; probe the import directly.
        if 'pyarrow/cuda' in path_str:
            try:
                import pyarrow.cuda  # noqa
                return False
            except ImportError:
                return True

        # Files under pyarrow/fs are collected only when S3FileSystem
        # can be imported.
        if 'pyarrow/fs' in path_str:
            try:
                from pyarrow.fs import S3FileSystem  # noqa
                return False
            except ImportError:
                return True

    # The doctest_cython option may not be registered at all, hence getattr.
    if getattr(config.option, "doctest_cython", False):
        if "/pyarrow/tests/" in path_str:
            return True
        if "/pyarrow/_parquet_encryption" in path_str:
            return True

    return False
|
|
|
|
|
|
# Save output files from doctest examples into temp dir
@pytest.fixture(autouse=True)
def _docdir(request):
    """Run doctests with the working directory set to a temporary dir.

    Applies to every test (autouse), but only changes directory when the
    ``doctestmodules`` option or the optional ``doctest_cython`` option is
    set; otherwise it yields without doing anything.
    """
    # Trigger ONLY for the doctests
    doctest_m = request.config.option.doctestmodules
    doctest_c = getattr(request.config.option, "doctest_cython", False)

    if doctest_m or doctest_c:
        # Get the fixture dynamically by its name, so that ``tmpdir`` is
        # only requested when doctests are actually running.
        tmpdir = request.getfixturevalue('tmpdir')

        # Chdir only for the duration of the test.
        with tmpdir.as_cwd():
            yield
    else:
        yield
|
|
|
|
|
|
# Define doctest_namespace for fs module docstring import
@pytest.fixture(autouse=True)
def add_fs(doctest_namespace, request, tmp_path):
    """Populate the doctest namespace with filesystem example objects.

    Applies to every test (autouse).  When the ``doctestmodules`` option or
    the optional ``doctest_cython`` option is set, the following names are
    added to ``doctest_namespace``:

    - ``fs``: the ``pyarrow.fs`` module
    - ``local``: a ``fs.LocalFileSystem`` instance
    - ``local_path``: ``str(tmp_path)``
    - ``path``: path of a file under ``tmp_path`` containing ``b'data'``

    Otherwise the namespace is left untouched.
    """
    # Trigger ONLY for the doctests
    doctest_m = request.config.option.doctestmodules
    doctest_c = getattr(request.config.option, "doctest_cython", False)

    if doctest_m or doctest_c:
        # fs import
        doctest_namespace["fs"] = fs

        # Creation of an object and file with data
        local = fs.LocalFileSystem()
        path = tmp_path / 'pyarrow-fs-example.dat'
        with local.open_output_stream(str(path)) as stream:
            stream.write(b'data')
        doctest_namespace["local"] = local
        doctest_namespace["local_path"] = str(tmp_path)
        doctest_namespace["path"] = str(path)

    # Yield in both modes so the fixture is always a valid generator.
    yield
|