Merging PR_218 openai_rev package with new streamlit chat app
This commit is contained in:
792
venv/lib/python3.9/site-packages/pyarrow/tests/test_cuda.py
Normal file
792
venv/lib/python3.9/site-packages/pyarrow/tests/test_cuda.py
Normal file
@@ -0,0 +1,792 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""
|
||||
UNTESTED:
|
||||
read_message
|
||||
"""
|
||||
|
||||
import sys
|
||||
import sysconfig
|
||||
|
||||
import pytest
|
||||
|
||||
import pyarrow as pa
|
||||
import numpy as np
|
||||
|
||||
|
||||
cuda = pytest.importorskip("pyarrow.cuda")
|
||||
|
||||
platform = sysconfig.get_platform()
|
||||
# TODO: enable ppc64 when Arrow C++ supports IPC in ppc64 systems:
|
||||
has_ipc_support = platform == 'linux-x86_64' # or 'ppc64' in platform
|
||||
|
||||
cuda_ipc = pytest.mark.skipif(
|
||||
not has_ipc_support,
|
||||
reason='CUDA IPC not supported in platform `%s`' % (platform))
|
||||
|
||||
global_context = None # for flake8
|
||||
global_context1 = None # for flake8
|
||||
|
||||
|
||||
def setup_module(module):
|
||||
module.global_context = cuda.Context(0)
|
||||
module.global_context1 = cuda.Context(cuda.Context.get_num_devices() - 1)
|
||||
|
||||
|
||||
def teardown_module(module):
|
||||
del module.global_context
|
||||
|
||||
|
||||
def test_Context():
|
||||
assert cuda.Context.get_num_devices() > 0
|
||||
assert global_context.device_number == 0
|
||||
assert global_context1.device_number == cuda.Context.get_num_devices() - 1
|
||||
|
||||
with pytest.raises(ValueError,
|
||||
match=("device_number argument must "
|
||||
"be non-negative less than")):
|
||||
cuda.Context(cuda.Context.get_num_devices())
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", [0, 1, 1000])
|
||||
def test_manage_allocate_free_host(size):
|
||||
buf = cuda.new_host_buffer(size)
|
||||
arr = np.frombuffer(buf, dtype=np.uint8)
|
||||
arr[size//4:3*size//4] = 1
|
||||
arr_cp = arr.copy()
|
||||
arr2 = np.frombuffer(buf, dtype=np.uint8)
|
||||
np.testing.assert_equal(arr2, arr_cp)
|
||||
assert buf.size == size
|
||||
|
||||
|
||||
def test_context_allocate_del():
|
||||
bytes_allocated = global_context.bytes_allocated
|
||||
cudabuf = global_context.new_buffer(128)
|
||||
assert global_context.bytes_allocated == bytes_allocated + 128
|
||||
del cudabuf
|
||||
assert global_context.bytes_allocated == bytes_allocated
|
||||
|
||||
|
||||
def make_random_buffer(size, target='host'):
|
||||
"""Return a host or device buffer with random data.
|
||||
"""
|
||||
if target == 'host':
|
||||
assert size >= 0
|
||||
buf = pa.allocate_buffer(size)
|
||||
assert buf.size == size
|
||||
arr = np.frombuffer(buf, dtype=np.uint8)
|
||||
assert arr.size == size
|
||||
arr[:] = np.random.randint(low=1, high=255, size=size, dtype=np.uint8)
|
||||
assert arr.sum() > 0 or size == 0
|
||||
arr_ = np.frombuffer(buf, dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr_)
|
||||
return arr, buf
|
||||
elif target == 'device':
|
||||
arr, buf = make_random_buffer(size, target='host')
|
||||
dbuf = global_context.new_buffer(size)
|
||||
assert dbuf.size == size
|
||||
dbuf.copy_from_host(buf, position=0, nbytes=size)
|
||||
return arr, dbuf
|
||||
raise ValueError('invalid target value')
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", [0, 1, 1000])
|
||||
def test_context_device_buffer(size):
|
||||
# Creating device buffer from host buffer;
|
||||
arr, buf = make_random_buffer(size)
|
||||
cudabuf = global_context.buffer_from_data(buf)
|
||||
assert cudabuf.size == size
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
# CudaBuffer does not support buffer protocol
|
||||
with pytest.raises(BufferError):
|
||||
memoryview(cudabuf)
|
||||
|
||||
# Creating device buffer from array:
|
||||
cudabuf = global_context.buffer_from_data(arr)
|
||||
assert cudabuf.size == size
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
# Creating device buffer from bytes:
|
||||
cudabuf = global_context.buffer_from_data(arr.tobytes())
|
||||
assert cudabuf.size == size
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
# Creating a device buffer from another device buffer, view:
|
||||
cudabuf2 = cudabuf.slice(0, cudabuf.size)
|
||||
assert cudabuf2.size == size
|
||||
arr2 = np.frombuffer(cudabuf2.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
if size > 1:
|
||||
cudabuf2.copy_from_host(arr[size//2:])
|
||||
arr3 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(np.concatenate((arr[size//2:], arr[size//2:])),
|
||||
arr3)
|
||||
cudabuf2.copy_from_host(arr[:size//2]) # restoring arr
|
||||
|
||||
# Creating a device buffer from another device buffer, copy:
|
||||
cudabuf2 = global_context.buffer_from_data(cudabuf)
|
||||
assert cudabuf2.size == size
|
||||
arr2 = np.frombuffer(cudabuf2.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
cudabuf2.copy_from_host(arr[size//2:])
|
||||
arr3 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr3)
|
||||
|
||||
# Slice of a device buffer
|
||||
cudabuf2 = cudabuf.slice(0, cudabuf.size+10)
|
||||
assert cudabuf2.size == size
|
||||
arr2 = np.frombuffer(cudabuf2.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
cudabuf2 = cudabuf.slice(size//4, size+10)
|
||||
assert cudabuf2.size == size - size//4
|
||||
arr2 = np.frombuffer(cudabuf2.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr[size//4:], arr2)
|
||||
|
||||
# Creating a device buffer from a slice of host buffer
|
||||
soffset = size//4
|
||||
ssize = 2*size//4
|
||||
cudabuf = global_context.buffer_from_data(buf, offset=soffset,
|
||||
size=ssize)
|
||||
assert cudabuf.size == ssize
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr[soffset:soffset + ssize], arr2)
|
||||
|
||||
cudabuf = global_context.buffer_from_data(buf.slice(offset=soffset,
|
||||
length=ssize))
|
||||
assert cudabuf.size == ssize
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr[soffset:soffset + ssize], arr2)
|
||||
|
||||
# Creating a device buffer from a slice of an array
|
||||
cudabuf = global_context.buffer_from_data(arr, offset=soffset, size=ssize)
|
||||
assert cudabuf.size == ssize
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr[soffset:soffset + ssize], arr2)
|
||||
|
||||
cudabuf = global_context.buffer_from_data(arr[soffset:soffset+ssize])
|
||||
assert cudabuf.size == ssize
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr[soffset:soffset + ssize], arr2)
|
||||
|
||||
# Creating a device buffer from a slice of bytes
|
||||
cudabuf = global_context.buffer_from_data(arr.tobytes(),
|
||||
offset=soffset,
|
||||
size=ssize)
|
||||
assert cudabuf.size == ssize
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr[soffset:soffset + ssize], arr2)
|
||||
|
||||
# Creating a device buffer from size
|
||||
cudabuf = global_context.new_buffer(size)
|
||||
assert cudabuf.size == size
|
||||
|
||||
# Creating device buffer from a slice of another device buffer:
|
||||
cudabuf = global_context.buffer_from_data(arr)
|
||||
cudabuf2 = cudabuf.slice(soffset, ssize)
|
||||
assert cudabuf2.size == ssize
|
||||
arr2 = np.frombuffer(cudabuf2.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr[soffset:soffset+ssize], arr2)
|
||||
|
||||
# Creating device buffer from HostBuffer
|
||||
|
||||
buf = cuda.new_host_buffer(size)
|
||||
arr_ = np.frombuffer(buf, dtype=np.uint8)
|
||||
arr_[:] = arr
|
||||
cudabuf = global_context.buffer_from_data(buf)
|
||||
assert cudabuf.size == size
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
# Creating device buffer from HostBuffer slice
|
||||
|
||||
cudabuf = global_context.buffer_from_data(buf, offset=soffset, size=ssize)
|
||||
assert cudabuf.size == ssize
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr[soffset:soffset+ssize], arr2)
|
||||
|
||||
cudabuf = global_context.buffer_from_data(
|
||||
buf.slice(offset=soffset, length=ssize))
|
||||
assert cudabuf.size == ssize
|
||||
arr2 = np.frombuffer(cudabuf.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr[soffset:soffset+ssize], arr2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", [0, 1, 1000])
|
||||
def test_context_from_object(size):
|
||||
ctx = global_context
|
||||
arr, cbuf = make_random_buffer(size, target='device')
|
||||
dtype = arr.dtype
|
||||
|
||||
# Creating device buffer from a CUDA host buffer
|
||||
hbuf = cuda.new_host_buffer(size * arr.dtype.itemsize)
|
||||
np.frombuffer(hbuf, dtype=dtype)[:] = arr
|
||||
cbuf2 = ctx.buffer_from_object(hbuf)
|
||||
assert cbuf2.size == cbuf.size
|
||||
arr2 = np.frombuffer(cbuf2.copy_to_host(), dtype=dtype)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
# Creating device buffer from a device buffer
|
||||
cbuf2 = ctx.buffer_from_object(cbuf2)
|
||||
assert cbuf2.size == cbuf.size
|
||||
arr2 = np.frombuffer(cbuf2.copy_to_host(), dtype=dtype)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
# Trying to create a device buffer from a Buffer
|
||||
with pytest.raises(pa.ArrowTypeError,
|
||||
match=('buffer is not backed by a CudaBuffer')):
|
||||
ctx.buffer_from_object(pa.py_buffer(b"123"))
|
||||
|
||||
# Trying to create a device buffer from numpy.array
|
||||
with pytest.raises(pa.ArrowTypeError,
|
||||
match=("cannot create device buffer view from "
|
||||
".* \'numpy.ndarray\'")):
|
||||
ctx.buffer_from_object(np.array([1, 2, 3]))
|
||||
|
||||
|
||||
def test_foreign_buffer():
|
||||
ctx = global_context
|
||||
dtype = np.dtype(np.uint8)
|
||||
size = 10
|
||||
hbuf = cuda.new_host_buffer(size * dtype.itemsize)
|
||||
|
||||
# test host buffer memory reference counting
|
||||
rc = sys.getrefcount(hbuf)
|
||||
fbuf = ctx.foreign_buffer(hbuf.address, hbuf.size, hbuf)
|
||||
assert sys.getrefcount(hbuf) == rc + 1
|
||||
del fbuf
|
||||
assert sys.getrefcount(hbuf) == rc
|
||||
|
||||
# test postponed deallocation of host buffer memory
|
||||
fbuf = ctx.foreign_buffer(hbuf.address, hbuf.size, hbuf)
|
||||
del hbuf
|
||||
fbuf.copy_to_host()
|
||||
|
||||
# test deallocating the host buffer memory making it inaccessible
|
||||
hbuf = cuda.new_host_buffer(size * dtype.itemsize)
|
||||
fbuf = ctx.foreign_buffer(hbuf.address, hbuf.size)
|
||||
del hbuf
|
||||
with pytest.raises(pa.ArrowIOError,
|
||||
match=('Cuda error ')):
|
||||
fbuf.copy_to_host()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", [0, 1, 1000])
|
||||
def test_CudaBuffer(size):
|
||||
arr, buf = make_random_buffer(size)
|
||||
assert arr.tobytes() == buf.to_pybytes()
|
||||
cbuf = global_context.buffer_from_data(buf)
|
||||
assert cbuf.size == size
|
||||
assert not cbuf.is_cpu
|
||||
assert arr.tobytes() == cbuf.to_pybytes()
|
||||
if size > 0:
|
||||
assert cbuf.address > 0
|
||||
|
||||
for i in range(size):
|
||||
assert cbuf[i] == arr[i]
|
||||
|
||||
for s in [
|
||||
slice(None),
|
||||
slice(size//4, size//2),
|
||||
]:
|
||||
assert cbuf[s].to_pybytes() == arr[s].tobytes()
|
||||
|
||||
sbuf = cbuf.slice(size//4, size//2)
|
||||
assert sbuf.parent == cbuf
|
||||
|
||||
with pytest.raises(TypeError,
|
||||
match="Do not call CudaBuffer's constructor directly"):
|
||||
cuda.CudaBuffer()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", [0, 1, 1000])
|
||||
def test_HostBuffer(size):
|
||||
arr, buf = make_random_buffer(size)
|
||||
assert arr.tobytes() == buf.to_pybytes()
|
||||
hbuf = cuda.new_host_buffer(size)
|
||||
np.frombuffer(hbuf, dtype=np.uint8)[:] = arr
|
||||
assert hbuf.size == size
|
||||
assert hbuf.is_cpu
|
||||
assert arr.tobytes() == hbuf.to_pybytes()
|
||||
for i in range(size):
|
||||
assert hbuf[i] == arr[i]
|
||||
for s in [
|
||||
slice(None),
|
||||
slice(size//4, size//2),
|
||||
]:
|
||||
assert hbuf[s].to_pybytes() == arr[s].tobytes()
|
||||
|
||||
sbuf = hbuf.slice(size//4, size//2)
|
||||
assert sbuf.parent == hbuf
|
||||
|
||||
del hbuf
|
||||
|
||||
with pytest.raises(TypeError,
|
||||
match="Do not call HostBuffer's constructor directly"):
|
||||
cuda.HostBuffer()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", [0, 1, 1000])
|
||||
def test_copy_from_to_host(size):
|
||||
|
||||
# Create a buffer in host containing range(size)
|
||||
buf = pa.allocate_buffer(size, resizable=True) # in host
|
||||
assert isinstance(buf, pa.Buffer)
|
||||
assert not isinstance(buf, cuda.CudaBuffer)
|
||||
arr = np.frombuffer(buf, dtype=np.uint8)
|
||||
assert arr.size == size
|
||||
arr[:] = range(size)
|
||||
arr_ = np.frombuffer(buf, dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr_)
|
||||
|
||||
device_buffer = global_context.new_buffer(size)
|
||||
assert isinstance(device_buffer, cuda.CudaBuffer)
|
||||
assert isinstance(device_buffer, pa.Buffer)
|
||||
assert device_buffer.size == size
|
||||
assert not device_buffer.is_cpu
|
||||
|
||||
device_buffer.copy_from_host(buf, position=0, nbytes=size)
|
||||
|
||||
buf2 = device_buffer.copy_to_host(position=0, nbytes=size)
|
||||
arr2 = np.frombuffer(buf2, dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", [0, 1, 1000])
|
||||
def test_copy_to_host(size):
|
||||
arr, dbuf = make_random_buffer(size, target='device')
|
||||
|
||||
buf = dbuf.copy_to_host()
|
||||
assert buf.is_cpu
|
||||
np.testing.assert_equal(arr, np.frombuffer(buf, dtype=np.uint8))
|
||||
|
||||
buf = dbuf.copy_to_host(position=size//4)
|
||||
assert buf.is_cpu
|
||||
np.testing.assert_equal(arr[size//4:], np.frombuffer(buf, dtype=np.uint8))
|
||||
|
||||
buf = dbuf.copy_to_host(position=size//4, nbytes=size//8)
|
||||
assert buf.is_cpu
|
||||
np.testing.assert_equal(arr[size//4:size//4+size//8],
|
||||
np.frombuffer(buf, dtype=np.uint8))
|
||||
|
||||
buf = dbuf.copy_to_host(position=size//4, nbytes=0)
|
||||
assert buf.is_cpu
|
||||
assert buf.size == 0
|
||||
|
||||
for (position, nbytes) in [
|
||||
(size+2, -1), (-2, -1), (size+1, 0), (-3, 0),
|
||||
]:
|
||||
with pytest.raises(ValueError,
|
||||
match='position argument is out-of-range'):
|
||||
dbuf.copy_to_host(position=position, nbytes=nbytes)
|
||||
|
||||
for (position, nbytes) in [
|
||||
(0, size+1), (size//2, (size+1)//2+1), (size, 1)
|
||||
]:
|
||||
with pytest.raises(ValueError,
|
||||
match=('requested more to copy than'
|
||||
' available from device buffer')):
|
||||
dbuf.copy_to_host(position=position, nbytes=nbytes)
|
||||
|
||||
buf = pa.allocate_buffer(size//4)
|
||||
dbuf.copy_to_host(buf=buf)
|
||||
np.testing.assert_equal(arr[:size//4], np.frombuffer(buf, dtype=np.uint8))
|
||||
|
||||
if size < 12:
|
||||
return
|
||||
|
||||
dbuf.copy_to_host(buf=buf, position=12)
|
||||
np.testing.assert_equal(arr[12:12+size//4],
|
||||
np.frombuffer(buf, dtype=np.uint8))
|
||||
|
||||
dbuf.copy_to_host(buf=buf, nbytes=12)
|
||||
np.testing.assert_equal(arr[:12], np.frombuffer(buf, dtype=np.uint8)[:12])
|
||||
|
||||
dbuf.copy_to_host(buf=buf, nbytes=12, position=6)
|
||||
np.testing.assert_equal(arr[6:6+12],
|
||||
np.frombuffer(buf, dtype=np.uint8)[:12])
|
||||
|
||||
for (position, nbytes) in [
|
||||
(0, size+10), (10, size-5),
|
||||
(0, size//2), (size//4, size//4+1)
|
||||
]:
|
||||
with pytest.raises(ValueError,
|
||||
match=('requested copy does not '
|
||||
'fit into host buffer')):
|
||||
dbuf.copy_to_host(buf=buf, position=position, nbytes=nbytes)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dest_ctx", ['same', 'another'])
|
||||
@pytest.mark.parametrize("size", [0, 1, 1000])
|
||||
def test_copy_from_device(dest_ctx, size):
|
||||
arr, buf = make_random_buffer(size=size, target='device')
|
||||
lst = arr.tolist()
|
||||
if dest_ctx == 'another':
|
||||
dest_ctx = global_context1
|
||||
if buf.context.device_number == dest_ctx.device_number:
|
||||
pytest.skip("not a multi-GPU system")
|
||||
else:
|
||||
dest_ctx = buf.context
|
||||
dbuf = dest_ctx.new_buffer(size)
|
||||
|
||||
def put(*args, **kwargs):
|
||||
dbuf.copy_from_device(buf, *args, **kwargs)
|
||||
rbuf = dbuf.copy_to_host()
|
||||
return np.frombuffer(rbuf, dtype=np.uint8).tolist()
|
||||
assert put() == lst
|
||||
if size > 4:
|
||||
assert put(position=size//4) == lst[:size//4]+lst[:-size//4]
|
||||
assert put() == lst
|
||||
assert put(position=1, nbytes=size//2) == \
|
||||
lst[:1] + lst[:size//2] + lst[-(size-size//2-1):]
|
||||
|
||||
for (position, nbytes) in [
|
||||
(size+2, -1), (-2, -1), (size+1, 0), (-3, 0),
|
||||
]:
|
||||
with pytest.raises(ValueError,
|
||||
match='position argument is out-of-range'):
|
||||
put(position=position, nbytes=nbytes)
|
||||
|
||||
for (position, nbytes) in [
|
||||
(0, size+1),
|
||||
]:
|
||||
with pytest.raises(ValueError,
|
||||
match=('requested more to copy than'
|
||||
' available from device buffer')):
|
||||
put(position=position, nbytes=nbytes)
|
||||
|
||||
if size < 4:
|
||||
return
|
||||
|
||||
for (position, nbytes) in [
|
||||
(size//2, (size+1)//2+1)
|
||||
]:
|
||||
with pytest.raises(ValueError,
|
||||
match=('requested more to copy than'
|
||||
' available in device buffer')):
|
||||
put(position=position, nbytes=nbytes)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", [0, 1, 1000])
|
||||
def test_copy_from_host(size):
|
||||
arr, buf = make_random_buffer(size=size, target='host')
|
||||
lst = arr.tolist()
|
||||
dbuf = global_context.new_buffer(size)
|
||||
|
||||
def put(*args, **kwargs):
|
||||
dbuf.copy_from_host(buf, *args, **kwargs)
|
||||
rbuf = dbuf.copy_to_host()
|
||||
return np.frombuffer(rbuf, dtype=np.uint8).tolist()
|
||||
assert put() == lst
|
||||
if size > 4:
|
||||
assert put(position=size//4) == lst[:size//4]+lst[:-size//4]
|
||||
assert put() == lst
|
||||
assert put(position=1, nbytes=size//2) == \
|
||||
lst[:1] + lst[:size//2] + lst[-(size-size//2-1):]
|
||||
|
||||
for (position, nbytes) in [
|
||||
(size+2, -1), (-2, -1), (size+1, 0), (-3, 0),
|
||||
]:
|
||||
with pytest.raises(ValueError,
|
||||
match='position argument is out-of-range'):
|
||||
put(position=position, nbytes=nbytes)
|
||||
|
||||
for (position, nbytes) in [
|
||||
(0, size+1),
|
||||
]:
|
||||
with pytest.raises(ValueError,
|
||||
match=('requested more to copy than'
|
||||
' available from host buffer')):
|
||||
put(position=position, nbytes=nbytes)
|
||||
|
||||
if size < 4:
|
||||
return
|
||||
|
||||
for (position, nbytes) in [
|
||||
(size//2, (size+1)//2+1)
|
||||
]:
|
||||
with pytest.raises(ValueError,
|
||||
match=('requested more to copy than'
|
||||
' available in device buffer')):
|
||||
put(position=position, nbytes=nbytes)
|
||||
|
||||
|
||||
def test_BufferWriter():
|
||||
def allocate(size):
|
||||
cbuf = global_context.new_buffer(size)
|
||||
writer = cuda.BufferWriter(cbuf)
|
||||
return cbuf, writer
|
||||
|
||||
def test_writes(total_size, chunksize, buffer_size=0):
|
||||
cbuf, writer = allocate(total_size)
|
||||
arr, buf = make_random_buffer(size=total_size, target='host')
|
||||
|
||||
if buffer_size > 0:
|
||||
writer.buffer_size = buffer_size
|
||||
|
||||
position = writer.tell()
|
||||
assert position == 0
|
||||
writer.write(buf.slice(length=chunksize))
|
||||
assert writer.tell() == chunksize
|
||||
writer.seek(0)
|
||||
position = writer.tell()
|
||||
assert position == 0
|
||||
|
||||
while position < total_size:
|
||||
bytes_to_write = min(chunksize, total_size - position)
|
||||
writer.write(buf.slice(offset=position, length=bytes_to_write))
|
||||
position += bytes_to_write
|
||||
|
||||
writer.flush()
|
||||
assert cbuf.size == total_size
|
||||
cbuf.context.synchronize()
|
||||
buf2 = cbuf.copy_to_host()
|
||||
cbuf.context.synchronize()
|
||||
assert buf2.size == total_size
|
||||
arr2 = np.frombuffer(buf2, dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
total_size, chunk_size = 1 << 16, 1000
|
||||
test_writes(total_size, chunk_size)
|
||||
test_writes(total_size, chunk_size, total_size // 16)
|
||||
|
||||
cbuf, writer = allocate(100)
|
||||
writer.write(np.arange(100, dtype=np.uint8))
|
||||
writer.writeat(50, np.arange(25, dtype=np.uint8))
|
||||
writer.write(np.arange(25, dtype=np.uint8))
|
||||
writer.flush()
|
||||
|
||||
arr = np.frombuffer(cbuf.copy_to_host(), np.uint8)
|
||||
np.testing.assert_equal(arr[:50], np.arange(50, dtype=np.uint8))
|
||||
np.testing.assert_equal(arr[50:75], np.arange(25, dtype=np.uint8))
|
||||
np.testing.assert_equal(arr[75:], np.arange(25, dtype=np.uint8))
|
||||
|
||||
|
||||
def test_BufferWriter_edge_cases():
|
||||
# edge cases, see cuda-test.cc for more information:
|
||||
size = 1000
|
||||
cbuf = global_context.new_buffer(size)
|
||||
writer = cuda.BufferWriter(cbuf)
|
||||
arr, buf = make_random_buffer(size=size, target='host')
|
||||
|
||||
assert writer.buffer_size == 0
|
||||
writer.buffer_size = 100
|
||||
assert writer.buffer_size == 100
|
||||
|
||||
writer.write(buf.slice(length=0))
|
||||
assert writer.tell() == 0
|
||||
|
||||
writer.write(buf.slice(length=10))
|
||||
writer.buffer_size = 200
|
||||
assert writer.buffer_size == 200
|
||||
assert writer.num_bytes_buffered == 0
|
||||
|
||||
writer.write(buf.slice(offset=10, length=300))
|
||||
assert writer.num_bytes_buffered == 0
|
||||
|
||||
writer.write(buf.slice(offset=310, length=200))
|
||||
assert writer.num_bytes_buffered == 0
|
||||
|
||||
writer.write(buf.slice(offset=510, length=390))
|
||||
writer.write(buf.slice(offset=900, length=100))
|
||||
|
||||
writer.flush()
|
||||
|
||||
buf2 = cbuf.copy_to_host()
|
||||
assert buf2.size == size
|
||||
arr2 = np.frombuffer(buf2, dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
|
||||
def test_BufferReader():
|
||||
size = 1000
|
||||
arr, cbuf = make_random_buffer(size=size, target='device')
|
||||
|
||||
reader = cuda.BufferReader(cbuf)
|
||||
reader.seek(950)
|
||||
assert reader.tell() == 950
|
||||
|
||||
data = reader.read(100)
|
||||
assert len(data) == 50
|
||||
assert reader.tell() == 1000
|
||||
|
||||
reader.seek(925)
|
||||
arr2 = np.zeros(100, dtype=np.uint8)
|
||||
n = reader.readinto(arr2)
|
||||
assert n == 75
|
||||
assert reader.tell() == 1000
|
||||
np.testing.assert_equal(arr[925:], arr2[:75])
|
||||
|
||||
reader.seek(0)
|
||||
assert reader.tell() == 0
|
||||
buf2 = reader.read_buffer()
|
||||
arr2 = np.frombuffer(buf2.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
|
||||
def test_BufferReader_zero_size():
|
||||
arr, cbuf = make_random_buffer(size=0, target='device')
|
||||
reader = cuda.BufferReader(cbuf)
|
||||
reader.seek(0)
|
||||
data = reader.read()
|
||||
assert len(data) == 0
|
||||
assert reader.tell() == 0
|
||||
buf2 = reader.read_buffer()
|
||||
arr2 = np.frombuffer(buf2.copy_to_host(), dtype=np.uint8)
|
||||
np.testing.assert_equal(arr, arr2)
|
||||
|
||||
|
||||
def make_recordbatch(length):
|
||||
schema = pa.schema([pa.field('f0', pa.int16()),
|
||||
pa.field('f1', pa.int16())])
|
||||
a0 = pa.array(np.random.randint(0, 255, size=length, dtype=np.int16))
|
||||
a1 = pa.array(np.random.randint(0, 255, size=length, dtype=np.int16))
|
||||
batch = pa.record_batch([a0, a1], schema=schema)
|
||||
return batch
|
||||
|
||||
|
||||
def test_batch_serialize():
|
||||
batch = make_recordbatch(10)
|
||||
hbuf = batch.serialize()
|
||||
cbuf = cuda.serialize_record_batch(batch, global_context)
|
||||
|
||||
# Test that read_record_batch works properly
|
||||
cbatch = cuda.read_record_batch(cbuf, batch.schema)
|
||||
assert isinstance(cbatch, pa.RecordBatch)
|
||||
assert batch.schema == cbatch.schema
|
||||
assert batch.num_columns == cbatch.num_columns
|
||||
assert batch.num_rows == cbatch.num_rows
|
||||
|
||||
# Deserialize CUDA-serialized batch on host
|
||||
buf = cbuf.copy_to_host()
|
||||
assert hbuf.equals(buf)
|
||||
batch2 = pa.ipc.read_record_batch(buf, batch.schema)
|
||||
assert hbuf.equals(batch2.serialize())
|
||||
|
||||
assert batch.num_columns == batch2.num_columns
|
||||
assert batch.num_rows == batch2.num_rows
|
||||
assert batch.column(0).equals(batch2.column(0))
|
||||
assert batch.equals(batch2)
|
||||
|
||||
|
||||
def make_table():
|
||||
a0 = pa.array([0, 1, 42, None], type=pa.int16())
|
||||
a1 = pa.array([[0, 1], [2], [], None], type=pa.list_(pa.int32()))
|
||||
a2 = pa.array([("ab", True), ("cde", False), (None, None), None],
|
||||
type=pa.struct([("strs", pa.utf8()),
|
||||
("bools", pa.bool_())]))
|
||||
# Dictionaries are validated on the IPC read path, but that can produce
|
||||
# issues for GPU-located dictionaries. Check that they work fine.
|
||||
a3 = pa.DictionaryArray.from_arrays(
|
||||
indices=[0, 1, 1, None],
|
||||
dictionary=pa.array(['foo', 'bar']))
|
||||
a4 = pa.DictionaryArray.from_arrays(
|
||||
indices=[2, 1, 2, None],
|
||||
dictionary=a1)
|
||||
a5 = pa.DictionaryArray.from_arrays(
|
||||
indices=[2, 1, 0, None],
|
||||
dictionary=a2)
|
||||
|
||||
arrays = [a0, a1, a2, a3, a4, a5]
|
||||
schema = pa.schema([('f{}'.format(i), arr.type)
|
||||
for i, arr in enumerate(arrays)])
|
||||
batch = pa.record_batch(arrays, schema=schema)
|
||||
table = pa.Table.from_batches([batch])
|
||||
return table
|
||||
|
||||
|
||||
def make_table_cuda():
|
||||
htable = make_table()
|
||||
# Serialize the host table to bytes
|
||||
sink = pa.BufferOutputStream()
|
||||
with pa.ipc.new_stream(sink, htable.schema) as out:
|
||||
out.write_table(htable)
|
||||
hbuf = pa.py_buffer(sink.getvalue().to_pybytes())
|
||||
|
||||
# Copy the host bytes to a device buffer
|
||||
dbuf = global_context.new_buffer(len(hbuf))
|
||||
dbuf.copy_from_host(hbuf, nbytes=len(hbuf))
|
||||
# Deserialize the device buffer into a Table
|
||||
dtable = pa.ipc.open_stream(cuda.BufferReader(dbuf)).read_all()
|
||||
return hbuf, htable, dbuf, dtable
|
||||
|
||||
|
||||
def test_table_deserialize():
|
||||
# ARROW-9659: make sure that we can deserialize a GPU-located table
|
||||
# without crashing when initializing or validating the underlying arrays.
|
||||
hbuf, htable, dbuf, dtable = make_table_cuda()
|
||||
# Assert basic fields the same between host and device tables
|
||||
assert htable.schema == dtable.schema
|
||||
assert htable.num_rows == dtable.num_rows
|
||||
assert htable.num_columns == dtable.num_columns
|
||||
# Assert byte-level equality
|
||||
assert hbuf.equals(dbuf.copy_to_host())
|
||||
# Copy DtoH and assert the tables are still equivalent
|
||||
assert htable.equals(pa.ipc.open_stream(
|
||||
dbuf.copy_to_host()
|
||||
).read_all())
|
||||
|
||||
|
||||
def test_create_table_with_device_buffers():
|
||||
# ARROW-11872: make sure that we can create an Arrow Table from
|
||||
# GPU-located Arrays without crashing.
|
||||
hbuf, htable, dbuf, dtable = make_table_cuda()
|
||||
# Construct a new Table from the device Table
|
||||
dtable2 = pa.Table.from_arrays(dtable.columns, dtable.column_names)
|
||||
# Assert basic fields the same between host and device tables
|
||||
assert htable.schema == dtable2.schema
|
||||
assert htable.num_rows == dtable2.num_rows
|
||||
assert htable.num_columns == dtable2.num_columns
|
||||
# Assert byte-level equality
|
||||
assert hbuf.equals(dbuf.copy_to_host())
|
||||
# Copy DtoH and assert the tables are still equivalent
|
||||
assert htable.equals(pa.ipc.open_stream(
|
||||
dbuf.copy_to_host()
|
||||
).read_all())
|
||||
|
||||
|
||||
def other_process_for_test_IPC(handle_buffer, expected_arr):
|
||||
other_context = pa.cuda.Context(0)
|
||||
ipc_handle = pa.cuda.IpcMemHandle.from_buffer(handle_buffer)
|
||||
ipc_buf = other_context.open_ipc_buffer(ipc_handle)
|
||||
ipc_buf.context.synchronize()
|
||||
buf = ipc_buf.copy_to_host()
|
||||
assert buf.size == expected_arr.size, repr((buf.size, expected_arr.size))
|
||||
arr = np.frombuffer(buf, dtype=expected_arr.dtype)
|
||||
np.testing.assert_equal(arr, expected_arr)
|
||||
|
||||
|
||||
@cuda_ipc
|
||||
@pytest.mark.parametrize("size", [0, 1, 1000])
|
||||
def test_IPC(size):
|
||||
import multiprocessing
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
arr, cbuf = make_random_buffer(size=size, target='device')
|
||||
ipc_handle = cbuf.export_for_ipc()
|
||||
handle_buffer = ipc_handle.serialize()
|
||||
p = ctx.Process(target=other_process_for_test_IPC,
|
||||
args=(handle_buffer, arr))
|
||||
p.start()
|
||||
p.join()
|
||||
assert p.exitcode == 0
|
||||
Reference in New Issue
Block a user