Merging PR_218 openai_rev package with new streamlit chat app
venv/lib/python3.9/site-packages/pyarrow/tests/test_tensor.py (216 lines, Normal file)
@@ -0,0 +1,216 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import sys
import pytest
import weakref

import numpy as np
import pyarrow as pa


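# (NumPy dtype string, Arrow type) pairs used to parametrize the
# NumPy round-trip test below.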
tensor_type_pairs = [
    ('i1', pa.int8()),
    ('i2', pa.int16()),
    ('i4', pa.int32()),
    ('i8', pa.int64()),
    ('u1', pa.uint8()),
    ('u2', pa.uint16()),
    ('u4', pa.uint32()),
    ('u8', pa.uint64()),
    ('f2', pa.float16()),
    ('f4', pa.float32()),
    ('f8', pa.float64())
]


def test_tensor_attrs():
    data = np.random.randn(10, 4)

    tensor = pa.Tensor.from_numpy(data)

    assert tensor.ndim == 2
    assert tensor.dim_names == []
    assert tensor.size == 40
    assert tensor.shape == data.shape
    assert tensor.strides == data.strides

    assert tensor.is_contiguous
    assert tensor.is_mutable

    # not writeable
    data2 = data.copy()
    data2.flags.writeable = False
    tensor = pa.Tensor.from_numpy(data2)
    assert not tensor.is_mutable

    # With dim_names
    tensor = pa.Tensor.from_numpy(data, dim_names=('x', 'y'))
    assert tensor.ndim == 2
    assert tensor.dim_names == ['x', 'y']
    assert tensor.dim_name(0) == 'x'
    assert tensor.dim_name(1) == 'y'

    wr = weakref.ref(tensor)
    assert wr() is not None
    del tensor
    assert wr() is None


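# The NumPy view returned by to_numpy() holds a reference to its parent
# Tensor, so the Tensor's refcount rises while the view exists.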
def test_tensor_base_object():
    tensor = pa.Tensor.from_numpy(np.random.randn(10, 4))
    n = sys.getrefcount(tensor)
    array = tensor.to_numpy()  # noqa
    assert sys.getrefcount(tensor) == n + 1


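# Each supported dtype should survive a from_numpy() / to_numpy()
# round trip unchanged.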
@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_tensor_numpy_roundtrip(dtype_str, arrow_type):
    dtype = np.dtype(dtype_str)
    data = (100 * np.random.randn(10, 4)).astype(dtype)

    tensor = pa.Tensor.from_numpy(data)
    assert tensor.type == arrow_type

    repr(tensor)

    result = tensor.to_numpy()
    assert (data == result).all()


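# Write a tensor into a memory-mapped file with the IPC writer,
# then read it back and compare.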
def test_tensor_ipc_roundtrip(tmpdir):
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-roundtrip')
    mmap = pa.create_memory_map(path, 1024)

    pa.ipc.write_tensor(tensor, mmap)

    mmap.seek(0)
    result = pa.ipc.read_tensor(mmap)

    assert result.equals(tensor)


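# A tensor written through a gzip-compressed output stream should read
# back identically via a compressed input stream.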
@pytest.mark.gzip
def test_tensor_ipc_read_from_compressed(tempdir):
    # ARROW-5910
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = tempdir / 'tensor-compressed-file'

    out_stream = pa.output_stream(path, compression='gzip')
    pa.ipc.write_tensor(tensor, out_stream)
    out_stream.close()

    result = pa.ipc.read_tensor(pa.input_stream(path, compression='gzip'))
    assert result.equals(tensor)


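# Strided (non-contiguous) NumPy slices should also round-trip
# through the IPC path.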
def test_tensor_ipc_strided(tmpdir):
    data1 = np.random.randn(10, 4)
    tensor1 = pa.Tensor.from_numpy(data1[::2])

    data2 = np.random.randn(10, 6, 4)
    tensor2 = pa.Tensor.from_numpy(data2[::, ::2, ::])

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-strided')
    mmap = pa.create_memory_map(path, 2048)

    for tensor in [tensor1, tensor2]:
        mmap.seek(0)
        pa.ipc.write_tensor(tensor, mmap)

        mmap.seek(0)
        result = pa.ipc.read_tensor(mmap)

        assert result.equals(tensor)


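# Equality compares tensor contents, not memory layout: a strided view
# and its contiguous copy compare equal.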
def test_tensor_equals():
    def eq(a, b):
        assert a.equals(b)
        assert a == b
        assert not (a != b)

    def ne(a, b):
        assert not a.equals(b)
        assert not (a == b)
        assert a != b

    data = np.random.randn(10, 6, 4)[::, ::2, ::]
    tensor1 = pa.Tensor.from_numpy(data)
    tensor2 = pa.Tensor.from_numpy(np.ascontiguousarray(data))
    eq(tensor1, tensor2)
    data = data.copy()
    data[9, 0, 0] = 1.0
    tensor2 = pa.Tensor.from_numpy(np.ascontiguousarray(data))
    ne(tensor1, tensor2)


def test_tensor_hashing():
    # Tensors are unhashable
    with pytest.raises(TypeError, match="unhashable"):
        hash(pa.Tensor.from_numpy(np.arange(10)))


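# get_tensor_size() reports the serialized IPC size, which exceeds the
# raw 8-byte-per-element payload because it includes metadata.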
def test_tensor_size():
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)
    assert pa.ipc.get_tensor_size(tensor) > (data.size * 8)


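# The memory map is sized with get_tensor_size() so the serialized tensor
# fits; the result is read back through a separate read-only mapping.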
def test_read_tensor(tmpdir):
    # Create and write tensor
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)
    data_size = pa.ipc.get_tensor_size(tensor)
    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-read-tensor')
    write_mmap = pa.create_memory_map(path, data_size)
    pa.ipc.write_tensor(tensor, write_mmap)
    # Try to read tensor
    read_mmap = pa.memory_map(path, mode='r')
    array = pa.ipc.read_tensor(read_mmap).to_numpy()
    np.testing.assert_equal(data, array)


def test_tensor_memoryview():
    # Tensors support the PEP 3118 buffer protocol
    for dtype, expected_format in [(np.int8, '=b'),
                                   (np.int64, '=q'),
                                   (np.uint64, '=Q'),
                                   (np.float16, 'e'),
                                   (np.float64, 'd'),
                                   ]:
        data = np.arange(10, dtype=dtype)
        dtype = data.dtype
        lst = data.tolist()
        tensor = pa.Tensor.from_numpy(data)
        m = memoryview(tensor)
        assert m.format == expected_format
        assert m.shape == data.shape
        assert m.strides == data.strides
        assert m.ndim == 1
        assert m.nbytes == data.nbytes
        assert m.itemsize == data.itemsize
        assert m.itemsize * 8 == tensor.type.bit_width
        assert np.frombuffer(m, dtype).tolist() == lst
        del tensor, data
        assert np.frombuffer(m, dtype).tolist() == lst