Merging PR_218 openai_rev package with new streamlit chat app
venv/lib/python3.9/site-packages/pyarrow/tests/test_tensor.py (216 lines, Normal file)
@@ -0,0 +1,216 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import sys
import pytest
import weakref

import numpy as np
import pyarrow as pa


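# (NumPy dtype string, Arrow type) pairs used to parametrize the
# NumPy round-trip test below.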
tensor_type_pairs = [
    ('i1', pa.int8()),
    ('i2', pa.int16()),
    ('i4', pa.int32()),
    ('i8', pa.int64()),
    ('u1', pa.uint8()),
    ('u2', pa.uint16()),
    ('u4', pa.uint32()),
    ('u8', pa.uint64()),
    ('f2', pa.float16()),
    ('f4', pa.float32()),
    ('f8', pa.float64())
]


def test_tensor_attrs():
    data = np.random.randn(10, 4)

    tensor = pa.Tensor.from_numpy(data)

    assert tensor.ndim == 2
    assert tensor.dim_names == []
    assert tensor.size == 40
    assert tensor.shape == data.shape
    assert tensor.strides == data.strides

    assert tensor.is_contiguous
    assert tensor.is_mutable

    # not writeable
    data2 = data.copy()
    data2.flags.writeable = False
    tensor = pa.Tensor.from_numpy(data2)
    assert not tensor.is_mutable

    # With dim_names
    tensor = pa.Tensor.from_numpy(data, dim_names=('x', 'y'))
    assert tensor.ndim == 2
    assert tensor.dim_names == ['x', 'y']
    assert tensor.dim_name(0) == 'x'
    assert tensor.dim_name(1) == 'y'

    wr = weakref.ref(tensor)
    assert wr() is not None
    del tensor
    assert wr() is None


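# The NumPy view returned by to_numpy() holds a reference to its parent
# Tensor, so the Tensor's refcount rises while the view exists.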
def test_tensor_base_object():
    tensor = pa.Tensor.from_numpy(np.random.randn(10, 4))
    n = sys.getrefcount(tensor)
    array = tensor.to_numpy()  # noqa
    assert sys.getrefcount(tensor) == n + 1


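# Each supported dtype should survive a from_numpy() / to_numpy()
# round trip unchanged.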
@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_tensor_numpy_roundtrip(dtype_str, arrow_type):
    dtype = np.dtype(dtype_str)
    data = (100 * np.random.randn(10, 4)).astype(dtype)

    tensor = pa.Tensor.from_numpy(data)
    assert tensor.type == arrow_type

    repr(tensor)

    result = tensor.to_numpy()
    assert (data == result).all()


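# Write a tensor into a memory-mapped file with the IPC writer,
# then read it back and compare.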
def test_tensor_ipc_roundtrip(tmpdir):
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-roundtrip')
    mmap = pa.create_memory_map(path, 1024)

    pa.ipc.write_tensor(tensor, mmap)

    mmap.seek(0)
    result = pa.ipc.read_tensor(mmap)

    assert result.equals(tensor)


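# A tensor written through a gzip-compressed output stream should read
# back identically via a compressed input stream.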
@pytest.mark.gzip
def test_tensor_ipc_read_from_compressed(tempdir):
    # ARROW-5910
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = tempdir / 'tensor-compressed-file'

    out_stream = pa.output_stream(path, compression='gzip')
    pa.ipc.write_tensor(tensor, out_stream)
    out_stream.close()

    result = pa.ipc.read_tensor(pa.input_stream(path, compression='gzip'))
    assert result.equals(tensor)


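# Strided (non-contiguous) NumPy slices should also round-trip
# through the IPC path.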
def test_tensor_ipc_strided(tmpdir):
    data1 = np.random.randn(10, 4)
    tensor1 = pa.Tensor.from_numpy(data1[::2])

    data2 = np.random.randn(10, 6, 4)
    tensor2 = pa.Tensor.from_numpy(data2[::, ::2, ::])

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-strided')
    mmap = pa.create_memory_map(path, 2048)

    for tensor in [tensor1, tensor2]:
        mmap.seek(0)
        pa.ipc.write_tensor(tensor, mmap)

        mmap.seek(0)
        result = pa.ipc.read_tensor(mmap)

        assert result.equals(tensor)


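# Equality compares tensor contents, not memory layout: a strided view
# and its contiguous copy compare equal.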
def test_tensor_equals():
    def eq(a, b):
        assert a.equals(b)
        assert a == b
        assert not (a != b)

    def ne(a, b):
        assert not a.equals(b)
        assert not (a == b)
        assert a != b

    data = np.random.randn(10, 6, 4)[::, ::2, ::]
    tensor1 = pa.Tensor.from_numpy(data)
    tensor2 = pa.Tensor.from_numpy(np.ascontiguousarray(data))
    eq(tensor1, tensor2)
    data = data.copy()
    data[9, 0, 0] = 1.0
    tensor2 = pa.Tensor.from_numpy(np.ascontiguousarray(data))
    ne(tensor1, tensor2)


def test_tensor_hashing():
    # Tensors are unhashable
    with pytest.raises(TypeError, match="unhashable"):
        hash(pa.Tensor.from_numpy(np.arange(10)))


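# get_tensor_size() reports the serialized IPC size, which exceeds the
# raw 8-byte-per-element payload because it includes metadata.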
def test_tensor_size():
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)
    assert pa.ipc.get_tensor_size(tensor) > (data.size * 8)


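# The memory map is sized with get_tensor_size() so the serialized tensor
# fits; the result is read back through a separate read-only mapping.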
def test_read_tensor(tmpdir):
    # Create and write tensor
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)
    data_size = pa.ipc.get_tensor_size(tensor)
    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-read-tensor')
    write_mmap = pa.create_memory_map(path, data_size)
    pa.ipc.write_tensor(tensor, write_mmap)
    # Try to read tensor
    read_mmap = pa.memory_map(path, mode='r')
    array = pa.ipc.read_tensor(read_mmap).to_numpy()
    np.testing.assert_equal(data, array)


def test_tensor_memoryview():
    # Tensors support the PEP 3118 buffer protocol
    for dtype, expected_format in [(np.int8, '=b'),
                                   (np.int64, '=q'),
                                   (np.uint64, '=Q'),
                                   (np.float16, 'e'),
                                   (np.float64, 'd'),
                                   ]:
        data = np.arange(10, dtype=dtype)
        dtype = data.dtype
        lst = data.tolist()
        tensor = pa.Tensor.from_numpy(data)
        m = memoryview(tensor)
        assert m.format == expected_format
        assert m.shape == data.shape
        assert m.strides == data.strides
        assert m.ndim == 1
        assert m.nbytes == data.nbytes
        assert m.itemsize == data.itemsize
        assert m.itemsize * 8 == tensor.type.bit_width
        assert np.frombuffer(m, dtype).tolist() == lst
        del tensor, data
        assert np.frombuffer(m, dtype).tolist() == lst